Repository: apache/incubator-liminal
Branch: master
Commit: aee9827f3b33
Files: 266
Total size: 596.4 KB

Directory structure:
gitextract_29sbg3vw/

├── .asf.yaml
├── .bumpversion.cfg
├── .github/
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows/
│       ├── pre_commits.yml
│       ├── unittests.yml
│       └── versioning.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CONTRIBUTING.md
├── DISCLAIMER-WIP
├── LICENSE
├── MANIFEST.in
├── NOTICE
├── README.md
├── RETIRED.txt
├── dev/
│   ├── LICENSE.txt
│   ├── RELEASE.md
│   └── sign.sh
├── docs/
│   ├── Makefile
│   ├── README.md
│   ├── architecture.md
│   ├── conf.py
│   ├── getting-started/
│   │   ├── hello_world.md
│   │   ├── iris_classification.md
│   │   └── spark_app_demo.md
│   ├── index.rst
│   ├── liminal/
│   │   ├── README.md
│   │   ├── advanced.liminal.yml.md
│   │   ├── executors/
│   │   │   ├── README.md
│   │   │   ├── emr.md
│   │   │   ├── index.rst
│   │   │   └── kubernetes.md
│   │   ├── extensibility.md
│   │   ├── images/
│   │   │   ├── README.md
│   │   │   ├── index.rst
│   │   │   ├── python.md
│   │   │   └── python_server.md
│   │   ├── index.rst
│   │   ├── kubernetes/
│   │   │   └── secret_util.md
│   │   ├── liminal.yml.md
│   │   ├── metrics_backends/
│   │   │   ├── README.md
│   │   │   ├── aws_cloudwatch.md
│   │   │   └── index.rst
│   │   ├── monitoring.md
│   │   ├── pipelines.md
│   │   ├── services.md
│   │   └── tasks/
│   │       ├── README.md
│   │       ├── create_cloudformation_stack.md
│   │       ├── delete_cloudformation_stack.md
│   │       ├── index.rst
│   │       ├── python.md
│   │       └── spark.md
│   ├── make.bat
│   └── source/
│       ├── How_to_install_liminal_in_airflow_on_kubernetes.md
│       └── liminal_aws_deploy.sh
├── examples/
│   ├── aws-ml-app-demo/
│   │   ├── __init__.py
│   │   ├── liminal.yml
│   │   ├── manifests/
│   │   │   └── aws-ml-app-demo.yaml
│   │   ├── model_store.py
│   │   ├── requirements.txt
│   │   ├── serving.py
│   │   └── training.py
│   ├── extensibility/
│   │   ├── __init__.py
│   │   ├── executors/
│   │   │   ├── __init__.py
│   │   │   └── custom_executor.py
│   │   ├── images/
│   │   │   ├── __init__.py
│   │   │   ├── custom_image.py
│   │   │   └── custom_image_builder/
│   │   │       ├── Dockerfile
│   │   │       └── __init__.py
│   │   ├── liminal.yml
│   │   └── tasks/
│   │       ├── __init__.py
│   │       └── custom_task.py
│   ├── liminal-getting-started/
│   │   ├── __init__.py
│   │   ├── hello_world.json
│   │   ├── helloworld/
│   │   │   ├── __init__.py
│   │   │   └── hello_world.py
│   │   ├── liminal.yml
│   │   ├── myserver/
│   │   │   ├── __init__.py
│   │   │   └── my_server.py
│   │   ├── pip.conf
│   │   └── requirements.txt
│   ├── sagemaker_example/
│   │   ├── README.md
│   │   ├── archetype/
│   │   │   └── liminal.yaml
│   │   ├── data_preparation/
│   │   │   ├── __init__.py
│   │   │   ├── data_preparation.py
│   │   │   └── data_uploader.py
│   │   ├── data_train/
│   │   │   ├── __init__.py
│   │   │   └── train.py
│   │   ├── inference.py
│   │   ├── liminal.yaml
│   │   ├── liminal_sm.py
│   │   ├── requirements.txt
│   │   └── sm_ops.py
│   └── spark-app-demo/
│       └── k8s/
│           ├── __init__.py
│           ├── archetype/
│           │   └── liminal.yml
│           ├── data/
│           │   └── iris.csv
│           ├── data_cleanup.py
│           ├── liminal.yml
│           ├── manifests/
│           │   └── spark-app-demo.yaml
│           ├── model_store.py
│           ├── requirements.txt
│           ├── serving.py
│           └── training.py
├── install.sh
├── liminal/
│   ├── __init__.py
│   ├── build/
│   │   ├── __init__.py
│   │   ├── image/
│   │   │   ├── __init__.py
│   │   │   ├── python/
│   │   │   │   ├── Dockerfile
│   │   │   │   ├── __init__.py
│   │   │   │   └── python.py
│   │   │   ├── python_server/
│   │   │   │   ├── Dockerfile
│   │   │   │   ├── __init__.py
│   │   │   │   ├── liminal_python_server.py
│   │   │   │   ├── python_server.py
│   │   │   │   └── python_server_requirements.txt
│   │   │   └── spark/
│   │   │       ├── Dockerfile
│   │   │       ├── __init__.py
│   │   │       └── spark.py
│   │   ├── image_builder.py
│   │   ├── liminal_apps_builder.py
│   │   └── python.py
│   ├── core/
│   │   ├── __init__.py
│   │   ├── config/
│   │   │   ├── __init__.py
│   │   │   ├── config.py
│   │   │   └── defaults/
│   │   │       ├── __init__.py
│   │   │       ├── base/
│   │   │       │   ├── __init__.py
│   │   │       │   └── liminal.yml
│   │   │       └── default_configs.py
│   │   ├── environment.py
│   │   └── util/
│   │       ├── __init__.py
│   │       ├── class_util.py
│   │       ├── dict_util.py
│   │       ├── env_util.py
│   │       ├── extensible.py
│   │       └── files_util.py
│   ├── docker/
│   │   └── __init__.py
│   ├── kubernetes/
│   │   ├── __init__.py
│   │   ├── secret_util.py
│   │   └── volume_util.py
│   ├── logging/
│   │   ├── __init__.py
│   │   └── logging_setup.py
│   ├── monitoring/
│   │   └── __init__.py
│   ├── runners/
│   │   ├── __init__.py
│   │   └── airflow/
│   │       ├── __init__.py
│   │       ├── config/
│   │       │   ├── __init__.py
│   │       │   └── standalone_variable_backend.py
│   │       ├── dag/
│   │       │   ├── __init__.py
│   │       │   ├── liminal_dags.py
│   │       │   └── liminal_register_dags.py
│   │       ├── executors/
│   │       │   ├── __init__.py
│   │       │   ├── airflow.py
│   │       │   ├── emr.py
│   │       │   └── kubernetes.py
│   │       ├── model/
│   │       │   ├── __init__.py
│   │       │   ├── executor.py
│   │       │   └── task.py
│   │       ├── operators/
│   │       │   ├── __init__.py
│   │       │   ├── cloudformation.py
│   │       │   ├── job_status_operator.py
│   │       │   └── operator_with_variable_resolving.py
│   │       └── tasks/
│   │           ├── __init__.py
│   │           ├── airflow.py
│   │           ├── containerable.py
│   │           ├── create_cloudformation_stack.py
│   │           ├── delete_cloudformation_stack.py
│   │           ├── hadoop.py
│   │           ├── job_end.py
│   │           ├── job_start.py
│   │           ├── python.py
│   │           ├── spark.py
│   │           └── sql.py
│   ├── settings.py
│   └── sql/
│       └── __init__.py
├── liminal-arch.md
├── pyproject.toml
├── requirements.txt
├── run_tests.sh
├── scripts/
│   ├── Dockerfile-airflow
│   ├── __init__.py
│   ├── docker-compose.yml
│   ├── liminal
│   ├── requirements-airflow.txt
│   └── webserver_config.py
├── setup.py
├── tests/
│   ├── __init__.py
│   ├── liminal/
│   │   ├── __init__.py
│   │   ├── core/
│   │   │   ├── __init__.py
│   │   │   ├── config/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── defaults/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── test_apply_variables_substitution.py
│   │   │   │   │   ├── test_defaults_pipeline_config.py
│   │   │   │   │   ├── test_defaults_service_config.py
│   │   │   │   │   └── test_defaults_tasks_config.py
│   │   │   │   └── test_config.py
│   │   │   └── util/
│   │   │       ├── __init__.py
│   │   │       └── test_dict_utils.py
│   │   └── kubernetes/
│   │       ├── __init__.py
│   │       └── test_volume_util.py
│   ├── runners/
│   │   ├── __init__.py
│   │   ├── airflow/
│   │   │   ├── __init__.py
│   │   │   ├── build/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── http/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── python/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── test_python_server_image_builder.py
│   │   │   │   ├── python/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── test_python_image_builder.py
│   │   │   │   ├── spark/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── test_spark_image_builder.py
│   │   │   │   └── test_liminal_apps_builder.py
│   │   │   ├── compiler/
│   │   │   │   └── __init__.py
│   │   │   ├── dag/
│   │   │   │   ├── __init__.py
│   │   │   │   └── test_liminal_dags.py
│   │   │   ├── executors/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── test_airflow_executor.py
│   │   │   │   └── test_emr.py
│   │   │   ├── liminal/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── liminal.yml
│   │   │   │   ├── myserver/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── my_server.py
│   │   │   │   ├── pip.conf
│   │   │   │   ├── requirements.txt
│   │   │   │   ├── write_inputs/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── write_inputs.py
│   │   │   │   └── write_outputs/
│   │   │   │       ├── __init__.py
│   │   │   │       └── write_outputs.py
│   │   │   ├── operators/
│   │   │   │   ├── __init__.py
│   │   │   │   └── test_operator_with_variable_resolving.py
│   │   │   └── tasks/
│   │   │       ├── __init__.py
│   │   │       ├── test_create_cloudformation_stack.py
│   │   │       ├── test_delete_cloudformation_stack.py
│   │   │       ├── test_job_end.py
│   │   │       ├── test_job_start.py
│   │   │       ├── test_python.py
│   │   │       └── test_spark_task.py
│   │   └── apps/
│   │       ├── __init__.py
│   │       ├── test/
│   │       │   └── liminal.yml
│   │       ├── test_app/
│   │       │   ├── __init__.py
│   │       │   ├── defaults/
│   │       │   │   ├── __init__.py
│   │       │   │   └── liminal.yml
│   │       │   ├── extra/
│   │       │   │   ├── __init__.py
│   │       │   │   └── liminal.yml
│   │       │   ├── helloworld/
│   │       │   │   ├── __init__.py
│   │       │   │   └── hellp_world.py
│   │       │   ├── liminal.yml
│   │       │   └── my_server/
│   │       │       ├── __init__.py
│   │       │       └── my_server.py
│   │       └── test_spark_app/
│   │           ├── __init__.py
│   │           ├── liminal.yml
│   │           └── wordcount/
│   │               ├── __init__.py
│   │               ├── requirements.txt
│   │               ├── wordcount.py
│   │               └── words.txt
│   ├── test_licenses.py
│   └── util/
│       ├── __init__.py
│       ├── dag_test_utils.py
│       ├── test_class_utils.py
│       └── test_pkg_1/
│           ├── __init__.py
│           ├── test_clazz_base.py
│           └── test_pkg_1_1/
│               ├── __init__.py
│               ├── test_clazz_child_1.py
│               ├── test_clazz_child_2.py
│               ├── test_pkg_1_1_1/
│               │   ├── __init__.py
│               │   └── test_clazz_leaf_1.py
│               └── test_pkg_1_1_2/
│                   ├── __init__.py
│                   └── test_clazz_leaf_2.py
└── yamllint-config.yml

================================================
FILE CONTENTS
================================================

================================================
FILE: .asf.yaml
================================================
---
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

github:
  description: Apache Liminal's goal is to operationalise the machine learning process, allowing data scientists to quickly transition from a successful
    experiment to an automated pipeline of model training, validation, deployment and inference in production. Liminal provides a Domain Specific Language
    to build ML workflows on top of Apache Airflow.
  homepage: https://liminal.apache.org
  labels:
    - data-science
    - big-data
    - machine-learning
    - airflow
    - ai
    - ml
    - workflows

notifications:
  commits: commits@liminal.apache.org
  issues: issues@liminal.apache.org
  pullrequests: dev@liminal.apache.org
  jira_options: link label worklog


================================================
FILE: .bumpversion.cfg
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

[bumpversion]
current_version = 0.0.1rc6
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\.?\w*)(\-(?P<release>[a-z]+)(?P<build>\d+))?
serialize =
	{major}.{minor}.{patch}-dev{build}
	{major}.{minor}.{patch}

[bumpversion:part:release]
optional_value = prod
first_value = dev
values =
 dev
 prod


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
## Changes Title (replace this with a logical title for your changes)

*Issue #, if available:*

### *Description of changes:*

Replace this with the PR description (mention the changes you have made, why you have made them, provide some background and any references to the provider documentation if needed, etc.).

For more information on contributing, please see [Contributing](../CONTRIBUTING.md)
section of our documentation.

### Status

Replace this: describe the PR status. Examples:

- WIP - work in progress
- DONE - ready for review

### Checklist (tick everything that applies)

- [ ] [PreCommitChecks - Code linting](../CONTRIBUTING.md#InstallRunPreCommit) (required)
- [ ] [Tests](../CONTRIBUTING.md#RunningTests)


================================================
FILE: .github/workflows/pre_commits.yml
================================================
---
name: PreCommitChecks

on:
  push:
    branches:
      - master
      # - '!release*'
  pull_request:
    branches:
      - master
      # - '!release*'
jobs:
  linting:
    name: Run pre-commit hooks on py3.6
    runs-on: ubuntu-20.04
    steps:
      #----------------------------------------------
      # Checkout, SetUp Python, Load Cache and Run PreCommitChecks
      #----------------------------------------------
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      # Install Python
      - uses: actions/setup-python@v2
        with:
          python-version: 3.6

      - name: Check Python ${{ matrix.python-version }} version
        run: python -V

      - uses: actions/cache@v2
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip
          restore-keys: ${{ runner.os }}-pip

      - name: Install python requirements
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install pre-commit

      # load cached venv if cache exists
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v2
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ hashFiles('**/requirements.txt') }}

      - run: pre-commit install
      - name: Run pre-commit hooks on all the files
        run: pre-commit run --all-files --show-diff-on-failure --color always --verbose
      # - uses: pre-commit/action@v2.0.3
      #   with:
      #     token: ${{ secrets.GITHUB_TOKEN }}


================================================
FILE: .github/workflows/unittests.yml
================================================
---
name: Running unittest

on:
  # Trigger the workflow on push or pull request, but only for the master branch
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  # Trigger the workflow on cron schedule
  schedule:
    - cron: '7 0 * * *'

jobs:
  unittest:
    runs-on: ubuntu-20.04
    timeout-minutes: 20
    steps:
      - name: Checkout
        uses: actions/checkout@v1
      - name: Setup Minikube
        uses: manusa/actions-setup-minikube@v2.7.1
        with:
          minikube version: 'v1.26.1'
          kubernetes version: 'v1.25.0'
          github token: ${{ secrets.GITHUB_TOKEN }}
      - name: Set up Python
        uses: actions/setup-python@v1
        with:
          python-version: '3.6'
      - name: Install python requirements
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      - name: Run unittest - runners
        run: ./run_tests.sh
  approve:
    needs: unittest
    runs-on: ubuntu-20.04

    steps:
      - run: | # approve the pull request
          curl --request POST \
          --url https://api.github.com/repos/${{github.repository}}/pulls/${{github.event.number}}/reviews \
          --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \
          --header 'content-type: application/json' \
          -d '{"event":"APPROVE"}'


================================================
FILE: .github/workflows/versioning.yml
================================================
---
name: Versioning RC

on:
  workflow_run:
    workflows: [Running unittest]
    branches: [master]
    types:
      - completed

jobs:
  versioning-auto-commit:
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    name: Publish package
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - name: Set up Python
        uses: actions/setup-python@v1
        with:
          python-version: '3.6'
      - name: Install dependencies for setup
        run: |
          python -m pip install --upgrade pip
          pip install bump2version
      - name: Project versioning
        run: bumpversion build
      - name: Commit report
        run: |
          git config --global user.name 'Liminal Bot'
          git commit -am "Increment version"
          git push


================================================
FILE: .gitignore
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
env
.idea
bin
include
lib
venv
/build
.Python
*.pyc
pip-selfcheck.json
.DS_Store
/build
apache_liminal.egg-info
scripts/*.tar.gz
scripts/*.whl
dist


================================================
FILE: .pre-commit-config.yaml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
minimum_pre_commit_version: 2.16.0
exclude: >
  (?x)^(
      .+/.venv/.+|.+/dist/.+|.+/.autovenv|.+/docs/|.github
  )$
fail_fast: true
default_language_version:
  python: python3
default_stages:
  - prepare-commit-msg
  - commit
  # - push

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.0.1
    hooks:
      - id: check-case-conflict
      - id: check-merge-conflict
        stages:
          - commit
      - id: check-added-large-files
        args: [--maxkb=1000]
        stages:
          - commit
      - id: detect-aws-credentials
        args:
          - --allow-missing-credentials
      - id: fix-encoding-pragma
        args:
          - --remove
      - id: detect-private-key
      - id: destroyed-symlinks
      - id: mixed-line-ending
      - id: trailing-whitespace
      - id: check-toml
      # To match test*.py instead
      # - id: name-tests-test
      #   args: [--django]
      #   exclude: |
      #     - tests/test_licenses
      - id: end-of-file-fixer
        description: Ensures that a file is either empty, or ends with one newline.
        exclude_types: [sql]
        # types: [text]
      - id: pretty-format-json
        args:
          - --autofix
          - --no-sort-keys
          - --indent
          - '2'
        # files:
        pass_filenames: true

  - repo: https://github.com/Lucas-C/pre-commit-hooks
    rev: v1.1.10
    hooks:
      - id: insert-license
        name: Add license for all md files
        files: \.md$
        exclude: ^\.github/.*$
        args:
          - --comment-style
          - <!--|| -->
          - --license-filepath
          - dev/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all SQL files
        files: \.sql$
        exclude: ^\.github/.*$|^airflow/_vendor/
        args:
          - --comment-style
          - /*||*/
          - --license-filepath
          - dev/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all other files
        exclude: ^\.github/.*$|^airflow/_vendor/
        args:
          - --comment-style
          - '|#|'
          - --license-filepath
          - dev/LICENSE.txt
          - --fuzzy-match-generates-todo
        files: >
          \.properties$|\.cfg$|\.conf$|\.ini$|\.ldif$|\.readthedocs$|\.service$|\.tf$|Dockerfile.*$
      - id: insert-license
        name: Add license for all JINJA template files
        files: ^airflow/www/templates/.*\.html$|^docs/templates/.*\.html$.*\.jinja2
        exclude: ^\.github/.*$^airflow/_vendor/
        args:
          - --comment-style
          - '{#||#}'
          - --license-filepath
          - dev/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all shell files
        exclude: ^\.github/.*$|^airflow/_vendor/
        files: ^breeze$|^breeze-complete$|\.sh$|\.bash$|\.bats$
        args:
          - --comment-style
          - '|#|'
          - --license-filepath
          - dev/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all Python files
        exclude: ^\.github/.*$|^airflow/_vendor/
        types: [python]
        args:
          - --comment-style
          - '|#|'
          - --license-filepath
          - dev/LICENSE.txt
          - --fuzzy-match-generates-todo
      - id: insert-license
        name: Add license for all YAML files
        exclude: ^\.github/.*$|^airflow/_vendor/
        types: [yaml]
        files: \.yml$|\.yaml$
        args:
          - --comment-style
          - '|#|'
          - --license-filepath
          - dev/LICENSE.txt
          - --fuzzy-match-generates-todo

  # Python: Black formatter
  - repo: https://github.com/psf/black
    rev: 21.12b0
    hooks:
      - id: black
        args: [--safe, --quiet, --config=./pyproject.toml]
        files: \.pyi?$
        exclude: .github/
        # override until resolved: https://github.com/psf/black/issues/402
        types: []

  # - repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt
  #   rev: 0.1.0
  #   hooks:
  #     - id: yamlfmt
  #       args: [--mapping, '2', --sequence, '4', --offset, '2']

  # Yaml: lint
  - repo: https://github.com/adrienverge/yamllint
    rev: v1.26.3
    hooks:
      - id: yamllint
        name: Check YAML files with yamllint
        entry: yamllint -c yamllint-config.yml # --strict
        types: [yaml]

  # Python - isort to sort imports in Python files
  - repo: https://github.com/timothycrosley/isort
    rev: 5.10.1
    hooks:
      - id: isort
        name: Run isort to sort imports in Python files
        args: [--profile, black]
        files: \.py$
        # To keep consistent with the global isort skip config defined in setup.cfg
        exclude: ^build/.*$|^.tox/.*$|^venv/.*$

  # PyUpgrade - 3.6
  - repo: https://github.com/asottile/pyupgrade
    rev: v2.29.1
    hooks:
      - id: pyupgrade
        args: [--py36-plus]
        exclude: ^scripts/|^docs

  - repo: https://github.com/asottile/blacken-docs
    rev: v1.12.0
    hooks:
      - id: blacken-docs
        alias: black
        additional_dependencies: [black==21.9b0]

  # - repo: https://github.com/PyCQA/bandit
  #   rev: 1.7.1
  #   hooks:
  #     - id: bandit
  #       args: [-ll, -r, liminal]
  #       files: .py$
  #       exclude: tests/test_licenses.py|^tests/runners


================================================
FILE: CONTRIBUTING.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Contributing to Liminal docs

Apache Liminal uses Sphinx to build its documentation and readthedocs.org to publish it.

## Basic setup
Here is what you need to do in order to work and create docs locally

## Installing sphinx and documentation

More details about Sphinx and Read the Docs can be found [here](https://docs.readthedocs.io/en/stable/intro/getting-started-with-sphinx.html).

Here is what you need to do:
```
pip install sphinx
```
Inside your project, go to the `docs` directory, update the docs you want to change, and build them:
```
cd docs
make html
```

## Automatically rebuild sphinx docs
```
pip install sphinx-autobuild
cd docs
sphinx-autobuild source/ build/html/
```
Now open a browser on
http://localhost:8000/
Your docs will automatically update

## Rebuilding the pythondoc liminal code documentation

```
cd docs
sphinx-apidoc -o source/ ../liminal
```

## RunningTests

When doing local development and running Liminal unit-tests, make sure to set LIMINAL_STAND_ALONE_MODE=True

1. Setup Minikube
2. Install Python requirements: `pip install -r requirements.txt`
3. Run tests: `./run_tests.sh`

## InstallRunPreCommit

[pre-commit](https://pre-commit.com/) is used to install Python code linting and formatting tools:

### Getting started

**Requires `python>=3.6`, `pre-commit>=2.16.0` (the `minimum_pre_commit_version` set in `.pre-commit-config.yaml`) and a `git` repository**

1. Run `pip install pre-commit` or `pip install  -r requirements.txt`
2. Install the hooks:
  Simple install without post hooks: `pre-commit install`

  OR
  Install the hooks: `pre-commit install --install-hooks`

  Optional:
  Install the post commit hooks: `pre-commit install --hook-type post-commit`

3. To run the pre-commit hooks, do one of the following:
   1. Run `git commit` to commit the new configuration files; the hooks run as part of the commit.

   1. Run `pre-commit run -a` to lint and format your entire project.

Now on every commit, `pre-commit` will use a git hook to run the tools.
**Warning: the first commit will take some time because the tools are being installed by
`pre-commit`**

`pre-commit run -a` to lint and format your entire project

### Resolving failed commits

* If `black` fails, it has reformatted your code.
* Check the changes it made, then either run `git add --update .` and re-commit, or `git add` the affected files and `git commit` again.

Example:

![Fixing black failed commits](https://user-images.githubusercontent.com/16241795/146011210-7bc11b24-2033-43f7-8150-5ece4fe7bfea.png)

### EnabledHooks

1. [black](https://black.readthedocs.io/en/stable/): a Python automatic code formatter
1. [yamllint](https://yamllint.readthedocs.io/): A linter for YAML files.
  yamllint does not only check for syntax validity, but for weirdnesses like key repetition and cosmetic problems such as lines length, trailing spaces, indentation, etc.
1. [OutOfBoxHooks](https://github.com/pre-commit/pre-commit-hooks) - Out-of-the-box hooks for pre-commit, such as a check for files that contain merge conflict strings.
1. [Bandit](https://bandit.readthedocs.io/en/latest/) is a tool designed to find common security issues in Python code.
1. [blacken-docs](https://github.com/asottile/blacken-docs) Run `black` on python code blocks in documentation files
1. [pyupgrade](https://github.com/asottile/pyupgrade) A tool (and pre-commit hook) to automatically upgrade syntax for newer versions of the language.
1. [isort](https://pycqa.github.io/isort/) A Python utility / library to sort imports.


================================================
FILE: DISCLAIMER-WIP
================================================
Apache Liminal is an effort undergoing incubation at the Apache Software
Foundation (ASF), sponsored by the Apache Incubator PMC.

Incubation is required of all newly accepted projects until a further review
indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects.

While incubation status is not necessarily a reflection of the completeness
or stability of the code, it does indicate that the project has yet to be
fully endorsed by the ASF.

Some of the incubating project’s releases may not be fully compliant with ASF policy.
For example, releases may have incomplete or un-reviewed licensing conditions.
What follows is a list of known issues the project is currently aware of
(note that this list, by definition, is likely to be incomplete):

1. The Liminal source code is distributed under the Apache License, Version 2.0. However,
building Liminal requires a transitive dependency, chardet version 4.0.0, which is licensed under the LGPL, Version 2.1.

If you are planning to incorporate this work into your product/project,
please be aware that you will need to conduct a thorough licensing review
to determine the overall implications of including this work.


================================================
FILE: LICENSE
================================================
                              Apache License
                        Version 2.0, January 2004
                     http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

   "License" shall mean the terms and conditions for use, reproduction,
   and distribution as defined by Sections 1 through 9 of this document.

   "Licensor" shall mean the copyright owner or entity authorized by
   the copyright owner that is granting the License.

   "Legal Entity" shall mean the union of the acting entity and all
   other entities that control, are controlled by, or are under common
   control with that entity. For the purposes of this definition,
   "control" means (i) the power, direct or indirect, to cause the
   direction or management of such entity, whether by contract or
   otherwise, or (ii) ownership of fifty percent (50%) or more of the
   outstanding shares, or (iii) beneficial ownership of such entity.

   "You" (or "Your") shall mean an individual or Legal Entity
   exercising permissions granted by this License.

   "Source" form shall mean the preferred form for making modifications,
   including but not limited to software source code, documentation
   source, and configuration files.

   "Object" form shall mean any form resulting from mechanical
   transformation or translation of a Source form, including but
   not limited to compiled object code, generated documentation,
   and conversions to other media types.

   "Work" shall mean the work of authorship, whether in Source or
   Object form, made available under the License, as indicated by a
   copyright notice that is included in or attached to the work
   (an example is provided in the Appendix below).

   "Derivative Works" shall mean any work, whether in Source or Object
   form, that is based on (or derived from) the Work and for which the
   editorial revisions, annotations, elaborations, or other modifications
   represent, as a whole, an original work of authorship. For the purposes
   of this License, Derivative Works shall not include works that remain
   separable from, or merely link (or bind by name) to the interfaces of,
   the Work and Derivative Works thereof.

   "Contribution" shall mean any work of authorship, including
   the original version of the Work and any modifications or additions
   to that Work or Derivative Works thereof, that is intentionally
   submitted to Licensor for inclusion in the Work by the copyright owner
   or by an individual or Legal Entity authorized to submit on behalf of
   the copyright owner. For the purposes of this definition, "submitted"
   means any form of electronic, verbal, or written communication sent
   to the Licensor or its representatives, including but not limited to
   communication on electronic mailing lists, source code control systems,
   and issue tracking systems that are managed by, or on behalf of, the
   Licensor for the purpose of discussing and improving the Work, but
   excluding communication that is conspicuously marked or otherwise
   designated in writing by the copyright owner as "Not a Contribution."

   "Contributor" shall mean Licensor and any individual or Legal Entity
   on behalf of whom a Contribution has been received by Licensor and
   subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
   this License, each Contributor hereby grants to You a perpetual,
   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
   copyright license to reproduce, prepare Derivative Works of,
   publicly display, publicly perform, sublicense, and distribute the
   Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
   this License, each Contributor hereby grants to You a perpetual,
   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
   (except as stated in this section) patent license to make, have made,
   use, offer to sell, sell, import, and otherwise transfer the Work,
   where such license applies only to those patent claims licensable
   by such Contributor that are necessarily infringed by their
   Contribution(s) alone or by combination of their Contribution(s)
   with the Work to which such Contribution(s) was submitted. If You
   institute patent litigation against any entity (including a
   cross-claim or counterclaim in a lawsuit) alleging that the Work
   or a Contribution incorporated within the Work constitutes direct
   or contributory patent infringement, then any patent licenses
   granted to You under this License for that Work shall terminate
   as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
   Work or Derivative Works thereof in any medium, with or without
   modifications, and in Source or Object form, provided that You
   meet the following conditions:

   (a) You must give any other recipients of the Work or
       Derivative Works a copy of this License; and

   (b) You must cause any modified files to carry prominent notices
       stating that You changed the files; and

   (c) You must retain, in the Source form of any Derivative Works
       that You distribute, all copyright, patent, trademark, and
       attribution notices from the Source form of the Work,
       excluding those notices that do not pertain to any part of
       the Derivative Works; and

   (d) If the Work includes a "NOTICE" text file as part of its
       distribution, then any Derivative Works that You distribute must
       include a readable copy of the attribution notices contained
       within such NOTICE file, excluding those notices that do not
       pertain to any part of the Derivative Works, in at least one
       of the following places: within a NOTICE text file distributed
       as part of the Derivative Works; within the Source form or
       documentation, if provided along with the Derivative Works; or,
       within a display generated by the Derivative Works, if and
       wherever such third-party notices normally appear. The contents
       of the NOTICE file are for informational purposes only and
       do not modify the License. You may add Your own attribution
       notices within Derivative Works that You distribute, alongside
       or as an addendum to the NOTICE text from the Work, provided
       that such additional attribution notices cannot be construed
       as modifying the License.

   You may add Your own copyright statement to Your modifications and
   may provide additional or different license terms and conditions
   for use, reproduction, or distribution of Your modifications, or
   for any such Derivative Works as a whole, provided Your use,
   reproduction, and distribution of the Work otherwise complies with
   the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
   any Contribution intentionally submitted for inclusion in the Work
   by You to the Licensor shall be under the terms and conditions of
   this License, without any additional terms or conditions.
   Notwithstanding the above, nothing herein shall supersede or modify
   the terms of any separate license agreement you may have executed
   with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
   names, trademarks, service marks, or product names of the Licensor,
   except as required for reasonable and customary use in describing the
   origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
   agreed to in writing, Licensor provides the Work (and each
   Contributor provides its Contributions) on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
   implied, including, without limitation, any warranties or conditions
   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
   PARTICULAR PURPOSE. You are solely responsible for determining the
   appropriateness of using or redistributing the Work and assume any
   risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
   whether in tort (including negligence), contract, or otherwise,
   unless required by applicable law (such as deliberate and grossly
   negligent acts) or agreed to in writing, shall any Contributor be
   liable to You for damages, including any direct, indirect, special,
   incidental, or consequential damages of any character arising as a
   result of this License or out of the use or inability to use the
   Work (including but not limited to damages for loss of goodwill,
   work stoppage, computer failure or malfunction, or any and all
   other commercial damages or losses), even if such Contributor
   has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
   the Work or Derivative Works thereof, You may choose to offer,
   and charge a fee for, acceptance of support, warranty, indemnity,
   or other liability obligations and/or rights consistent with this
   License. However, in accepting such obligations, You may act only
   on Your own behalf and on Your sole responsibility, not on behalf
   of any other Contributor, and only if You agree to indemnify,
   defend, and hold each Contributor harmless for any liability
   incurred by, or claims asserted against, such Contributor by reason
   of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

   To apply the Apache License to your work, attach the following
   boilerplate notice, with the fields enclosed by brackets "[]"
   replaced with your own identifying information. (Don't include
   the brackets!)  The text should be enclosed in the appropriate
   comment syntax for the file format. We also recommend that a
   file or class name and description of purpose be included on the
   same "printed page" as the copyright notice for easier
   identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


================================================
FILE: MANIFEST.in
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


include scripts/* requirements.txt requirements-airflow.txt
recursive-include liminal/build/ *


================================================
FILE: NOTICE
================================================
Apache Liminal
Copyright 2020-2023 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
Licensed under the Apache License 2.0.


================================================
FILE: README.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Apache Liminal

Apache Liminal is an end-to-end platform for data engineers & scientists, allowing them to build,
train and deploy machine learning models in a robust and agile way.

The platform provides the abstractions and declarative capabilities for
data extraction & feature engineering followed by model training and serving.
Liminal's goal is to operationalize the machine learning process, allowing data scientists to
quickly transition from a successful experiment to an automated pipeline of model training,
validation, deployment and inference in production, freeing them from engineering and
non-functional tasks, and allowing them to focus on machine learning code and artifacts.

# Basics

Using simple YAML configuration, create your own scheduled data pipelines (a sequence of tasks to
perform), application servers, and more.

## Getting Started

A simple getting started guide for Liminal can be found [here](docs/getting-started/hello_world.md)

## Apache Liminal Documentation

Full documentation of Apache Liminal can be found [here](docs/liminal)

## High Level Architecture

High level architecture documentation can be found [here](docs/architecture.md)

## Example YAML config file

```yaml
---
name: MyLiminalStack
owner: Bosco Albert Baracus
volumes:
  - volume: myvol1
    local:
      path: /Users/me/myvol1
images:
  - image: my_python_task_img
    type: python
    source: write_inputs
  - image: my_parallelized_python_task_img
    source: write_outputs
  - image: my_server_image
    type: python_server
    source: myserver
    endpoints:
      - endpoint: /myendpoint1
        module: my_server
        function: myendpoint1func
pipelines:
  - pipeline: my_pipeline
    start_date: 1970-01-01
    timeout_minutes: 45
    schedule: 0 * 1 * *
    metrics:
      namespace: TestNamespace
      backends: [ 'cloudwatch' ]
    tasks:
      - task: my_python_task
        type: python
        description: static input task
        image: my_python_task_img
        env_vars:
          NUM_FILES: 10
          NUM_SPLITS: 3
        mounts:
          - mount: mymount
            volume: myvol1
            path: /mnt/vol1
        cmd: python -u write_inputs.py
      - task: my_parallelized_python_task
        type: python
        description: parallelized python task
        image: my_parallelized_python_task_img
        env_vars:
          FOO: BAR
        executors: 3
        mounts:
          - mount: mymount
            volume: myvol1
            path: /mnt/vol1
        cmd: python -u write_inputs.py
services:
  - service: my_python_server
    description: my python server
    image: my_server_image
```

# Installation

1. Install this repository (HEAD)

```bash
   pip install git+https://github.com/apache/incubator-liminal.git
```

2. Optional: set LIMINAL_HOME to path of your choice (if not set, will default to ~/liminal_home)

```bash
echo 'export LIMINAL_HOME=</path/to/some/folder>' >> ~/.bash_profile && source ~/.bash_profile
```

# Authoring pipelines

This involves at minimum creating a single file called liminal.yml as in the example above.

If your pipeline requires custom python code to implement tasks, they should be organized
[like this](https://github.com/apache/incubator-liminal/tree/master/tests/runners/airflow/liminal)

If your pipeline introduces imports of external packages which are not already a part
of the liminal framework (i.e. you had to pip install them yourself), you need to also provide
a requirements.txt in the root of your project.

# Testing the pipeline locally

When your pipeline code is ready, you can test it by running it locally on your machine.

1. Ensure you have the Docker engine running locally, and enable a local Kubernetes cluster:

  ![Kubernetes configured](https://raw.githubusercontent.com/apache/incubator-liminal/master/images/k8s_running.png)

  And allocate it at least 3 CPUs (under "Resources" in the Docker preference UI).

  If you want to execute your pipeline on a remote kubernetes cluster, make sure the cluster is configured using:

  ```bash
  kubectl config set-context <your remote kubernetes cluster>
  ```

2. Build the docker images used by your pipeline.

In the example pipeline above, you can see that tasks and services have an "image" field - such as
"my_python_task_img". This means that the task is executed inside a docker container, and the docker container
is created from a docker image where various code and libraries are installed.

You can take a look at what the build process looks like, e.g.
[here](https://github.com/apache/incubator-liminal/tree/master/liminal/build/image/python)

In order for the images to be available for your pipeline, you'll need to build them locally:

```bash
cd </path/to/your/liminal/code>
liminal build
```

You'll see a number of outputs indicating the various docker images being built.

3. Create a kubernetes local volume \
In case your Yaml includes working with [volumes](https://github.com/apache/incubator-liminal/blob/6253f8b2c9dc244af032979ec6d462dc3e07e170/docs/getting_started.md#mounted-volumes)
please first run the following command:

```bash
cd </path/to/your/liminal/code>
liminal create
```

4. Deploy the pipeline:

```bash
cd </path/to/your/liminal/code>
liminal deploy
```

Note: after upgrading liminal, it's recommended to issue the command

```bash
liminal deploy --clean
```

This will rebuild the airflow docker containers from scratch with a fresh version of liminal, ensuring consistency.

5. Start the server

```bash
liminal start
```

6. Stop the server

```bash
liminal stop
```

7. Display the server logs

```bash
liminal logs --follow/--tail

Number of lines to show from the end of the log:
liminal logs --tail=10

Follow log output:
liminal logs --follow
```

8. Navigate to [http://localhost:8080/admin](http://localhost:8080/admin)

9. You should see your ![pipeline](https://raw.githubusercontent.com/apache/incubator-liminal/master/images/airflow.png)
The pipeline is scheduled to run according to the `schedule: 0 * 1 * *` field in the .yml file you provided.

10. To manually activate your pipeline:

- Click your pipeline and then click "trigger DAG"
- Click "Graph view"
You should see the steps in your pipeline getting executed in "real time" by clicking "Refresh" periodically.

![Pipeline activation](https://raw.githubusercontent.com/apache/incubator-liminal/master/images/airflow_trigger.png)

# Contributing

More information on contributing can be found [here](CONTRIBUTING.md)

# Community

The Liminal community holds a public call every Monday

- [Liminal Community Calendar](https://calendar.google.com/calendar/u/0/r?cid=jom1i20emghura6s6ookhe2skk@group.calendar.google.com)
- [Dev-Mailing-List](https://lists.apache.org/list.html?dev@liminal.apache.org)

## Running Tests (for contributors)

When doing local development and running Liminal unit-tests, make sure to set LIMINAL_STAND_ALONE_MODE=True


================================================
FILE: RETIRED.txt
================================================
This podling has been retired. Please see http://incubator.apache.org/projects/index.html#liminal


================================================
FILE: dev/LICENSE.txt
================================================
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.


================================================
FILE: dev/RELEASE.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->


- [Apache Liminal source releases](#apache-liminal-source-releases)
  - [Apache Liminal Package](#apache-liminal-package)
- [Prerequisites for the release manager preparing the release](#prerequisites-for-the-release-manager-preparing-the-release)
  - [version IDs](#version-ids)
  - [Testing the release locally](#testing-the-release-locally)
  - [Upload Public keys to id.apache.org](#upload-public-keys-to-idapacheorg)
  - [Configure PyPI uploads](#configure-pypi-uploads)
  - [Hardware used to prepare and verify the packages](#hardware-used-to-prepare-and-verify-the-packages)
- [Apache Liminal packages](#apache-liminal-packages)
  - [Prepare the Apache Liminal Package RC](#prepare-the-apache-liminal-package-rc)
    - [Build RC artifacts (both source packages and convenience packages)](#build-rc-artifacts-both-source-packages-and-convenience-packages)
    - [Prepare PyPI convenience "snapshot" packages](#prepare-pypi-convenience-snapshot-packages)
  - [Vote and verify the Apache Liminal release candidate](#vote-and-verify-the-apache-liminal-release-candidate)
    - [Prepare Vote email on the Apache Liminal release candidate](#prepare-vote-email-on-the-apache-liminal-release-candidate)
    - [Verify the release candidate by PMCs (legal)](#verify-the-release-candidate-by-pmcs-legal)
      - [PMC responsibilities](#pmc-responsibilities)
      - [SVN check](#svn-check)
      - [Verify the licences](#verify-the-licences)
      - [Verify the signatures](#verify-the-signatures)
      - [Verify the SHA512 sum](#verify-the-sha512-sum)
    - [Verify if the release candidate "works" by Contributors](#verify-if-the-release-candidate-works-by-contributors)
  - [Publish the final Apache Liminal release](#publish-the-final-apache-liminal-release)
    - [Summarize the voting for the Apache Liminal release](#summarize-the-voting-for-the-apache-liminal-release)
    - [Publish release to SVN](#publish-release-to-svn)
    - [Prepare PyPI "release" packages](#prepare-pypi-release-packages)
    - [Update CHANGELOG.md](#update-changelogmd)
    - [Notify developers of release](#notify-developers-of-release)

<!-- END doctoc generated TOC please keep comment here to allow auto update -->

# Apache Liminal source releases

## Apache Liminal Package

This package contains sources that allow the user to build a fully-functional Apache Liminal package.
They contain sources for "apache-liminal" python package that installs "liminal" Python package and includes
all the assets required to release the webserver UI coming with Apache Liminal

The Source releases are the only "official" Apache Software Foundation releases, and they are distributed
via [Official Apache Download sources](https://downloads.apache.org/)

Following source releases, the Apache Liminal release manager also distributes convenience packages:

* PyPI packages released via https://pypi.org/project/apache-liminal/
* Docker Images released via https://hub.docker.com/repository/docker/apache/liminal (coming soon...)

Those convenience packages are not "official releases" of Apache Liminal, but the users who
cannot or do not want to build the packages themselves can use them as a convenient way of installing
Apache Liminal, however they are not considered as "official source releases". You can read more
details about it in the [ASF Release Policy](http://www.apache.org/legal/release-policy.html).

This document describes the process of releasing both - official source packages and convenience
packages for Apache Liminal packages.

# Prerequisites for the release manager preparing the release

The person acting as release manager has to fulfill certain pre-requisites. More details and FAQs are
available in the [ASF Release Policy](http://www.apache.org/legal/release-policy.html), but some important
pre-requisites are listed below. Note that the release manager does not have to be a PMC member - it is enough
to be a committer to assume the release manager role, but there are final steps in the process (uploading
final releases to SVN) that can only be done by PMC member. If needed, the release manager
can ask PMC to perform that final step of release.

## version IDs

Should follow https://www.python.org/dev/peps/pep-0440/

## Testing the release locally

- Build a local version of liminal:
```bash
# Set Version
export LIMINAL_BUILD_VERSION=0.0.1rc1-incubating
python setup.py sdist bdist_wheel
```

- Start a test project with a liminal .yml file in it

- Clear folder $LIMINAL_HOME

- Install liminal in the test project

```bash
pip install <path to liminal .whl created by setup.py>
liminal build
liminal deploy --clean
liminal start
```

Note:
When you run liminal deploy, liminal installs itself inside the airflow container.
You can control which version of liminal gets installed inside docker using LIMINAL_VERSION environment variable.

This can be any string which results in a legal call to:
```bash
pip install ${LIMINAL_VERSION}
```

This includes
- A standard pip version like apache-liminal==0.0.1dev1 available from PyPI
- A URL for git e.g. git+https://github.com/apache/incubator-liminal.git
- A string indicating where to get the package from like --index <url> apache-liminal==xyz
- A local path to a .whl which is available inside the docker (e.g. which you placed
in the scripts/ folder)

This is useful if you are making changes in liminal locally and want to test them.

If you don't specify this variable, liminal attempts to discover how to install itself
by running a pip freeze and looking at the result.
This covers pip repositories, files and installation from URL.

The fallback in case no string is found, is simply 'apache-liminal' assuming your .pypirc contains an
index which has this package.


## Testing a version from testpypi:

Installing liminal locally:
pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple apache-liminal

Tell liminal where to take the version to put inside the airflow docker:
export LIMINAL_VERSION='--index-url https://test.pypi.org/simple/ apache-liminal==0.0.1rc1-incubating'

## Upload Public keys to id.apache.org

Make sure your public key is on id.apache.org and in KEYS. You will need to sign the release artifacts
with your pgp key. After you have created a key, make sure you:

- Add your GPG pub key to https://dist.apache.org/repos/dist/release/liminal/KEYS , follow the instructions at the top of that file. Upload your GPG public key to https://pgp.mit.edu
- Add your key fingerprint to https://id.apache.org/ (login with your apache credentials, paste your fingerprint into the pgp fingerprint field and hit save).

```shell script
# Create PGP Key
gpg --gen-key

# Checkout ASF dist repo
svn checkout https://dist.apache.org/repos/dist/release/incubator/liminal
cd liminal


# Add your GPG pub key to KEYS file. Replace "Aviem Zur" with your name
(gpg --list-sigs "Aviem Zur" && gpg --armor --export "Aviem Zur" ) >> KEYS


# Commit the changes
svn commit -m "Add PGP keys of Liminal developers"
```

See this for more detail on creating keys and what is required for signing releases.

http://www.apache.org/dev/release-signing.html#basic-facts

## Configure PyPI uploads

In order to not reveal your password in plain text, it's best if you create and configure API Upload tokens.
You can add and copy the tokens here:

* [Test PyPI](https://test.pypi.org/manage/account/token/)
* [Prod PyPI](https://pypi.org/manage/account/token/)


Create a `~/.pypirc` file:

```ini
[distutils]
index-servers =
  pypi
  pypitest

[pypi]
username=__token__
password=<API Upload Token>

[pypitest]
repository=https://test.pypi.org/legacy/
username=__token__
password=<API Upload Token>
```

Set proper permissions for the pypirc file:

```shell script
$ chmod 600 ~/.pypirc
```

- Install [twine](https://pypi.org/project/twine/) if you do not have it already (it can be done
  in a separate virtual environment).

```shell script
pip install twine
```

(more details [here](https://peterdowns.com/posts/first-time-with-pypi.html).)

- Set proper permissions for the pypirc file:
`$ chmod 600 ~/.pypirc`

## Hardware used to prepare and verify the packages

The best way to prepare and verify the releases is to prepare them on a hardware owned and controlled
by the committer acting as release manager. While strictly speaking, releases must only be verified
on hardware owned and controlled by the committer, for practical reasons it's best if the packages are
prepared using such hardware. More information can be found in this
[FAQ](http://www.apache.org/legal/release-policy.html#owned-controlled-hardware)

# Apache Liminal packages

## Prepare the Apache Liminal Package RC

### Build RC artifacts (both source packages and convenience packages)

The Release Candidate artifacts we vote upon should be the exact ones that become the final release, without any modification other than renaming – i.e. the contents of the files must be the same between the voted release candidate and the final release. Because of this the version in the built artifacts that will become the official Apache releases must not include the rcN suffix.

- Set environment variables

```bash
# Set Version
export LIMINAL_BUILD_VERSION=0.0.1rc1-incubating

# Example after cloning
git clone https://github.com/apache/incubator-liminal.git
cd incubator-liminal
```

- Mark the next version
```bash
# Set Version
sed -i '/^current_version /s/=.*$/= '"$LIMINAL_BUILD_VERSION"'/' .bumpversion.cfg
```

- Tag your release

```bash
git tag -a ${LIMINAL_BUILD_VERSION} -m "some message"

```

- Clean the checkout: the sdist step below will otherwise include any untracked or changed files

```bash
git clean -fxd
```

- Tarball the repo

```bash
git archive --format=tar.gz ${LIMINAL_BUILD_VERSION} --prefix=apache-liminal-${LIMINAL_BUILD_VERSION}/ -o apache-liminal-${LIMINAL_BUILD_VERSION}-source.tar.gz
```


- Generate sdist

    NOTE: Make sure your checkout is clean at this stage - any untracked or changed files will otherwise be included
     in the file produced.

```bash
python setup.py sdist bdist_wheel
```

- Generate SHA512/ASC (If you have not generated a key yet, generate it by following instructions on http://www.apache.org/dev/openpgp.html#key-gen-generate-key)

```bash
dev/sign.sh apache-liminal-${LIMINAL_BUILD_VERSION}-source.tar.gz
dev/sign.sh dist/apache-liminal-${LIMINAL_BUILD_VERSION}.tar.gz
dev/sign.sh dist/apache_liminal-${LIMINAL_BUILD_VERSION}-py3-none-any.whl
```

- Push Tags

```bash
git push origin ${LIMINAL_BUILD_VERSION}
```

- Push the artifacts to ASF dev dist repo
```
# First clone the repo
svn checkout https://dist.apache.org/repos/dist/dev/liminal liminal-dev

# Create new folder for the release
cd liminal-dev
svn mkdir ${LIMINAL_BUILD_VERSION}

# Move the artifacts to svn folder & commit
mv apache{-,_}liminal-${LIMINAL_BUILD_VERSION}* ${LIMINAL_BUILD_VERSION}/
cd ${LIMINAL_BUILD_VERSION}
svn add *
svn commit -m "Add artifacts for Liminal ${LIMINAL_BUILD_VERSION}"
```

### Prepare PyPI convenience "snapshot" packages

At this point we have the artifact that we vote on, but as a convenience to developers we also want to
publish "snapshots" of the RC builds to PyPI for installing via pip. To do this we need to:

- Verify the artifacts that would be uploaded:

```bash
twine check dist/*
```

- Upload the package to PyPi's test environment:

```bash
twine upload -r pypitest dist/*
```

- Verify that the test package looks good by downloading it and installing it into a virtual environment. The package download link is available at:
https://test.pypi.org/project/apache-liminal/#files

- Upload the package to PyPi's production environment:
`twine upload -r pypi dist/*`

- Again, confirm that the package is available here:
https://pypi.python.org/pypi/apache-liminal


It is important to stress that this snapshot should not be named "release", and it
is not supposed to be used by or advertised to end users who do not read the dev list.

## Vote and verify the Apache Liminal release candidate

### Prepare Vote email on the Apache Liminal release candidate

- Use the dev/liminal-jira script to generate a list of Liminal JIRAs that were closed in the release.

- Send out a vote to the dev@liminal.apache.org mailing list:

Subject:
```
[VOTE] Liminal 1.10.2rc3
```

Body:

```
Hey all,

I have cut Liminal 1.10.2 RC3. This email is calling a vote on the release,
which will last for 72 hours. Consider this my (binding) +1.

Liminal 1.10.2 RC3 is available at:
https://dist.apache.org/repos/dist/dev/liminal/1.10.2rc3/

*apache-liminal-1.10.2rc3-source.tar.gz* is a source release that comes
with INSTALL instructions.
*apache-liminal-1.10.2rc3-bin.tar.gz* is the binary Python "sdist" release.

Public keys are available at:
https://dist.apache.org/repos/dist/release/liminal/KEYS

Only votes from PMC members are binding, but the release manager should encourage members of the community
to test the release and vote with "(non-binding)".

The test procedure for PMCs and Contributors who would like to test this RC are described in
https://github.com/apache/incubator-liminal/blob/master/dev/RELEASE.md#vote-and-verify-the-apache-liminal-release-candidate

Please note that the version number excludes the `rcX` string, so it's now
simply 1.10.2. This will allow us to rename the artifact without modifying
the artifact checksums when we actually release.


Changes since 1.10.2rc2:
*Bugs*:
[LIMINAL-3732] ...
...


*Improvements*:
[LIMINAL-3302] ...
...


*New features*:
[LIMINAL-2874] ...
...


*Doc-only Change*:
[LIMINAL-XXX] Fix Minor issues in Documentation
...

Cheers,
<your name>
```

### Verify the release candidate by PMCs (legal)

#### PMC responsibilities

The PMCs should verify the releases in order to make sure the release is following the
[Apache Legal Release Policy](http://www.apache.org/legal/release-policy.html).

At least 3 (+1) votes should be recorded in accordance with
[Votes on Package Releases](https://www.apache.org/foundation/voting.html#ReleaseVotes)

The legal checks include:

* checking if the packages are present in the right dist folder on svn
* verifying if all the sources have correct licences
* verifying if release manager signed the releases with the right key
* verifying if all the checksums are valid for the release

#### SVN check

The files should be present in the sub-folder of
[Liminal dist](https://dist.apache.org/repos/dist/dev/incubator/liminal/)

The following files should be present (9 files):

* -source.tar.gz + .asc + .sha512
* .tar.gz + .asc + .sha512
* -.whl + .asc + .sha512

As a PMC you should be able to clone the SVN repository:

```shell script
    svn co https://dist.apache.org/repos/dist/dev/incubator/liminal
```

Or update it if you already checked it out:

```shell script
svn update .
```

#### Verify the licences

This can be done with the Apache RAT tool.

* Download the latest jar from https://creadur.apache.org/rat/download_rat.cgi (unpack the sources,
  the jar is inside)
* Unpack the .tar.gz to a folder
* Enter the folder and run the check (point to the place where you extracted the .jar)

```shell script
java -jar ../../apache-rat-0.13/apache-rat-0.13.jar -E .rat-excludes -d .
```

#### Verify the signatures

Make sure you have imported the key of the person who signed the release into your GPG keyring. You can find the valid keys in
[KEYS](https://dist.apache.org/repos/dist/release/liminal/KEYS).

You can import the whole KEYS file:

```shell script
gpg --import KEYS
```

You can also import the keys individually from a keyserver. The below one uses Kaxil's key and
retrieves it from the default GPG keyserver
[OpenPGP.org](https://keys.openpgp.org):

```shell script
gpg --receive-keys 12717556040EEF2EEAF1B9C275FCCD0A25FA0E4B
```

You should choose to import the key when asked.

Note that, being the default, the OpenPGP server tends to be overloaded often and might respond with
errors or timeouts. Many of the release managers have also uploaded their keys to the
[GNUPG.net](https://keys.gnupg.net) keyserver, and you can retrieve them from there.

```shell script
gpg --keyserver keys.gnupg.net --receive-keys 12717556040EEF2EEAF1B9C275FCCD0A25FA0E4B
```

Once you have the keys, the signatures can be verified by running this:

```shell script
for i in *.asc
do
   echo "Checking $i"; gpg --verify `basename $i .sha512 `
done
```

This should produce results similar to the below. The "Good signature from ..." line indicates
that the signatures are correct. Do not worry about the "not certified with a trusted signature"
warning. Most of the certificates used by release managers are self-signed, which is why you get this
warning. By importing the key either from the keyserver in the previous step or via its ID from the
[KEYS](https://dist.apache.org/repos/dist/release/liminal/KEYS) page, you know that
this is a valid key already.

```
Checking apache-liminal-1.10.12rc4-bin.tar.gz.asc
gpg: assuming signed data in 'apache-liminal-1.10.12rc4-bin.tar.gz'
gpg: Signature made sob, 22 sie 2020, 20:28:28 CEST
gpg:                using RSA key 12717556040EEF2EEAF1B9C275FCCD0A25FA0E4B
gpg: Good signature from "Kaxil Naik <kaxilnaik@gmail.com>" [unknown]
gpg: WARNING: This key is not certified with a trusted signature!
gpg:          There is no indication that the signature belongs to the owner.
Primary key fingerprint: 1271 7556 040E EF2E EAF1  B9C2 75FC CD0A 25FA 0E4B
Checking apache_liminal-1.10.12rc4-py2.py3-none-any.whl.asc
gpg: assuming signed data in 'apache_liminal-1.10.12rc4-py2.py3-none-any.whl'
gpg: Signature made sob, 22 sie 2020, 20:28:31 CEST
gpg:                using RSA key 12717556040EEF2EEAF1B9C275FCCD0A25FA0E4B
gpg: Good signature from "Kaxil Naik <kaxilnaik@gmail.com>" [unknown]
gpg: WARNING: This key is not certified with a trusted signature!
gpg:          There is no indication that the signature belongs to the owner.
Primary key fingerprint: 1271 7556 040E EF2E EAF1  B9C2 75FC CD0A 25FA 0E4B
Checking apache-liminal-1.10.12rc4-source.tar.gz.asc
gpg: assuming signed data in 'apache-liminal-1.10.12rc4-source.tar.gz'
gpg: Signature made sob, 22 sie 2020, 20:28:25 CEST
gpg:                using RSA key 12717556040EEF2EEAF1B9C275FCCD0A25FA0E4B
gpg: Good signature from "Kaxil Naik <kaxilnaik@gmail.com>" [unknown]
gpg: WARNING: This key is not certified with a trusted signature!
gpg:          There is no indication that the signature belongs to the owner.
Primary key fingerprint: 1271 7556 040E EF2E EAF1  B9C2 75FC CD0A 25FA 0E4B
```

#### Verify the SHA512 sum

Run this:

```shell script
for i in *.sha512
do
    echo "Checking $i"; gpg --print-md SHA512 `basename $i .sha512 ` | diff - $i
done
```

You should get output similar to:

```
Checking apache_liminal-1.10.12rc4-py3-none-any.whl.sha512
Checking apache-liminal-1.10.12rc4.tar.gz.sha512
```

### Verify if the release candidate "works" by Contributors

This can be done (and we encourage it) by any of the Contributors. In fact, it's best if the
actual users of Apache Liminal test it in their own staging/test installations. Each release candidate
is available on PyPI apart from SVN packages, so everyone should be able to install
the release candidate version of Liminal via simply (<VERSION> is 1.10.12 for example, and <X> is
release candidate number 1,2,3,....).

```shell script
pip install apache-liminal==<VERSION>rc<X>
```

### Seek approval from Incubator PMC

[Post to the Incubator’s general list requesting approval from the Incubator PMC](https://lists.apache.org/thread.html/06655226ba08c16a8cb273f9b45e0b0a15ebaed0d06783fdd06a03f6@%3Cgeneral.incubator.apache.org%3E).
Should the Incubator PMC vote to approve a release, the Podling MAY make that release available to the public under these conditions:

* The release archive(s) MUST include the word "incubating" in the filename.
* The release archive(s) MUST contain a disclaimer found in DISCLAIMER file.

Releases for the Podling MUST be distributed through http://www.apache.org/dist/incubator/Podling
https://lists.apache.org/thread.html/06655226ba08c16a8cb273f9b45e0b0a15ebaed0d06783fdd06a03f6@%3Cgeneral.incubator.apache.org%3E

## Publish the final Apache Liminal release

### Summarize the voting for the Apache Liminal release

Once the vote has been passed, you will need to send a result vote to dev@liminal.apache.org:

Subject:
```
[RESULT][VOTE] Liminal 1.10.2rc3
```

Message:

```
Hello,

Apache Liminal 1.10.2 (based on RC3) has been accepted.

3 “+1” binding votes received:
....

3 "+1" non-binding votes received:

...

Vote thread:
...

I'll continue with the release process, and the release announcement will follow shortly.

Cheers,
<your name>
```


### Publish release to SVN

You need to migrate the RC artifacts that passed to this repository:
https://dist.apache.org/repos/dist/release/liminal/
(The migration should include renaming the files so that they no longer have the RC number in their filenames.)

The best way of doing this is to `svn cp` between the two repos (this avoids having to upload the binaries again, and gives a clearer history in the svn commit logs):

```shell script
# First clone the repo
export RC=1.10.4rc5
export VERSION=${RC/rc?/}
svn checkout https://dist.apache.org/repos/dist/release/liminal liminal-release

# Create new folder for the release
cd liminal-release
svn mkdir ${VERSION}
cd ${VERSION}

# Move the artifacts to svn folder & commit
for f in ../../liminal-dev/$RC/*; do name=$(basename "$f"); svn cp "$f" "${name/rc?/}"; done
svn commit -m "Release Liminal ${VERSION} from ${RC}"

# Remove old release
# http://www.apache.org/legal/release-policy.html#when-to-archive
cd ..
export PREVIOUS_VERSION=1.10.1
svn rm ${PREVIOUS_VERSION}
svn commit -m "Remove old release: ${PREVIOUS_VERSION}"
```

Verify that the packages appear in [liminal](https://dist.apache.org/repos/dist/release/liminal/)

### Prepare PyPI "release" packages

At this point we release an official package:

- Build the package:

    ```shell script
    python setup.py sdist bdist_wheel
    ```

- Verify the artifacts that would be uploaded:

    ```shell script
    twine check dist/*
    ```

- Upload the package to PyPi's test environment:

    ```shell script
    twine upload -r pypitest dist/*
    ```

- Verify that the test package looks good by downloading it and installing it into a virtual environment.
    The package download link is available at: https://test.pypi.org/project/apache-liminal/#files

- Upload the package to PyPi's production environment:

    ```shell script
    twine upload -r pypi dist/*
    ```

- Again, confirm that the package is available here: https://pypi.python.org/pypi/apache-liminal

### Update CHANGELOG.md

- Get a diff between the last version and the current version:

    ```shell script
    $ git log 1.8.0..1.9.0 --pretty=oneline
    ```
- Update CHANGELOG.md with the details, and commit it.

### Notify developers of release

- Notify users@liminal.apache.org (cc'ing dev@liminal.apache.org and announce@apache.org) that
the artifacts have been published:

Subject:
```shell script
cat <<EOF
Liminal ${VERSION} is released
EOF
```

Body:
```shell script
cat <<EOF
Dear Liminal community,

I'm happy to announce that Liminal ${VERSION} was just released.

The source release, as well as the binary "sdist" release, are available
here:

https://dist.apache.org/repos/dist/release/liminal/${VERSION}/

We also made this version available on PyPi for convenience (`pip install apache-liminal`):

https://pypi.python.org/pypi/apache-liminal

The documentation is available on:
https://liminal.apache.org/
https://liminal.apache.org/1.10.2/
https://liminal.readthedocs.io/en/1.10.2/
https://liminal.readthedocs.io/en/stable/

Find the CHANGELOG here for more details:

https://liminal.apache.org/changelog.html#liminal-1-10-2-2019-01-19

Cheers,
<your name>
EOF
```


================================================
FILE: dev/sign.sh
================================================
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Use this to sign the tar balls generated from
# python setup.py sdist --formats=gztar
# ie. sign.sh <my_tar_ball>
# you will still be required to type in your signing key password
# or it needs to be available in your keychain
#
# For the given artifact this writes, next to it:
#   <artifact>.asc     ASCII-armored detached GPG signature
#   <artifact>.sha512  SHA512 digest as printed by `gpg --print-md`

# Stop at the first failing command so that a failed signature is never
# followed by a checksum file that makes the artifact set look complete.
set -euo pipefail

if [[ $# -lt 1 || -z "${1}" ]]; then
    echo "Usage: $(basename "${0}") <artifact>" >&2
    exit 1
fi

NAME="${1}"

# Without this check the output redirection below would still create an
# empty "<artifact>.sha512" for a mistyped path.
if [[ ! -f "${NAME}" ]]; then
    echo "Error: '${NAME}' does not exist or is not a regular file" >&2
    exit 1
fi

gpg --armor --output "${NAME}.asc" --detach-sig "${NAME}"
gpg --print-md SHA512 "${NAME}" > "${NAME}.sha512"


================================================
FILE: docs/Makefile
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Minimal makefile for Sphinx documentation
#
# Every target (including "help") is forwarded unchanged to
# "$(SPHINXBUILD) -M <target>"; this file contains no build logic of its own.

# You can set these variables from the command line.
#   SPHINXOPTS   extra options appended to every sphinx-build invocation
#   SPHINXBUILD  the sphinx-build executable to run
#   SOURCEDIR    source directory argument passed to sphinx-build
#   BUILDDIR     build (output) directory argument passed to sphinx-build
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SOURCEDIR     = .
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "Makefile" is declared phony alongside "help"; it is the prerequisite of the
# catch-all rule below.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
# $(O) is not assigned in this file, so it expands to nothing unless the
# caller provides it (e.g. "make html O=-W").
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: docs/README.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Apache Liminal

Apache Liminal is an end-to-end platform for data engineers & scientists, allowing them to build,
train and deploy machine learning models in a robust and agile way.

The platform provides the abstractions and declarative capabilities for
data extraction & feature engineering followed by model training and serving.
Liminal's goal is to operationalize the machine learning process, allowing data scientists to
quickly transition from a successful experiment to an automated pipeline of model training,
validation, deployment and inference in production, freeing them from engineering and
non-functional tasks, and allowing them to focus on machine learning code and artifacts.

## Basics

Using simple YAML configuration, create your own scheduled data pipelines (a sequence of tasks to
perform), application servers, and more.

## Getting Started
A simple hello world guide for Liminal can be found [here](getting-started/hello_world.md) \
A more advanced example which demonstrates a simple data-science workflow can be found [here](getting-started/iris_classification.md)

## Apache Liminal Documentation
Full documentation of Apache Liminal can be found [here](liminal)

## High Level Architecture
High level architecture documentation can be found [here](architecture.md)


================================================
FILE: docs/architecture.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# High Level Architecture
Liminal is an end-to-end platform for data engineers & scientists, allowing them to build, train and deploy machine learning models in a robust and agile way. The platform provides the abstractions and declarative capabilities for data extraction & feature engineering followed by model training and serving. Apache Liminal's goal is to operationalise the machine learning process, allowing data scientists to quickly transition from a successful experiment to an automated pipeline of model training, validation, deployment and inference in production, freeing them from engineering and non-functional tasks, and allowing them to focus on machine learning code and artifacts.

## Motivation
The challenges involved in operationalizing machine learning models are one of the main reasons why many machine learning projects never make it to production.
The process involves automating and orchestrating multiple steps which run on heterogeneous infrastructure - different compute environments, data processing platforms, ML frameworks, notebooks, containers and monitoring tools.

There are no mature standards for this workflow, and most organizations do not have the experience to build it in-house. In the best case, dev-ds-devops teams form in order to accomplish this task together; in many cases, it's the data scientists who try to deal with this themselves without the knowledge or the inclination to become infrastructure experts.
As a result, many projects never make it through the cycle. Those who do suffer from a very long lead time from a successful experiment to an operational, refreshable, deployed and monitored model in production.

The goal of Apache Liminal is to simplify the creation and management of machine learning pipelines by data engineers & scientists. The platform provides declarative building blocks which define the workflow, orchestrate the underlying infrastructure, and take care of non-functional concerns, enabling focus on business logic / algorithm code.
Some Commercial E2E solutions have started to emerge in the last few years, however, they are limited to specific parts of the workflow, such as Databricks MLFlow. Other solutions are tied to specific environments (e.g. SageMaker on AWS).

## High Level Architecture
The platform aims to provide data engineers & scientists with a solution for end-to-end flows from model training to real-time inference in production. Its architecture enables and promotes adoption of specific components in existing (non-Liminal) frameworks, as well as seamless integration with other open source projects. Liminal was created to enable scalability in ML efforts, after a thorough review of available solutions and frameworks, which did not meet our main KPIs:
- Provide an opinionated but customizable end-to-end workflow
- Abstract away the complexity of underlying infrastructure
- Support major open source tools and cloud-native infrastructure to carry out many of the steps
- Allow teams to leverage their existing investments or bring in their tools of choice into the workflow

We have found that other tech companies in the Hi-Tech ecosystem also have an interest in such a platform, hence decided to share our work with the community.
The following diagram depicts these main components and where Apache Liminal comes in:

![](nstatic/liminal_arch_001.png)

A classical data scientist workflow includes some base phases:
_Train, Deploy and Consume._

**The Train phase includes the following tasks:**

1. Fetch -  get the data needed to build a model - usually using SQL
1. Clean - make sure the data is useful for building the model
1. Prepare - split data and encode features from the data according to model needs
1. Train - Build the model and tune it
1. Evaluate - make sure the model is correct - run it on a test set, etc…
1. Validate - make sure the model is up to the standards you need

**The Deploy phase includes these tasks:**
1. Deploy - make it available for usage in production
1. Inference - Batch or Real-time - use the model to evaluate data, either offline or online, from your applications
1. Consume - The actual use of the models created by applications and ETLs, usually through APIs to the batch or real-time inference that usually rely on Model and Feature stores.

Liminal provides its users with declarative composition capabilities to materialize these steps in a robust way, while exploiting existing frameworks and tools, e.g. data science frameworks such as scikit-learn, TensorFlow, Keras and the like for running core data science algorithms, as well as numerous core mechanisms such as data stores, processing engines, parallelism, schedulers, code deployment, and batch and real-time inference.
Liminal allows the creation and wiring of these kinds of functional and non-functional tasks while making the underlying infrastructure used by these tasks very easy to use and even abstracted away entirely, while also handling non-functional aspects such as monitoring (in a standard fashion), deployment, scheduling, resource management and execution.

![](nstatic/liminal_arch_002.png)


================================================
FILE: docs/conf.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys

# Prepend the absolute form of 'source' (resolved against the current working
# directory) to the module search path.
sys.path.insert(0, os.path.abspath('source'))
# NOTE(review): os.path.abspath('/') is the filesystem root, so this places
# '/' at the very front of sys.path. Presumably a project-relative directory
# (e.g. '..') was intended - confirm before relying on it.
sys.path.insert(0, os.path.abspath('/'))

# -- Project information -----------------------------------------------------

# Project name shown in generated titles; also reused below for the epub
# title. (Previously misspelled "Apchae".)
project = 'Apache Liminal'
copyright = 'Apache Liminal'
author = 'Apache Software Foundation'

# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = '0.0.1'

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'recommonmark',
    'sphinx.ext.autodoc',
]

html_logo = 'nstatic/liminal_logo.png'
# Add any paths that contain templates here, relative to this directory.
templates_path = ['ntemplates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['README.md', '**/README.md']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'classic'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['nstatic']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'ApacheLiminaldoc'

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',
    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'ApacheLiminal.tex', 'Apache Liminal Documentation', 'Apache Software Foundation', 'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, 'apacheliminal', 'Apache Liminal Documentation', [author], 1)]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (
        master_doc,
        'ApacheLiminal',
        'Apache Liminal Documentation',
        author,
        'ApacheLiminal',
        'One line description of project.',
        'Miscellaneous',
    ),
]

# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']


def on_missing_reference(app, env, node, contnode):
    """Resolve an unresolved reference for the ``missing-reference`` event.

    Returns ``contnode`` unchanged when ``node['reftype']`` is ``'any'``, and
    ``None`` for every other reference type. ``app`` and ``env`` are accepted
    but not used.
    """
    is_any_reference = node['reftype'] == 'any'
    return contnode if is_any_reference else None


def setup(app):
    """Register this configuration's event handlers on ``app``.

    Connects ``on_missing_reference`` to the ``missing-reference`` event.
    """
    event_name, handler = 'missing-reference', on_missing_reference
    app.connect(event_name, handler)


================================================
FILE: docs/getting-started/hello_world.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Getting started / ***Hello World***

This guide will allow you to set up your first Apache Liminal environment and allow you to create
some simple ML pipelines. These will be very similar to the ones you are going to build for real
production scenarios.

## Prerequisites

Python 3 (3.6 and up)

[Docker Desktop](https://www.docker.com/products/docker-desktop)

*Note: Make sure kubernetes cluster is running in docker desktop (or custom kubernetes installation
on your machine).*

## Deploying the Example

In this tutorial, we will go through setting up Liminal for the first time on your local machine.

### First, let’s build our example project:

In the dev folder, just clone the example code from liminal:


```BASH
git clone https://github.com/apache/incubator-liminal
```
***Note:*** *You just cloned the entire Liminal Project, you actually only need examples folder.*

Create a python virtual environment to isolate your runs:

```BASH
cd incubator-liminal/examples/liminal-getting-started
python3 -m venv env
```

Activate your virtual environment:

```BASH
source env/bin/activate
```

Now we are ready to install liminal:

```BASH
pip install apache-liminal
```
Let's build the images you need for the example:
```BASH
liminal build
```
##### The build will create docker images based on the liminal.yml file in the `images` section.

```BASH
liminal deploy --clean
```
The deploy command deploys a liminal server and deploys any liminal.yml files in your working
directory or any of its subdirectories to your liminal home directory.

*Note: liminal home directory is located in the path defined in LIMINAL_HOME env variable.
If the LIMINAL_HOME environment variable is not defined, home directory defaults to
~/liminal_home directory.*

Now let's run liminal:
```BASH
liminal start
```
The start command spins up the liminal server containers which will run pipelines based on your
deployed liminal.yml files.
It runs the following three containers:
* liminal-postgress
* liminal-webserver
* liminal-scheduler

Once liminal server has completed starting up, you can navigate to admin UI in your browser:
[http://localhost:8080](http://localhost:8080)
By default liminal server starts Apache Airflow servers and admin UI will be that of Apache Airflow.


![](../nstatic/hello-world/airflow_main.png)

***Important:** Set off/on toggle to activate your pipeline (DAG), nothing will happen otherwise!*

You can go to graph view to see all the tasks configured in the liminal.yml file:
[http://localhost:8080/admin/airflow/graph?dag_id=example_pipeline](
http://localhost:8080/admin/airflow/graph?dag_id=example_pipeline
)

#### Now let's see what actually happened to our task:

![](../nstatic/hello-world/airflow_view_dag.png)


#### Click on “hello_world_example” and you will get this popup:

![](../nstatic/hello-world/airflow_view_log.png)


#### Click on “view log” button and you can see the log of the current task run:

![](../nstatic/hello-world/airflow_task_log.png)


## Mounted volumes
All tasks use a mounted volume as defined in the pipeline YAML:
```YAML
name: GettingStartedPipeline
volumes:
  - volume: gettingstartedvol
    claim_name: gettingstartedvol-pvc
    local:
      path: .
```
In our case the mounted volume will point to the liminal hello world example. \
The hello world task will read the **hello_world.json** file from the mounted volume and will write
the **hello_world_output.json** to it.

*Note:* Each task will internally mount the volume defined above to an internal representation,
described under the task section in the yml:

```YAML
  task:
  ...
  mounts:
     - mount: taskmount
       volume: gettingstartedvol
       path: /mnt/vol1
```


## Here is the entire list of commands, if you want to start from scratch:

```
git clone https://github.com/apache/incubator-liminal
cd incubator-liminal/examples/liminal-getting-started
python3 -m venv env
source env/bin/activate
pip uninstall apache-liminal
pip install apache-liminal
liminal build
liminal deploy --clean
liminal start
```

## Closing up

To make sure liminal containers are stopped use:
```
liminal stop
```

To deactivate the python virtual env use:
```
deactivate
```


================================================
FILE: docs/getting-started/iris_classification.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Getting started / ***Iris Classification***

* [Setup your local environment](#Setup-your-local-environment)
* [Setup liminal](#setup-liminal)
    * [Liminal build](#Liminal-build)
    * [Liminal create](#Liminal-create)
    * [Liminal deploy](#Liminal-deploy)
    * [Liminal start](#Liminal-start)
* [Liminal YAML walkthrough](#Liminal-YAML-walkthrough)
* [Evaluate the Iris Classification model](#Evaluate-the-iris-classification-model)
* [Debugging Kubernetes Deployments](#Debugging-Kubernetes-Deployments)
* [Closing up](#Closing-up)

In this tutorial, we will guide you through setting up Apache Liminal on your local machine and run a simple machine-learning workflow, based on the classic Iris dataset classification example. \
More details in this [link](https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html).

#### Prerequisites

* [Python 3 (3.6 and up)](https://www.python.org/downloads)
* [Python Virtual Environments](https://pypi.org/project/virtualenv)
* [Docker Desktop](https://www.docker.com/products/docker-desktop)
* [Kubernetes CLI (kubectl)](https://kubernetes.io/docs/tasks/tools/install-kubectl-macos)

*Note: Make sure kubernetes cluster is running in docker desktop*

We will define the following steps and services to implement the Iris classification example: \
Train, Validate & Deploy - Training and validation execution is managed by Liminal Airflow extension. The training task trains a regression model using a public dataset. \
We then validate the model and deploy it to a model-store in mounted volume. \
Inference - online inference is done using a Python Flask service running on the local Kubernetes in docker desktop. The service exposes the `/predict` endpoint. It reads the model stored in the mounted drive and uses it to evaluate the request.
## Setup your local environment

In the dev folder, clone the example code from liminal:


```BASH
git clone https://github.com/apache/incubator-liminal
```
***Note:*** *You just cloned the entire Liminal Project, you actually only need examples folder.*


Create a python virtual environment to isolate your runs:

```BASH
cd incubator-liminal/examples/aws-ml-app-demo
python3 -m venv env
```

Activate your virtual environment:

```BASH
source env/bin/activate
```

Now we are ready to install liminal:

```BASH
pip install apache-liminal
```

## Setup liminal
### Liminal build
The build will create docker images based on the liminal.yml file in the `images` section and will create a kubernetes local volume.

Be informed that all tasks use a mounted volume as defined in the pipeline YAML. \
In our case the mounted volume will point to the liminal Iris Classification example.
The training task trains a regression model using a public dataset. We then validate the model and deploy it to a model-store in the mounted volume.
```BASH
liminal build
```

### Liminal deploy
The deploy command deploys a liminal server and deploys any liminal.yml files in your working directory or any of its subdirectories to your liminal home directory.
```BASH
liminal deploy --clean
```

*Note: liminal home directory is located in the path defined in LIMINAL_HOME env variable.
If the LIMINAL_HOME environment variable is not defined, home directory defaults to
~/liminal_home directory.*

### Liminal start
The start command spins up 3 containers that load the Apache Airflow stack. Liminal's Airflow extension is responsible to execute the workflows defined in the liminal.yml file as standard Airflow DAGs.
```BASH
liminal start
```

It runs the following three containers:
* liminal-postgress
* liminal-webserver
* liminal-scheduler

Once liminal server has completed starting up, you can navigate to admin UI in your browser:
[http://localhost:8080](http://localhost:8080)


![](../nstatic/iris-classification/airflow_main.png)

***Important:** Set off/on toggle to activate your pipeline (DAG), nothing will happen otherwise!*

You can go to graph view to see all the tasks configured in the liminal.yml file:
[http://localhost:8080/admin/airflow/graph?dag_id=my_datascience_pipeline](
http://localhost:8080/admin/airflow/graph?dag_id=my_datascience_pipeline
)

#### Now let's see what actually happened to our task:
![](../nstatic/iris-classification/airflow_view_dag.png)


#### Click on “train” and you will get this popup:
![](../nstatic/iris-classification/airflow_view_log.png)


#### Click on “view log” button and you can see the log of the current task run:
![](../nstatic/iris-classification/airflow_task_log.png)

## Liminal YAML walkthrough
* [Mounted volumes](#Mounted-volumes)
* [Pipeline flow](#Pipeline-flow)

### Mounted volumes
Declaration of the mounted volume in your liminal YAML:
```YAML
name: MyDataScienceApp
owner: Bosco Albert Baracus
volumes:
  - volume: gettingstartedvol
    claim_name: gettingstartedvol-pvc
    local:
      path: .
```

### Pipeline flow
Declaration of the pipeline tasks flow in your liminal YAML:
```YAML
pipelines:
  - pipeline: my_datascience_pipeline
    ...
    schedule: 0 * 1 * *
    tasks:
      - task: train
        type: python
        description: train model
        image: myorg/mydatascienceapp
        cmd: python -u training.py train
        ...
      - task: validate
        type: python
        description: validate model and deploy
        image: myorg/mydatascienceapp
        cmd: python -u training.py validate
        ...
```

##### Each task will internally mount the volume defined above to an internal representation, described under the task section in the yml:

```YAML
pipelines:
    ...
    tasks:
      - task: train
        ...
        env:
          MOUNT_PATH: /mnt/gettingstartedvol
        mounts:
          - mount: mymount
            volume: gettingstartedvol
            path: /mnt/gettingstartedvol
```
###### We specify the `MOUNT_PATH` in which we store the trained model.

## Evaluate the iris classification model

Once the Iris Classification model training is completed and the model is deployed (to the mounted volume), you can launch a pod of the pre-built image which contains a flask server, by applying the following Kubernetes manifest configuration:
```BASH
kubectl apply -f manifests/aws-ml-app-demo.yaml
```

Alternatively, create a Kubernetes pod from stdin:
```YAML
cat <<EOF | kubectl apply -f -
---
apiVersion: v1
kind: Pod
metadata:
  name: aws-ml-app-demo
spec:
  volumes:
    - name: task-pv-storage
      persistentVolumeClaim:
        claimName: gettingstartedvol-pvc
  containers:
    - name: task-pv-container
      imagePullPolicy: Never
      image: myorg/mydatascienceapp
      lifecycle:
        postStart:
          exec:
            command: ["/bin/bash", "-c", "apt update && apt install curl -y"]
      ports:
        - containerPort: 80
          name: "http-server"
      volumeMounts:
        - mountPath: "/mnt/gettingstartedvol"
          name: task-pv-storage
EOF
```

Check that the service is running:
```BASH
kubectl get pods --namespace=default
```

Check that the service is up:
```BASH
kubectl exec -it --namespace=default aws-ml-app-demo -- /bin/bash -c "curl localhost/healthcheck"
```

Check the prediction:
```BASH
kubectl exec -it --namespace=default aws-ml-app-demo -- /bin/bash -c "curl -X POST -d '{\"petal_width\": \"2.1\"}' localhost/predict"
```

## Debugging Kubernetes Deployments
kubectl get pods will help you check your pod status:
```BASH
kubectl get pods --namespace=default
```
kubectl logs will help you check your pods log:
```BASH
kubectl logs --namespace=default aws-ml-app-demo
```
kubectl exec to get a shell to a running container:
```BASH
kubectl exec --namespace=default aws-ml-app-demo -- bash
```
Then you can check the mounted volume with `df -h` and verify the result of the model.


## Here is the entire list of commands, if you want to start from scratch:

```
git clone https://github.com/apache/incubator-liminal
cd incubator-liminal/examples/aws-ml-app-demo
python3 -m venv env
source env/bin/activate
rm -rf ~/liminal_home
pip uninstall apache-liminal
pip install apache-liminal
liminal build
liminal create
liminal deploy --clean
liminal start
```

## Closing up

To make sure liminal containers are stopped use:
```
liminal stop
```

To deactivate the python virtual env use:
```
deactivate
```

To terminate the kubernetes pod:
```
kubectl delete pod --namespace=default aws-ml-app-demo
```


================================================
FILE: docs/getting-started/spark_app_demo.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Getting started / ***Spark application***

* [Setup your local environment](#Setup-your-local-environment)
* [Spark on K8S](#Spark-On-K8S)
    * [Setup liminal](#setup-liminal)
    * [Liminal YAML walkthrough](#Liminal-YAML-walkthrough)
    * [Evaluate the Iris Classification model](#Evaluate-the-iris-classification-model)
    * [Debugging Kubernetes Deployments](#Debugging-Kubernetes-Deployments)

* [Closing up](#Closing-up)

In this tutorial, we will guide you through setting up Apache Liminal on your local machine and run
a simple machine-learning workflow, based on the classic Iris dataset classification example
including a feature engineering with Spark engine. \
More details in
this [link](https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html).

#### Prerequisites

* [Python 3 (3.6 and up)](https://www.python.org/downloads)
* [Python Virtual Environments](https://pypi.org/project/virtualenv)
* [Docker Desktop](https://www.docker.com/products/docker-desktop)
* [Kubernetes CLI (kubectl)](https://kubernetes.io/docs/tasks/tools/install-kubectl-macos)

*Note: Make sure kubernetes cluster is running in docker desktop*

## Setup your local environment

In the dev folder, clone the example code from liminal:

```BASH
git clone https://github.com/apache/incubator-liminal
```

***Note:*** *You just cloned the entire Liminal Project, you actually only need examples folder.*

Create a python virtual environment to isolate your runs:

```BASH
cd incubator-liminal/examples/spark-app-demo/
python3 -m venv env
```

Activate your virtual environment:

```BASH
source env/bin/activate
```

Now we are ready to install liminal:

```BASH
pip install apache-liminal
```

## Spark On K8S

We will define the following steps and services to implement the Iris classification example
including a simple feature engineering with Apache Spark: \
Clean data, Train, Validate & Deploy - Cleaning the input data set and prepare it for training and
validation execution is managed by Liminal Airflow extension. The training task trains a regression
model using a public dataset. \
We then validate the model and deploy it to a model-store in mounted volume. \
Inference - online inference is done using a Python Flask service running on the local Kubernetes in
docker desktop. The service exposes the `/predict` endpoint. It reads the model stored in the
mounted drive and uses it to evaluate the request.

### Setup liminal

```BASH
cd incubator-liminal/examples/spark-app-demo/k8s
```

#### Liminal build
The build will create docker images based on the liminal.yml file in the `images` section and will create a kubernetes local volume.

Be informed that all tasks use a mounted volume as defined in the pipeline YAML. \
In our case the mounted volume will point to the liminal Iris Classification example.
The training task trains a regression model using a public dataset. We then validate the model and deploy it to a model-store in the mounted volume.
```BASH
liminal build
```

#### Liminal deploy

The deploy command deploys a liminal server and deploys any liminal.yml files in your working
directory or any of its subdirectories to your liminal home directory.

```BASH
liminal deploy --clean
```

*Note: liminal home directory is located in the path defined in LIMINAL_HOME env variable. If the
LIMINAL_HOME environment variable is not defined, home directory defaults to
~/liminal_home directory.*

#### Liminal start

The start command spins up 3 containers that load the Apache Airflow stack. Liminal's Airflow
extension is responsible to execute the workflows defined in the liminal.yml file as standard
Airflow DAGs.

```BASH
liminal start
```

You can go to graph view to see all the tasks configured in the liminal.yml file:
[http://localhost:8080/admin/airflow/graph?dag_id=my_first_spark_pipeline](
http://localhost:8080/admin/airflow/graph?dag_id=my_first_spark_pipeline
)

You should see the following dag:

![](../nstatic/spark-demo-app/k8s_dag.png)

### Liminal YAML walkthrough

* [Local archetype](#Local-archetype)
* [Pipeline flow](#Pipeline-flow)

#### Local archetype

A superliminal for an easy local development

```YAML
name: InfraSpark
owner: Bosco Albert Baracus
type: super
executors:
  - executor: k8s
    type: kubernetes
variables:
  output_root_dir: /mnt/gettingstartedvol
  input_root_dir: ''
images:
  - image: my_spark_image
    type: spark
    source: .
    no_cache: True
task_defaults:
  spark:
    executor: k8s
    executors: 2
    application_source: '{{application}}'
    mounts:
      - mount: mymount
        volume: gettingstartedvol
        path: /mnt/gettingstartedvol
```

###### We specify the `MOUNT_PATH` in which we store the trained model.

You can read more about `superliminal` `variables` and `defaults`
in [advanced.liminal.yml](../liminal/advanced.liminal.yml.md)

#### Pipeline flow

Declaration of the pipeline tasks flow in your liminal YAML:

```YAML
name: MyFirstLiminalSparkApp
super: InfraSpark
owner: Bosco Albert Baracus
variables:
  output_path: '{{output_root_dir}}/my_first_liminal_spark_app_outputs/'
  application: data_cleanup.py
task_defaults:
  python:
    mounts:
      - mount: mymount
        volume: gettingstartedvol
        path: /mnt/gettingstartedvol
pipelines:
  - pipeline: my_first_spark_pipeline
    start_date: 1970-01-01
    timeout_minutes: 45
    schedule: 0 * 1 * *
    tasks:
      - task: data_preprocessing
        type: spark
        description: prepare the data for training
        application_arguments:
          - '{{input_root_dir}}data/iris.csv'
          - '{{output_path}}'
      - task: train
        type: python
        description: train model
        image: myorg/mydatascienceapp
        cmd: python -u training.py train '{{output_path}}'
        env:
          MOUNT_PATH: /mnt/gettingstartedvol
        ...
```

### Evaluate the iris classification model

Once the Iris Classification model training is completed and the model is deployed (to the mounted
volume), you can launch a pod of the pre-built image which contains a flask server, by applying the
following Kubernetes manifest configuration:

```BASH
kubectl apply -f manifests/spark-app-demo.yaml
```

Alternatively, create a Kubernetes pod from stdin:

```YAML
cat <<EOF | kubectl apply -f -
---
apiVersion: v1
kind: Pod
metadata:
  name: spark-app-demo
spec:
  volumes:
    - name: task-pv-storage
      persistentVolumeClaim:
        claimName: gettingstartedvol-pvc
  containers:
    - name: task-pv-container
      imagePullPolicy: Never
      image: myorg/mydatascienceapp
      lifecycle:
        postStart:
          exec:
            command: [ "/bin/bash", "-c", "apt update && apt install curl -y" ]
      ports:
        - containerPort: 80
          name: "http-server"
      volumeMounts:
        - mountPath: "/mnt/gettingstartedvol"
          name: task-pv-storage
EOF
```

Check that the service is running:

```BASH
kubectl get pods --namespace=default
```

Check that the service is up:

```BASH
kubectl exec -it --namespace=default spark-app-demo -- /bin/bash -c "curl localhost/healthcheck"
```

Check the prediction:

```BASH
kubectl exec -it --namespace=default spark-app-demo -- /bin/bash -c "curl -X POST -d '{\"petal_width\": [1.1,1.1,1.1,1.1]}' localhost/predict"
```

## Debugging Kubernetes Deployments

kubectl get pods will help you check your pod status:

```BASH
kubectl get pods --namespace=default
```

kubectl logs will help you check your pods log:

```BASH
kubectl logs --namespace=default spark-app-demo
```

kubectl exec to get a shell to a running container:

```BASH
kubectl exec --namespace=default spark-app-demo -- bash
```

Then you can check the mounted volume with `df -h` and verify the result of the model.

You can go to graph view to see all the tasks configured in the liminal.yml file:
[http://localhost:8080/admin/airflow/graph?dag_id=my_first_pipeline](
http://localhost:8080/admin/airflow/graph?dag_id=my_first_pipeline
)

## Here is the entire list of commands, if you want to start from scratch:

```
git clone https://github.com/apache/incubator-liminal
cd incubator-liminal/examples/spark-app-demo/k8s
# cd incubator-liminal/examples/spark-app-demo/emr
python3 -m venv env
source env/bin/activate
rm -rf ~/liminal_home
pip uninstall apache-liminal
pip install apache-liminal
liminal build
liminal create
liminal deploy --clean
liminal start
```

## Closing up

To make sure liminal containers are stopped use:

```
liminal stop
```

To deactivate the python virtual env use:

```
deactivate
```

To terminate the kubernetes pod:

```
kubectl delete pod --namespace=default spark-app-demo
```


================================================
FILE: docs/index.rst
================================================
.. Apache Liminal documentation master file, created by
   sphinx-quickstart on Sun Nov 15 08:45:27 2020.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

..
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership.  The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at
..

..  http://www.apache.org/licenses/LICENSE-2.0

..
   Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   KIND, either express or implied.  See the License for the
   specific language governing permissions and limitations
   under the License.
..

Apache Liminal
==============

Apache Liminal is a data systems orchestration platform. Liminal enables data scientists and data
engineers to define their data systems and flow using configuration. From feature engineering to
production monitoring, the framework takes care of the infrastructure behind the scenes and
integrates with it seamlessly.

.. toctree::
   :maxdepth: 1

   getting_started
   liminal/index
   architecture

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`


================================================
FILE: docs/liminal/README.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Apache Liminal Documentation

## Overview

Apache Liminal is an end-to-end platform for data engineers & scientists, allowing them to build,
train and deploy machine learning models in a robust and agile way.

The platform provides the abstractions and declarative capabilities for
data extraction & feature engineering followed by model training and serving.
Liminal's goal is to operationalize the machine learning process, allowing data scientists to
quickly transition from a successful experiment to an automated pipeline of model training,
validation, deployment and inference in production, freeing them from engineering and
non-functional tasks, and allowing them to focus on machine learning code and artifacts.

## Chapters

1. [liminal.yml](liminal.yml.md)
2. [Images](images)
3. [Pipelines](pipelines.md)
4. [Tasks](tasks)
5. [Services](services.md)
6. [Monitoring](monitoring.md)
7. [Metrics Backends](metrics_backends)
8. [Alerts Backends](alerts_backends)
9. [Advanced liminal.yml](advanced.liminal.yml.md)
10. [Executors](executors)


================================================
FILE: docs/liminal/advanced.liminal.yml.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Advanced liminal.yml

In this section you will learn about advanced features of liminal.yml

## Variables

Much like in programming languages, you can define variables for re-use across your liminal.yml
file.

```yaml
variables:
  myvar1: myvalue1
  myvar2: myvalue2
```

In the `variables` section of your liminal.yml you can define your variables as key value pairs

## Placeholders

You can use placeholders of format `{{myplaceholder}}` in almost any string value in your
liminal.yml. Make sure that any string that includes placeholders is surrounded by single or double
quotes.

For example:

```yaml
variables:
  image_name: myorg/myrepo:myapp
images:
  - image: '{{image_name}}'
    type: python
    source: .
pipelines:
  - pipeline: my_pipeline
    tasks:
      - task: my_python_task
        type: python
        image: "{{image_name}}"
        cmd: python -u my_module.py
```

### Placeholders rendering

The process of rendering placeholders checks for values in several places, by order.

#### Pipeline placeholder rendering

When running pipelines placeholders will be rendered by replacing values from sources in the
following order:

1. Current [DAG run conf](https://airflow.apache.org/docs/apache-airflow/stable/dag-run.html)
   (Airflow).
2. liminal.yml [variables](#variables) section.
3. [Airflow variables](https://airflow.apache.org/docs/apache-airflow/stable/howto/variable.html)
   (Airflow).
4. [Airflow macros](https://airflow.apache.org/docs/apache-airflow/stable/macros-ref.html)
   (Airflow). For example: `"{{yesterday_ds}}"`. For more information see: Airflow
   [Jinja Templating](https://airflow.apache.org/docs/apache-airflow/stable/concepts.html#jinja-templating).

#### Build placeholder rendering

When using `liminal build` placeholders will be rendered by replacing values from sources in the
following order:

1. Environment variables.
2. liminal.yml [variables](#variables) section.

## Task variables

In addition to the variables section you can also set specific variables for specific `task`s using
the `variables`
attribute of that task. For example:

```yaml
  - task: my_task
    type: python
    cmd: python -u my_module.py
    variables:
      myvar1: myvalue1
      myvar2: myvalue2
```

You can also pass a reference to a variable map (dictionary) set in your variables section:

```yaml
variables:
  my_variable_map1:
    myvar1: myvalue1
    myvar2: myvalue2
  my_variable_map2:
    myvar1: myvalue_a
    myvar2: myvalue_x
pipelines:
  - pipeline: my_pipeline
    tasks:
      - task: my_task1
        type: python
        cmd: python -u my_module.py
        variables: my_variable_map1
      - task: my_task2
        type: python
        cmd: python -u my_module.py
        variables: my_variable_map2
```

## Executors

For fully detailed information on executors see: [executors](executors).

Each `task` in your pipelines has a reference to an executor from the `executors` section of your
liminal.yml file. If not specified, the appropriate default executor for that task type is used.

```yaml
executors:
  - executor: my_kubernetes_executor
    type: kubernetes
    resources:
      request_memory: 128Mi
pipelines:
  - pipeline: my_pipeline
    tasks:
      - task: my_python_task
        type: python
        image: myorg/myrepo:mypythonapp
        executor: my_kubernetes_executor
        cmd: python -u my_module.py
```

In the example above we define an `executor` of type `kubernetes` with custom resources
configuration.

`executors` is a section in the root of your liminal.yml file and is a list of `executor`s defined
by the following attributes:

### executor attributes

`executor`: name of your executor.

`type`: type of the executor. The currently available executor types are: `kubernetes` and `emr`.

Different executor types support their own additional configuration.

## Task Defaults

`task_defaults` is a section in the root of your liminal.yml file in which default attributes can be
set for each `task`
type.

```yaml
task_defaults:
  python:
    executor: my_kubernetes_executor
    env_vars:
      env: {{env}}
      foo: bar
```

In the example above we set default attributes for any `python` task in any pipeline in the liminal
system defined by our liminal.yml file. Each `python` task that does not set these attributes will
default to the setting in `task_defaults`.

If the same attribute is defined in both `task_defaults` and in the `task`, definitions from the
`task` take precedence. If a map (dictionary) of values (for example `env_vars`) is defined in both
`task_defaults` and the `task`, the two maps will be merged, with definitions in the `task` taking
precedence in case a key is defined in both maps.

## Pipeline Defaults

`pipeline_defaults` is a section in the root of your liminal.yml file in which default attributes
can be set for each `pipeline` in the liminal system defined in your liminal.yml file.

```yaml
pipeline_defaults:
  start_date: 2021-01-01
  timeout_minutes: 30
```

In the example above we set default attributes for any `pipeline` defined in our liminal.yml file.
Each `pipeline` that does not set these attributes will default to the setting in
`pipeline_defaults`.

If the same attribute is defined in both `pipeline_defaults` and in the `pipeline`, definitions from
the `pipeline` take precedence.

If `tasks` section is defined in `pipeline_defaults` each pipeline defined in our liminal.yml file
will have the tasks defined in `pipeline_defaults`.

A special `task` type `pipeline` may be used in `pipeline_defaults` `tasks` section. This task type
is interpreted as "tasks defined in pipeline go here". This allows flexibility of defining common
tasks to be run before and after the tasks of each pipeline defined in our liminal.yml file. For
example:

```yaml
pipeline_defaults:
  before_tasks:
    - task: my_common_setup_task
      type: python
      image: myorg/myrepo:mypythonapp
      cmd: python -u my_setup_module.py
  after_tasks:
    - task: my_common_teardown_task1
      type: python
      image: myorg/myrepo:mypythonapp
      cmd: python -u my_teardown_module1.py
    - task: my_common_teardown_task2
      type: python
      image: myorg/myrepo:mypythonapp
      cmd: python -u my_teardown_module2.py
```

In the example above we set lists of `before_tasks` and `after_tasks` in `pipeline_defaults`, so
each pipeline defined in our liminal.yml file will have `my_common_setup_task` run before its tasks
and `my_common_teardown_task1` and `my_common_teardown_task2` run after its tasks.

## Inheritance

Each liminal.yml defines a subliminal layer of a liminal system. A subliminal layer can inherit
attributes of a superliminal layer by specifying `super: name_of_super` in the root of the yml.

```yaml
name: my_system
super: my_super
```

A super is found by this name if a liminal.yml file in your environment exists with that name. A
superliminal layer liminal.yml file needs to define its `type` as `super`:

```yaml
name: my_super
type: super
```

If not specified, the `type` of a liminal.yml file defaults to `sub` (subliminal).

A superliminal can also inherit from another superliminal by defining its own `super` attribute.

```yaml
name: my_super
type: super
super: my_other_super
```

A liminal system can have 1 subliminal layer but many superliminal layers using inheritance. This
allows us to chain common behaviors of our systems into several superliminal layers.

If no `super` is defined for a liminal.yml file then it defaults to having the
[base](https://github.com/apache/incubator-liminal/blob/master/liminal/core/config/defaults/base/liminal.yml)
be its super.

### superliminal attributes

A superliminal layer can define the following attributes:

```
variables
executors
monitoring
task_defaults
pipeline_defaults
```

If the same attribute section is defined in both a superliminal and a lower layer of the system they
will be merged, with key collisions favoring the lower level layer.


================================================
FILE: docs/liminal/executors/README.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Executors

Each `task` in your pipelines has a reference to an executor from the `executors` section of your
liminal.yml file. If not specified, the appropriate default executor for that task type is used.

```yaml
executors:
  - executor: my_kubernetes_executor
    type: kubernetes
    resources:
      request_memory: 128Mi
pipelines:
  - pipeline: my_pipeline
    tasks:
      - task: my_python_task
        type: python
        image: myorg/myrepo:mypythonapp
        executor: my_kubernetes_executor
        cmd: python -u my_module.py
```

In the example above we define an `executor` of type `kubernetes` with custom resources
configuration.

`executors` is a section in the root of your liminal.yml file and is a list of `executor`s defined
by the following attributes:

## executor attributes

`executor`: name of your executor.

`type`: type of the executor. The currently available executor types are: `kubernetes` and `emr`.

Different executor types support their own additional configuration.

## executor types

1. [kubernetes](kubernetes.md)
2. [emr](emr.md)


================================================
FILE: docs/liminal/executors/emr.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# emr executor

The `emr` executor allows you to run tasks on AWS EMR cluster.

```yaml
  - executor: my_emr_cluster
    type: emr
    cluster_name: liminal-cluster
```

## attributes

`cluster_name`: the name of the cluster used by the executor.

OR

`cluster_id`: the unique identifier of the cluster used by the executor.

`aws_conn_id`: a reference to the emr connection. default: `aws_default`

`cluster_states`: the cluster state filters to apply when searching for an existing cluster.
default: `['RUNNING', 'WAITING']`

`properties`: any attribute of [aws-resource-emr-step-properties](
https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-emr-step.html#aws-resource-emr-step-properties)
(Optional)

## supported task types
- [spark](../tasks/spark.md)
- [sql](../tasks/sql.md)


================================================
FILE: docs/liminal/executors/index.rst
================================================
.. Apache Liminal documentation master file, created by
   sphinx-quickstart on Sun Nov 15 08:45:27 2020.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

..
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership.  The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at
..

..  http://www.apache.org/licenses/LICENSE-2.0

..
   Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   KIND, either express or implied.  See the License for the
   specific language governing permissions and limitations
   under the License.
..

Executors
=========

Each ``task`` in your pipelines has a reference to an executor from the ``executors`` section of
your liminal.yml file. If not specified, the appropriate default executor for that task type is
used.

.. code-block:: yaml

   executors:
     - executor: my_kubernetes_executor
       type: kubernetes
       resources:
         request_memory: 128Mi
   pipelines:
     - pipeline: my_pipeline
       tasks:
         - task: my_python_task
           type: python
           image: myorg/myrepo:mypythonapp
           executor: my_kubernetes_executor
           cmd: python -u my_module.py

..

In the example above we define an ``executor`` of type ``kubernetes`` with custom resources
configuration.

``executors`` is a section in the root of your liminal.yml file and is a list of ``executor`` s
defined by the following attributes:

executor attributes
'''''''''''''''''''

``executor``: name of your executor.

``type``: type of the executor. The currently available executor types are: ``kubernetes`` and ``emr``.

Different executor types support their own additional configuration.

executor types
''''''''''''''

.. toctree::
   :maxdepth: 1

   kubernetes


================================================
FILE: docs/liminal/executors/kubernetes.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# kubernetes executor

The `kubernetes` executor allows you to run tasks on kubernetes.

```yaml
  - executor: my_kubernetes_executor
    type: kubernetes
```

## attributes

If running pipelines on Airflow, any attribute of
[KubernetesPodOperator](https://airflow.apache.org/docs/apache-airflow/1.10.12/_api/airflow/contrib/operators/kubernetes_pod_operator/index.html)
can be set as an attribute of the executor.

Note that some of these attributes are set in the `task`:
```
image
cmd
mounts
name
env_vars
```
For example, see: [python task](../tasks/python.md)


================================================
FILE: docs/liminal/extensibility.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Extensibility

Extensibility is designed to give users an easy way to add their own tasks, executors and image
builders, extending the library so that it fits the level of abstraction that suits the user's
environment.

## Location

Tasks folder: `{LIMINAL_HOME}/plugins/tasks`

Executors folder: `{LIMINAL_HOME}/plugins/executors`

Image builders folder: `{LIMINAL_HOME}/plugins/images`

## Example

### Prerequisites

Apache Liminal

### Guide

Check out the examples for each one of the extensible items
in [examples/extensibility](../../examples/extensibility).

Copy the extensible items to the plugin location:

```shell
cp -r ../../examples/extensibility/executors/* $LIMINAL_HOME/liminal/plugins/executors/
```

```shell
cp -r ../../examples/extensibility/tasks/* $LIMINAL_HOME/liminal/plugins/tasks/
```

```shell
cp -r ../../examples/extensibility/images/* $LIMINAL_HOME/liminal/plugins/images/
```

```shell
liminal build .
liminal deploy --clean
liminal start
```


================================================
FILE: docs/liminal/images/README.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Images

In the `images` section you can configure how to pack your code into docker images. This can be
achieved in liminal easily, without any Docker knowledge needed by setting just a few attributes:

```yaml
images:
  - image: myorg/myrepo:mypythonapp
    type: python
    source: .
  - image: myorg/myrepo:myserver
    type: python_server
    source: path/to/my/server/code
    endpoints:
      - endpoint: /myendpoint
        module: my_module
```

`images` is a section in the root of your liminal.yml file and is a list of `image`s, defined
by the following attributes:

## image attributes

`image`: name of your image, usually will be in the form of `user/repository:tag` common in docker
image repositories such as Docker Hub, but for local development this can be any string.

`type`: type of the image. Usually this will match your coding language, or your coding language
followed by an `_` and a specific style of application (such as a server).

`source`: location of source files to include in the image, this can be a relative path within your
project, or even `.` which means the entire project should be packaged in the image.

`build_cmd`: certain languages require to be built or compiled, here you can provide your build
command to be executed in order to build your code.

Other image types might require additional configuration, for example, servers require `endpoints`
to be configured.

## image types

1. [python](python.md)
2. [python server](python_server.md)


================================================
FILE: docs/liminal/images/index.rst
================================================
.. Apache Liminal documentation master file, created by
   sphinx-quickstart on Sun Nov 15 08:45:27 2020.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

..
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership.  The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at
..

..  http://www.apache.org/licenses/LICENSE-2.0

..
   Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   KIND, either express or implied.  See the License for the
   specific language governing permissions and limitations
   under the License.
..

Images
======

In the ``images`` section you can configure how to pack your code into docker images. This can be
achieved in liminal easily, without any Docker knowledge needed by setting just a few attributes:

.. code-block:: yaml

   images:
     - image: myorg/myrepo:mypythonapp
       type: python
       source: .
     - image: myorg/myrepo:myserver
       type: python_server
       source: path/to/my/server/code
       endpoints:
         - endpoint: /myendpoint
           module: my_module

..

``images`` is a section in the root of your liminal.yml file and is a list of
``image`` s, defined by the following attributes:

image attributes
''''''''''''''''

``image``: name of your image, usually will be in the form of ``user/repository:tag`` common in
docker image repositories such as Docker Hub, but for local development this can be any string.

``type``: type of the image. Usually this will match your coding language, or your coding language
followed by an ``_`` and a specific style of application (such as a server).

``source``: location of source files to include in the image, this can be a relative path within
your project, or even ``.`` which means the entire project should be packaged in the image.

``build_cmd``: certain languages require to be built or compiled, here you can provide your build
command to be executed in order to build your code.

Other image types might require additional configuration, for example, servers require ``endpoints``
to be configured.

image types
'''''''''''

.. toctree::
   :maxdepth: 1

   python
   python_server


================================================
FILE: docs/liminal/images/python.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# python image

The `python` image builder packages your python code as a docker image.

```yaml
  - image: myorg/myrepo:mypythonapp
    type: python
    source: relative/path/to/source
```

## attributes

`image`: name of your image.

`source`: location of source files to include in the image, this can be a relative path within your
project, or even `.` which means the entire project should be packaged in the image.


================================================
FILE: docs/liminal/images/python_server.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# python_server image

The `python_server` image builder packages your python code as a docker image which starts a web
server to serve your code via HTTP calls.

```yaml
  - image: myorg/myrepo:mypythonserver
    type: python_server
    source: relative/path/to/source
    endpoints:
      - endpoint: /myendpoint
        module: my_module
        function: my_function
      - endpoint: /myotherendpoint
        module: my_module2
        function: my_function2
```

## attributes

`image`: name of your image.

`source`: location of source files to include in the image, this can be a relative path within your
project, or even `.` which means the entire project should be packaged in the image.

`endpoints`: list of `endpoint`s to expose in the server.

## endpoint attributes

`endpoint`: path to your endpoint in the server.

`module`: module containing your user code.

`function`: name of the function within the module that should be called when handling this
endpoint. The function should expect a dictionary as an input (or None if no input) and should
return a string to return in the response to the HTTP call.


================================================
FILE: docs/liminal/index.rst
================================================
.. Apache Liminal documentation master file, created by
   sphinx-quickstart on Sun Nov 15 08:45:27 2020.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

..
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership.  The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at
..

..  http://www.apache.org/licenses/LICENSE-2.0

..
   Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   KIND, either express or implied.  See the License for the
   specific language governing permissions and limitations
   under the License.
..

Apache Liminal Documentation
============================

Chapters:

.. toctree::
   :maxdepth: 1

   liminal.yml
   images/index.rst
   pipelines
   tasks/index.rst
   services
   monitoring
   metrics_backends/index.rst
   advanced.liminal.yml
   executors/index.rst


================================================
FILE: docs/liminal/kubernetes/secret_util.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Secret Utils

Each `task` in your pipelines has a reference to a secret from the `secrets` section of your
liminal.yml file.

```yaml
---
name: k8s_secret_example
secrets:
  - secret: aws
    local_path_file: "~/.aws/credentials"
executors:
  - executor: k8s
    type: kubernetes
variables:
  AWS_CONFIG_FILE: /mnt/credentials
task_defaults:
  python:
    executor: k8s
    image: amazon/aws-cli:2.7.23
    executors: 2
    env_vars:
      AWS_CONFIG_FILE: "/secret/credentials"
    secrets:
      - secret: aws
        remote_path: "/secret"
pipelines:
  - pipeline: k8s_secret_example
    owner: Bosco Albert Baracus
    start_date: 1970-01-01
    timeout_minutes: 10
    schedule: 0 * 1 * *
    tasks:
      - task: my_python_task
        type: python
        cmd: aws s3 ls
```

The example manifest above defines an Opaque Secret for the AWS credentials used. The values are Base64 strings in the manifest; however, when you use the Secret with a Pod, the kubelet provides the decoded data to the Pod and its containers.

In order to make use of the AWS credentials we define an environment variable `AWS_CONFIG_FILE` to authenticate our requests.

For example, the file generated by the AWS CLI for a default profile configured with `aws configure` looks similar to the following.

`~/.aws/credentials`
```
[default]
aws_access_key_id=<aws_access_key_id>
aws_secret_access_key=<aws_secret_access_key>
region = us-east-1
aws_session_token=<aws_session_token>
```


================================================
FILE: docs/liminal/liminal.yml.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

## liminal.yml

Definition of your own liminal system is done via a yml configuration file named liminal.yml in
your project. This yml is the one place in which you define all characteristics and behavior of your
liminal system.
In this section we'll go over the anatomy of the liminal.yml file.

### root attributes

At the root level of your liminal.yml you can define the most basic root characteristics of your
liminal system such as name and owner:

```yaml
name: MyLiminalSystem
owner: Bosco Albert Baracus
```

### images

For fully detailed information on images see: [images](images).

In the `images` section you can configure how to pack your code into docker images. This can be
achieved in liminal easily, without any Docker knowledge needed by setting just a few attributes:

```yaml
images:
  - image: myorg/myrepo:mypythonapp
    type: python
    source: .
  - image: myorg/myrepo:myserver
    type: python_server
    source: path/to/my/server/code
    endpoints:
      - endpoint: /myendpoint
        module: my_module
        function: my_function
```

`images` is a section in the root of your liminal.yml file and is a list of `image`s, defined
by the following attributes:

#### image attributes

`image`: name of your image, usually will be in the form of `user/repository:tag` common in docker
image repositories such as Docker Hub, but for local development this can be any string.

`type`: type of the image. Usually this will match your coding language, or your coding language
followed by an `_` and a specific style of application (such as a server).
The currently available image types are: `python` and `python_server`.

`source`: location of source files to include in the image, this can be a relative path within your
project, or even `.` which means the entire project should be packaged in the image.

`build_cmd`: certain languages require to be built or compiled, here you can provide your build
command to be executed in order to build your code.

Other image types might require additional configuration, for example, servers require `endpoints`
to be configured.

### pipelines

For fully detailed information on pipelines see: [pipelines](pipelines.md).

In the `pipelines` section you can configure scheduled/manually run pipelines of tasks to be
executed sequentially:

```yaml
pipelines:
  - pipeline: my_data_pipeline
    timeout_minutes: 30
    start_date: 2020-03-01
    schedule: 0 10 * * *
    tasks:
      - task: my_sql_task
        type: sql
        query: "SELECT * FROM
        {{my_database_name}}.{{my_table_name}}
        WHERE event_date_prt >=
              '{{yesterday_ds}}'
              AND cms_platform = 'xsite'"
        output_table: my_db.my_out_table
        output_path: s3://my_bky/{{env}}/mydir
      - task: my_python_task
        type: python
        image: myorg/myrepo:pythonapp
        cmd: python my_python_app.py
        env_vars:
          env: {{env}}
          fizz: buzz
      - task: my_python_task
        type: python
        image: myorg/myrepo:mypythonapp
        cmd: python -u my_module.py
        env_vars:
          env: {{env}}
          fizz: buzz
```

`pipelines` is a section in the root of your liminal.yml file and is a list of `pipeline`s defined
by the following attributes:

#### pipeline attributes

`pipeline`: name of your pipeline (must be unique per liminal server).

`timeout_minutes`: maximum allowed pipeline run time in minutes, if run exceeds this time, pipeline
and all running tasks will fail.

`start_date`: start date for the pipeline.

`schedule`: to be configured if the pipeline should run on a schedule. Format is
[cron expression](https://en.wikipedia.org/wiki/Cron#CRON_expression).

`tasks`: list of `task`s, defined by the following attributes:

##### task attributes

For fully detailed information on tasks see: [tasks](tasks).

`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).

`type`: type of the task. Examples of available task types are: `python`
and more.

Different task types require their own additional configuration. For example, `python` task requires
`image` to be configured.

### services

For fully detailed information on services see: [services](services.md).

In the `services` section you can configure constantly running applications such as
servers.

```yaml
services:
  - service: my_server
    image: myorg/myrepo:myserver
```

`services` is a section in the root of your liminal.yml file and is a list of `service`s, defined
by the following attributes:

#### service attributes

`service`: name of your service.

`image`: the service's docker image.

### monitoring

For fully detailed information on monitoring see: [monitoring](monitoring.md).

In the `monitoring` section you can configure monitoring for your pipelines and services.

```yaml
monitoring:
  metrics_backends:
    - metrics_backend: cloudwatch_metrics
      type: aws_cloudwatch
      namespace: DataPipeline
      AWS_REGION_NAME: us-east-1
  alerts_backends:
    - alerts_backend: cloudwatch_alerts
      type: aws_cloudwatch
      metrics_backend: cloudwatch_metrics
      ok_actions: ['arn:aws:sns:...']
      alarm_actions: ['arn:aws:sns:...']
```

`monitoring` is a section in the root of your liminal.yml file and is a list of `metrics_backend`s and
a list of `alerts_backend`s.

`metrics_backend`s are where metrics for your pipelines are automatically sent.

`alerts_backend`s automatically register alerts based on the `metrics_backend` they are paired with.

#### metrics_backend attributes

`metrics_backend`: name of your metrics backend

`type`: type of the metrics backend. The current available metrics backends are: `aws_cloudwatch`.

Different metrics backend types require their own additional configuration. For example,
`aws_cloudwatch` metrics backend requires `namespace` to be configured.

#### alerts_backend attributes

`alerts_backend`: name of your alerts backend

`type`: type of the alerts backend. The current available alerts backends are: `aws_cloudwatch`.

`metrics_backend`: name of the metrics backend to register alerts for.

Different alerts backend types require their own additional configuration. For example,
`aws_cloudwatch` alerts backend requires `alarm_actions` to be configured.

### advanced topics

For documentation of more advanced features of liminal.yml see:
[advanced liminal.yml](advanced.liminal.yml.md)


================================================
FILE: docs/liminal/metrics_backends/README.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Metrics Backends

`metrics_backend` is the definition of a metrics backend to which all automatically
generated metrics from your pipeline are sent, and is part of the `metrics_backends` list in the
`monitoring` section of your liminal.yml.

```yaml
  - metrics_backend: cloudwatch_metrics
    type: aws_cloudwatch
    namespace: DataPipeline
    AWS_REGION_NAME: us-east-1
```

A `metrics_backend` is defined by the following attributes:

## metrics_backend attributes

`metrics_backend`: name of your metrics backend

`type`: type of the metrics backend. The current available metrics backends are: `aws_cloudwatch`.

Different metrics backend types require their own additional configuration. For example,
`aws_cloudwatch` metrics backend requires `namespace` to be configured.

## metrics_backend types

1. [aws cloudwatch](aws_cloudwatch.md)


================================================
FILE: docs/liminal/metrics_backends/aws_cloudwatch.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# aws_cloudwatch metrics backend

The `aws_cloudwatch` metrics backend allows you to send all automatically generated metrics from
your pipelines to AWS CloudWatch.

```yaml
  - metrics_backend: cloudwatch_metrics
    type: aws_cloudwatch
    namespace: DataPipeline
    AWS_REGION_NAME: us-east-1
```

## attributes

`metrics_backend`: name of your metrics backend.

`type`: type of the metrics backend. The current available metrics backends are: `aws_cloudwatch`.

`namespace`: target namespace on AWS CloudWatch.

`AWS_REGION_NAME`: target AWS region to report metrics to.

`AWS_ACCESS_KEY_ID`: AWS access key id (optional).

`AWS_SECRET_ACCESS_KEY`: AWS secret access key (optional).


================================================
FILE: docs/liminal/metrics_backends/index.rst
================================================
.. Apache Liminal documentation master file, created by
   sphinx-quickstart on Sun Nov 15 08:45:27 2020.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

..
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership.  The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at
..

..  http://www.apache.org/licenses/LICENSE-2.0

..
   Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   KIND, either express or implied.  See the License for the
   specific language governing permissions and limitations
   under the License.
..

Metrics Backends
================

``metrics_backend`` is the definition of a metrics backend to which all automatically
generated metrics from your pipeline are sent, and is part of the ``metrics_backends`` list in the
``monitoring`` section of your liminal.yml.

.. code-block:: yaml

  - metrics_backend: cloudwatch_metrics
    type: aws_cloudwatch
    namespace: DataPipeline
    AWS_REGION_NAME: us-east-1

..

A ``metrics_backend`` is defined by the following attributes:

metrics_backend attributes
''''''''''''''''''''''''''

``metrics_backend``: name of your metrics backend

``type``: type of the metrics backend. The current available metrics backends are: ``aws_cloudwatch``.

Different metrics backend types require their own additional configuration. For example,
``aws_cloudwatch`` metrics backend requires ``namespace`` to be configured.

metrics_backend types
'''''''''''''''''''''

.. toctree::
   :maxdepth: 1

   aws_cloudwatch


================================================
FILE: docs/liminal/monitoring.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Monitoring

In the `monitoring` section you can configure monitoring for your pipelines and services.

```yaml
monitoring:
  metrics_backends:
    - metrics_backend: cloudwatch_metrics
      type: aws_cloudwatch
      namespace: DataPipeline
      AWS_REGION_NAME: us-east-1
```

`monitoring` is a section in the root of your liminal.yml file and is a list of `metrics_backend`s and
a list of `alerts_backend`s.

`metrics_backend`s are where metrics for your pipelines are automatically sent.

`alerts_backend`s automatically register alerts based on the `metrics_backend` they are paired with.

## metrics_backend attributes

For fully detailed information on metrics backends see: [metrics backends](metrics_backends).

`metrics_backend`: name of your metrics backend

`type`: type of the metrics backend. The current available metrics backends are: `aws_cloudwatch`.

Different metrics backend types require their own additional configuration. For example,
`aws_cloudwatch` metrics backend requires `namespace` to be configured.


================================================
FILE: docs/liminal/pipelines.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Pipelines

In the `pipelines` section you can configure scheduled/manually run pipelines of tasks to be
executed sequentially:

```yaml
pipelines:
  - pipeline: my_data_pipeline
    timeout_minutes: 30
    start_date: 2020-03-01
    schedule: 0 10 * * *
    tasks:
      - task: my_sql_task
        type: sql
        query: "SELECT * FROM
        {{my_database_name}}.{{my_table_name}}
        WHERE event_date_prt >=
              '{{yesterday_ds}}'
              AND cms_platform = 'xsite'"
        output_table: my_db.my_out_table
        output_path: s3://my_bky/{{env}}/mydir
      - task: my_python_task
        type: python
        image: myorg/myrepo:pythonapp
        cmd: python my_python_app.py
        env_vars:
          env: {{env}}
          fizz: buzz
```

`pipelines` is a section in the root of your liminal.yml file and is a list of `pipeline`s defined
by the following attributes:

## pipeline attributes

`pipeline`: name of your pipeline (must be unique per liminal server).

`timeout_minutes`: maximum allowed pipeline run time in minutes, if run exceeds this time, pipeline
and all running tasks will fail.

`start_date`: start date for the pipeline.

`schedule`: to be configured if the pipeline should run on a schedule. Format is
[cron expression](https://en.wikipedia.org/wiki/Cron#CRON_expression).

`tasks`: list of `task`s, defined by the following attributes:

## task attributes

For fully detailed information on tasks see: [tasks](tasks).

`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).

`type`: type of the task. Examples of available task types are: `python`
and more.

Different task types require their own additional configuration. For example, `python` task requires
`image` to be configured.


================================================
FILE: docs/liminal/services.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Services

In the `services` section you can configure constantly running applications such as
servers.

```yaml
services:
  - service: my_server
    image: myorg/myrepo:myserver
```

`services` is a section in the root of your liminal.yml file and is a list of `service`s, defined
by the following attributes:

## service attributes

`service`: name of your service.

`image`: the service's docker image.


================================================
FILE: docs/liminal/tasks/README.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Tasks

`task` is the definition of a specific step in your pipeline, and is part of the `tasks` list
in your pipeline definition.

For fully detailed information on pipelines see: [pipelines](../pipelines.md).

```yaml
  - task: my_python_task
    type: python
    image: myorg/myrepo:mypythonapp
    cmd: python -u my_module.py
    env_vars:
      env: {{env}}
      fizz: buzz
```

A `task` is defined by the following attributes:

## task attributes

`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).

`type`: type of the task. Examples of available task types are: `python`
and more.

Different task types require their own additional configuration. For example, `python` task requires
`image` to be configured.

## task types

1. [python](python.md)
2. [spark](spark.md)
3. [create_cloudformation_stack](create_cloudformation_stack.md)
4. [delete_cloudformation_stack](delete_cloudformation_stack.md)


================================================
FILE: docs/liminal/tasks/create_cloudformation_stack.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# create_cloudformation_stack task

The `create_cloudformation_stack` task allows you to create a CloudFormation stack on AWS.

```yaml
  - task: create_emr
    type: create_cloudformation_stack
    stack_name: liminal_document_emr
    properties:
      OnFailure: DO_NOTHING
      TimeoutInMinutes: 25
      Capabilities: [ 'CAPABILITY_NAMED_IAM' ]
      TemplateURL: s3://liminal-doc/emr/template.yml
      Parameters:
        Environment: Production
        CoreServerCount: '2'
```

## attributes

`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).

`stack_name`: the name of the stack

`properties`: any attribute of [aws-create-stack-request-parameters](
https://docs.aws.amazon.com/AWSCloudFormation/latest/APIReference/API_CreateStack.html#API_CreateStack_RequestParameters)


================================================
FILE: docs/liminal/tasks/delete_cloudformation_stack.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# delete_cloudformation_stack task

The `delete_cloudformation_stack` task allows you to delete a CloudFormation stack.

```yaml
    - task: delete_emr
      type: delete_cloudformation_stack
      stack_name: liminal_emr_stack_name
      description: delete stack
```

## attributes

`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).

`stack_name`: the name of the stack to delete


================================================
FILE: docs/liminal/tasks/index.rst
================================================
.. Apache Liminal documentation master file, created by
   sphinx-quickstart on Sun Nov 15 08:45:27 2020.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

..
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership.  The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at
..

..  http://www.apache.org/licenses/LICENSE-2.0

..
   Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   KIND, either express or implied.  See the License for the
   specific language governing permissions and limitations
   under the License.
..

Tasks
=====

``task`` is the definition of a specific step in your pipeline, and is part of the ``tasks`` list
in your pipeline definition.

For fully detailed information on pipelines see: `pipelines`_.

.. _pipelines: ../pipelines.html

.. code-block:: yaml

  - task: my_python_task
    type: python
    image: myorg/myrepo:mypythonapp
    cmd: python -u my_module.py
    env_vars:
      env: {{env}}
      fizz: buzz

..

A ``task`` is defined by the following attributes:

In the ``images`` section you can configure how to pack your code into docker images. This can be
achieved in liminal easily, without any Docker knowledge needed by setting just a few attributes:

task attributes
''''''''''''''''

``task``: name of your task (must be made of alphanumeric, dash and/or underscore characters only).

``type``: type of the task.

Different task types require their own additional configuration. For example, ``python`` task
requires ``image`` to be configured.

task types
''''''''''

.. toctree::
   :maxdepth: 1

   python


================================================
FILE: docs/liminal/tasks/python.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# python task

The `python` task allows you to run python code packaged as docker images.

```yaml
  - task: my_python_task
    type: python
    image: myorg/myrepo:mypythonapp
    cmd: python my_python_app.py
    env_vars:
      env: '{{env}}'
      fizz: buzz
    mounts:
      - mount: mymount
        volume: myvol1
        path: /mnt/vol1
```

## attributes

`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).

`image`: name of image to run.

`cmd`: command to run when running the image.

`env_vars`: environment variables to set when running the image.

`mounts`: list of `mount`s defined by the following attributes:

### mount attributes

`mount`: name of the mount.

`volume`: volume to mount. Volumes are defined in the `volumes` section of liminal.yml.

`path`: path in which to mount the volume. This is the path accessible to user code.


================================================
FILE: docs/liminal/tasks/spark.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# spark task

The `spark` task allows you to submit a Spark application.

```yaml
  - task: my_spark_app
    type: spark
    executor: emr_executor
    application_source: 'my_app.py'
    class: 'com.mycompany.MySparkApp'
    master: 'yarn'
    conf:
      spark.driver.memory: '1g'
      spark.driver.maxResultSize: '1g'
      spark.yarn.executor.memoryOverhead: '500M'
    application_arguments:
      --query: "select * from my_db.my_input_table where my_date_col >= '{{yesterday_ds}}'"
      --output: 'my_output_table'
```

## attributes

`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).

`executor`: executor name ([supported executors](#supported-executors))

`application_source`: the location of the spark application (jar location / python file)

`class`: the entry point for your application

`master`: the cluster manager to connect to. See the list
of [allowed master URLs](https://spark.apache.org/docs/latest/submitting-applications.html#master-urls)
(Optional)

`conf`: arbitrary Spark configuration properties (Optional)

`application_arguments`: arguments passed to the main method of your main class (Optional)

## supported executors
- [emr](../executors/emr.md)


================================================
FILE: docs/make.bat
================================================
REM
REM Licensed to the Apache Software Foundation (ASF) under one
REM or more contributor license agreements.  See the NOTICE file
REM distributed with this work for additional information
REM regarding copyright ownership.  The ASF licenses this file
REM to you under the Apache License, Version 2.0 (the
REM "License"); you may not use this file except in compliance
REM with the License.  You may obtain a copy of the License at
REM
REM   http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing,
REM software distributed under the License is distributed on an
REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
REM KIND, either express or implied.  See the License for the
REM specific language governing permissions and limitations
REM under the License.

@ECHO OFF

REM Work from the directory that contains this script (%~dp0); restored by popd at :end.
pushd %~dp0

REM Command file for Sphinx documentation

REM Fall back to the plain 'sphinx-build' command when SPHINXBUILD is not set.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=build

REM Without a target argument, show the Sphinx help output instead of building.
if "%1" == "" goto help

REM Probe the command silently; errorlevel 9009 is treated as "command not found".
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Delegate the requested target (first argument) to sphinx-build's -M mode.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd


================================================
FILE: docs/source/How_to_install_liminal_in_airflow_on_kubernetes.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# Install Liminal in Airflow
* [Workflow](#workflow)
* [Prerequisites](#prerequisites)
* [One time setup](#One-time-environment-setup)
* [Deploying your Yaml files](#Deploying-your-Yaml-files)
* [References and other resources](#references-and-other-resources)

## Workflow
To deploy and run liminal on airflow, we need to complete two tasks:
1. Ensure that the Liminal python package is installed inside each one of Airflow's pods (schedulers, Workers and Web Server). \
This is a standard airflow management task, which can be achieved in multiple ways depending on your setup (we will cover a few later in this guide).
2. Ensure that the user's Liminal code (i.e. Yamls) are present in each of the pods' DAGs folder (typically located in /opt/airflow/dags). \
Here, our recommended approach is to use a folder on a shared filesystem (EFS) to host the airflow DAGs (and Liminal Yamls). \
This folder will be mounted into Airflow pods' DAGs folder - thus enabling each of the pods to pick the files up and run the Liminal DAGs.


![](assets/liminal_deployment_diagram.png)


Below is a description of how to achieve this task using a commonly used Helm chart for Airflow on Kubernetes.

## Prerequisites
### Airflow on Kubernetes
This guide assumes you have successfully installed Airflow on Kubernetes.

If you haven't done so yet, we found the following Helm chart useful for achieving this goal:
[airflow-helm-chart-7.16.0]

You'll need to add the chart to your Helm installation and follow the instructions from the README.md
```sh
helm repo add airflow-stable https://airflow-helm.github.io/charts
helm repo update
```

### A provisioned EFS file system
EFS is an AWS solution which offers an NFS as a service. \
You can read up about EFS [here](https://aws.amazon.com/efs/features/) and set it up via AWS console or CLI.

## One time environment setup
### Adding the Apache Liminal package to Apache Airflow pods

There are multiple ways to install liminal into the Airflow pods, depending on how you deployed Airflow on Kubernetes. \
In principle, it requires the package to be pip-installed into the Airflow docker - either during build time or in run time.

1. If you are rolling out your own Airflow Docker images based on the [official docker image](https://github.com/apache/airflow),
you can add apache-liminal installation during the build:
```docker build --build-arg ADDITIONAL_PYTHON_DEPS="apache-liminal"```

2. If you already have a running Airflow on Kubernetes, you can run a command which iterates over Airflow pods and executes a ```pip install apache-liminal```
on each of the pods.

3. If you used the [airflow-helm-chart-7.16.0], you just need to specify apache-liminal inside the ```requirements.txt``` file as per the instructions [here][airflow-helm-chart-7.16.0]


### Preparing a "deployment box" and an EFS folder to host the Liminal Yaml files
The "deployment box" is the machine which has access to the Yamls you want to deploy. \
This machine will also mount the same EFS folder as Airflow, and use ```liminal deploy``` to push the Yamls into that folder.

1. Provision an EC2 instance which has access to the EFS

2. Mount the EFS onto the EC2 instance

```sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport <EFS_ID>:/ /mnt/efs```

3. Create a folder on an EFS drive. \
This will be our target for deployment
```mkdir -p /mnt/efs/opt/airflow/liminal_home```

4. ```export LIMINAL_HOME=/mnt/efs/opt/airflow/liminal_home```

### Mounting the EFS folder into Airflow pods

Now that we have an EFS drive, and we've created the shared folder on it, it's time to mount it onto the Airflow pods' DAGs folder.
The [guide][airflowInstallation] includes details on how to expose EFS as a Persistent Volume for kubernetes.

Make sure to point the pods' PV to the right EFS folder, like so:

```
volumeMounts:
    - name: efs-data
      mountPath: /opt/airflow/dags
```

where efs-data is a PVC pointing to `<EFS_ID>:/opt/airflow/liminal_home`


## Deploying your Yaml files

Finally, the one time setup is done, and by this point, you should have:
1. A deployment box which can deploy Liminal to an EFS folder
2. Airflow pods which will pick up the Yamls from that folder automatically.

Deploying the actual Yaml files is the simple part.

Each time you want to deploy the Liminal Yamls from the deployment box:
```
liminal deploy --path <<<path to liminal user code>>>
```

## References and other resources

### Setting up Airflow on Kubernetes with EFS
[Setting up Airflow on Kubernetes with AWS EFS][airflowInstallation]

### Example script
In order to make it easier to understand all the steps, we include a script which performs the Liminal-specific steps in the deployment.
This script should be run from an EC2 machine which has access to the EFS you've provisioned. \
The steps below run the example project of [liminal-getting-started][liminal-getting-started].
* Mount the EFS and set the environment parameters:
  * [`liminal_aws_deploy.sh`][liminal-aws-deploy] -o install
* Build dockers from your business logic:
  * [`liminal_aws_deploy.sh`][liminal-aws-deploy] -o build
* Deploy the Yaml onto the EFS folder:
  * [`liminal_aws_deploy.sh`][liminal-aws-deploy] -o deploy

![](assets/liminal_aws_deploy.gif)

##### Setting up AWS ECR with Kubernetes
[How to configure and use AWS ECR with kubernetes][AWS-ECR-with-kubernetes]

1. `liminal_aws_deploy.sh -o build` will build dockers from the given liminal project path. \
The [liminal.yml][liminal-yml] defines a docker image:
```
    tasks:
      - task: python_hello_world_example
        type: python
        description: static input task
        image: python_hello_world_example_image
```
The Yaml defines a task that will pull the docker image from your predefined registry. \
Therefore, you need to use a docker registry in your kubernetes cluster. \
One way to achieve it is to use [Amazon ECR][amazon-ecr].

[AWS-ECR-with-kubernetes]: <https://medium.com/@damitj07/how-to-configure-and-use-aws-ecr-with-kubernetes-rancher2-0-6144c626d42c>
[amazon-ecr]: <https://aws.amazon.com/ecr/>
[liminal-yml]: <https://github.com/apache/incubator-liminal/blob/master/examples/liminal-getting-started/liminal.yml>
[liminal-getting-started]: <https://github.com/apache/incubator-liminal/tree/master/examples/liminal-getting-started>
[airflow-helm-chart-7.16.0]: <https://github.com/airflow-helm/charts/tree/airflow-7.16.0>
[homebrew-kubectl]: <https://formulae.brew.sh/formula/kubernetes-cli>
[cluster-access-kubeconfig]: <https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/#context>
[liminal-aws-deploy]: <https://github.com/apache/incubator-liminal/tree/master/docs/source/liminal_aws_deploy.sh>
[airflowChart]: <https://github.com/airflow-helm/charts/tree/main/charts/airflow>
[airflowInstallation]: <https://medium.com/terragoneng/setting-up-airflow-on-kubernetes-with-aws-efs-c659f3a16292>
[airflowImage]: <https://hub.docker.com/layers/apache/airflow/1.10.12-python3.6/images/sha256-9ea9e5ca66bd17632241889ab248fe3852c9f3c830ed299a8ecaa8a13ac2082f?context=explore>


================================================
FILE: docs/source/liminal_aws_deploy.sh
================================================
#! /bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

help() {
	# Print the usage banner followed by the values accepted by -o, one per line.
	printf '%s\n' \
		"$0: Get Started" \
		"Usage: $0 -o option" \
		"Liminal available options:" \
		"install" \
		"build" \
		"deploy"
}

# The script needs at least the -o flag and its value; otherwise show usage and stop.
if (( $# < 2 )); then
	help
	exit
fi

# Arguments processing: map a recognised -o value onto ACTION.
# Unrecognised values leave ACTION unset, which the dispatch further down
# treats as "show help".
while getopts ":o:" opt; do #install, build, deploy
	case $opt in
	o)
		option=$OPTARG
		case $option in
		install | build | deploy)
			ACTION=$option
			;;
		esac
		;;
	esac
done

mount_efs() {
	# Mount the EFS file system at /mnt/efs over NFSv4.1.
	#   $1 - EFS identifier/address, used as the NFS server in "<id>:/"
	EFS_ID=$1
	echo "The EFS_ID is: ${EFS_ID}"

	echo "Create /mnt/efs"
	# -p makes the step idempotent (a re-run no longer fails when the mount
	# point exists); sudo because the mount below already requires root and
	# /mnt is normally not writable by regular users.
	sudo mkdir -p /mnt/efs

	echo "Mount the /mnt/efs"
	# Quote the source so an unexpected value cannot be word-split into extra arguments.
	sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport "${EFS_ID}:/" /mnt/efs
}

install_liminal() {
	# Install apache-liminal into every running container whose `docker ps`
	# line mentions k8s_airflow, then into the local Python environment.
	echo "Installing liminal on Airflow components"
	docker ps \
		| awk '/k8s_airflow/ {print $1}' \
		| xargs -I {} docker exec {} bash -c 'pip install apache-liminal'

	echo "Installing liminal locally"
	pip install apache-liminal
}

deploy_yaml() {
	# Deploy the liminal Yaml files found at $1 into the liminal_home folder on the EFS mount.
	#   $1 - path to the liminal user code
	# Returns 1 (without deploying) when no liminal_home folder exists under /mnt/efs/.
	DEPLOY_PATH=$1
	echo "The DEPLOY_PATH is: ${DEPLOY_PATH}"
	echo 'Export the liminal home'

	# Assign first, export afterwards: `export VAR=$(...)` hides the status of
	# the command substitution. Take only the first matching directory so that
	# LIMINAL_HOME never ends up holding several newline-separated paths.
	LIMINAL_HOME=$(find /mnt/efs/ -type d -name "liminal_home" | head -n 1)
	if [[ -z "${LIMINAL_HOME}" ]]; then
		echo "No liminal_home folder found under /mnt/efs/" >&2
		return 1
	fi
	export LIMINAL_HOME

	liminal deploy --path "${DEPLOY_PATH}"
}

build() {
	# Build the docker images declared by the liminal project located at $1.
	BUILD_PATH=$1
	printf '%s\n' "The BUILD_PATH is: ${BUILD_PATH}"

	liminal build --path "${BUILD_PATH}"
}

# Run the step selected via -o. Each step prompts for the one value it needs.
# User-supplied values are quoted when passed on so that paths containing
# spaces reach the helper functions as a single argument.
case "$ACTION" in
	install)
		read -r -p "Please enter the EFS ID: " EFS_ID
		mount_efs "$EFS_ID"
		install_liminal
		exit 0
	;;
	build)
		read -r -p "Please enter the path to the liminal project: " BUILD_PATH
		build "$BUILD_PATH"
		exit 0
	;;
	deploy)
		read -r -p "Please enter the liminal yaml path: " DEPLOY_PATH
		deploy_yaml "$DEPLOY_PATH"
		exit 0
	;;
*)
  # Unknown or missing action: print usage.
  help
  exit
  ;;
esac


================================================
FILE: examples/aws-ml-app-demo/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/aws-ml-app-demo/liminal.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
name: MyDataScienceApp
owner: Bosco Albert Baracus
volumes:
  - volume: gettingstartedvol
    claim_name: gettingstartedvol-pvc
    local:
      path: .
images:
  - image: myorg/mydatascienceapp
    type: python_server
    source: .
    endpoints:
      - endpoint: /predict
        module: serving
        function: predict
      - endpoint: /healthcheck
        module: serving
        function: healthcheck
      - endpoint: /version
        module: serving
        function: version
services:
  - service: my_datascience_server
    image: myorg/mydatascienceapp
pipelines:
  - pipeline: my_datascience_pipeline
    start_date: 1970-01-01
    timeout_minutes: 45
    schedule: 0 * 1 * *
    metrics:
      namespace: DataScience
      backends: []
    tasks:
      - task: train
        type: python
        description: train model
        image: myorg/mydatascienceapp
        cmd: python -u training.py train
        env:
          MOUNT_PATH: /mnt/gettingstartedvol
        mounts:
          - mount: mymount
            volume: gettingstartedvol
            path: /mnt/gettingstartedvol
      - task: validate
        type: python
        description: validate model and deploy
        image: myorg/mydatascienceapp
        cmd: python -u training.py validate
        env:
          MOUNT_PATH: /mnt/gettingstartedvol
        mounts:
          - mount: mymount
            volume: gettingstartedvol
            path: /mnt/gettingstartedvol


================================================
FILE: examples/aws-ml-app-demo/manifests/aws-ml-app-demo.yaml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---

apiVersion: v1
kind: Pod
metadata:
  name: aws-ml-app-demo
spec:
  volumes:
    - name: task-pv-storage
      persistentVolumeClaim:
        claimName: gettingstartedvol-pvc
  containers:
    - name: task-pv-container
      imagePullPolicy: Never
      image: myorg/mydatascienceapp
      lifecycle:
        postStart:
          exec:
            command: [/bin/bash, -c, apt update && apt install curl -y]
      ports:
        - containerPort: 80
          name: http-server
      volumeMounts:
        - mountPath: /mnt/gettingstartedvol
          name: task-pv-storage


================================================
FILE: examples/aws-ml-app-demo/model_store.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import glob
import os
import pickle
import time

MOUNT_PATH = os.environ.get('MOUNT_PATH', '/mnt/gettingstartedvol')
PRODUCTION = 'production'
CANDIDATE = 'candidate'

_ONE_HOUR = 60 * 60


class ModelStore:
    """File-system backed store for pickled models.

    Models are written to ``{MOUNT_PATH}/{env}/{version}/model.p``. The
    "latest" model is the ``.p`` file whose full path sorts highest as a
    string, so versions are compared lexicographically.
    """

    def __init__(self, env):
        """Create a store for the given environment name (sub-folder of MOUNT_PATH)."""
        self.env = env
        self._latest_model = None
        self._latest_version = None
        self._last_check = time.time()

    def load_latest_model(self, force=False):
        """Return ``(model, version)`` for the newest stored model.

        The model is (re)loaded from disk when nothing is cached yet, when
        the cached copy is older than one hour, or when *force* is true;
        otherwise the cached pair is returned.

        Raises:
            IndexError: if no model file exists for this environment.
        """
        is_stale = time.time() - self._last_check > _ONE_HOUR
        if force or self._latest_model is None or is_stale:
            self._latest_model, self._latest_version = self._download_latest_model()
            # Restart the staleness window; without this every call made
            # after the first hour would hit the disk again.
            self._last_check = time.time()

        return self._latest_model, self._latest_version

    def save_model(self, model, version):
        """Pickle *model* to ``{MOUNT_PATH}/{env}/{version}/model.p``."""
        key = 'model.p'
        path = f'{MOUNT_PATH}/{self.env}/{version}'

        os.makedirs(path, exist_ok=True)
        # Context manager guarantees the file is flushed and closed.
        with open(f'{path}/{key}', 'wb') as model_file:
            pickle.dump(model, model_file)

    def _download_latest_model(self):
        """Load the highest-sorting ``.p`` file and return ``(model, version)``.

        The version is the name of the directory that contains the file.
        """
        objects = glob.glob(f'{MOUNT_PATH}/{self.env}/**/*')
        models = sorted((obj for obj in objects if obj.endswith('.p')), reverse=True)
        # Deliberately left as an IndexError when no model exists yet:
        # callers use it to detect the "model is not ready" state.
        latest_key = models[0]
        version = os.path.basename(os.path.dirname(latest_key))
        print(f'Loading model version {version}')
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # this with a trusted MOUNT_PATH.
        with open(latest_key, 'rb') as model_file:
            return pickle.load(model_file), version


================================================
FILE: examples/aws-ml-app-demo/requirements.txt
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

scikit-learn==0.23.2
apache-liminal==0.0.2
Werkzeug==1.0.1
itsdangerous==1.1.0
MarkupSafe==1.1.1
Flask


================================================
FILE: examples/aws-ml-app-demo/serving.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import json

import model_store
from model_store import ModelStore

_MODEL_STORE = ModelStore(model_store.PRODUCTION)
_PETAL_WIDTH = 'petal_width'


def predict(input_json):
    """Score a JSON request against the latest production model.

    *input_json* is a JSON object string containing the ``petal_width``
    key. On success a JSON string with ``result`` (element ``[0][1]`` of
    ``predict_proba``, as a string) and ``version`` is returned. On failure
    a plain ``'Failure...'`` message is returned instead of raising.
    """
    try:
        features = json.loads(input_json)
        model, version = _MODEL_STORE.load_latest_model()
        petal_width = float(features[_PETAL_WIDTH])
        probabilities = model.predict_proba([[petal_width]])
        payload = {"result": str(probabilities[0][1]), "version": version}
        return json.dumps(payload)

    except IndexError:
        # Raised by the model store when no model file exists yet.
        return 'Failure: the model is not ready yet'

    except Exception as e:
        print(e)
        return 'Failure'


def healthcheck(self):
    """Return a fixed liveness message; the argument is ignored."""
    status = 'Server is up!'
    return status


def version(self):
    """Return the version of the latest production model.

    The argument is ignored. When the model cannot be loaded, the error
    text is returned as a string (not the exception object), so the
    endpoint always yields a string like the other handlers in this module.
    """
    try:
        _, version = _MODEL_STORE.load_latest_model()
        print(f'version={version}')
        return version
    except Exception as e:
        return str(e)


================================================
FILE: examples/aws-ml-app-demo/training.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import sys
import time

import model_store
import numpy as np
from model_store import ModelStore
from sklearn import datasets
from sklearn.linear_model import LogisticRegression

_CANDIDATE_MODEL_STORE = ModelStore(model_store.CANDIDATE)
_PRODUCTION_MODEL_STORE = ModelStore(model_store.PRODUCTION)


def train_model():
    """Train a logistic-regression model on iris petal width and store it as a candidate.

    The target is binary: 1 where the iris target class equals 2, else 0.
    The model version is the current Unix time rounded to whole seconds.
    """
    iris = datasets.load_iris()

    X = iris["data"][:, 3:]  # petal width
    # Builtin int replaces the np.int alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
    y = (iris["target"] == 2).astype(int)

    model = LogisticRegression()
    model.fit(X, y)

    version = round(time.time())

    print(f'Saving model with version {version} to candidate model store.')
    _CANDIDATE_MODEL_STORE.save_model(model, version)


def validate_model():
    """Sanity-check the latest candidate model and copy it to the production store.

    Raises:
        ValueError: if the model's ``predict`` does not return a NumPy array.
    """
    candidate, candidate_version = _CANDIDATE_MODEL_STORE.load_latest_model()
    print(f'Validating model with version {candidate_version} to candidate model store.')

    # Smoke test: a single one-feature sample must yield an ndarray.
    prediction = candidate.predict([[1]])
    if not isinstance(prediction, np.ndarray):
        raise ValueError('Invalid model')

    print(f'Deploying model with version {candidate_version} to production model store.')
    _PRODUCTION_MODEL_STORE.save_model(candidate, candidate_version)


if __name__ == '__main__':
    # Command-line entry point: the first argument selects the step to run.
    cmd = sys.argv[1]
    commands = {'train': train_model, 'validate': validate_model}
    if cmd not in commands:
        raise ValueError(f"Unknown command {cmd}")
    commands[cmd]()


================================================
FILE: examples/extensibility/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/extensibility/executors/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/extensibility/executors/custom_executor.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from plugins.tasks import custom_task

from liminal.runners.airflow.model import executor


class CustomExecutor(executor.Executor):
    """Example executor that delegates to the task's own ``custom_task_logic``."""

    # Task classes this executor declares support for.
    supported_task_types = [custom_task.CustomTask]

    def _apply_executor_task_to_dag(self, **kwargs):
        """Print a greeting, then return the result of ``kwargs['task'].custom_task_logic()``."""
        print("Hello from custom executor")
        task = kwargs['task']
        return task.custom_task_logic()


================================================
FILE: examples/extensibility/images/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/extensibility/images/custom_image.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import os

from liminal.build.image_builder import ImageBuilder


class CustomImageBuilder(ImageBuilder):
    """Example image builder that uses the Dockerfile shipped next to this module."""

    def __init__(self, config, base_path, relative_source_path, tag):
        """Forward all arguments unchanged to ``ImageBuilder``."""
        super().__init__(config, base_path, relative_source_path, tag)

    @staticmethod
    def _dockerfile_path():
        """Return the path of ``custom_image_builder/Dockerfile`` relative to this file's directory."""
        module_dir = os.path.dirname(__file__)
        return os.path.join(module_dir, 'custom_image_builder/Dockerfile')


================================================
FILE: examples/extensibility/images/custom_image_builder/Dockerfile
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# Minimal example image: Alpine base with the build context copied in.
FROM alpine:3.4

# All following instructions (and the container's default directory) use /app.
WORKDIR /app

# Copy the entire build context into the image.
COPY . /app/


================================================
FILE: examples/extensibility/images/custom_image_builder/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/extensibility/liminal.yml
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
---
name: Extensability
images:
  - image: custom_image_example
    type: custom_image
executors:
  - executor: my_custom_exec
    type: custom_executor
pipelines:
  - pipeline: getting_started_pipeline_extensibility
    owner: Bosco Albert Baracus
    start_date: 1970-01-01
    timeout_minutes: 10
    schedule: 0 * 1 * *
    tasks:
      - task: custom_task_example
        type: custom_task
        executor: my_custom_exec


================================================
FILE: examples/extensibility/tasks/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/extensibility/tasks/custom_task.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from airflow.operators.bash import BashOperator

from liminal.runners.airflow.model import task


class CustomTask(task.Task):
    """Example custom liminal task that runs a single bash ``echo`` command."""

    def custom_task_logic(self):
        """Create the ``hello_world`` BashOperator and chain it after the parent.

        Returns:
            The newly created ``BashOperator``.
        """
        operator = BashOperator(
            bash_command='echo "hello from liminal custom task"',
            task_id='hello_world',
        )

        # Only link to an upstream task when one has been set on this task.
        upstream = self.parent
        if upstream:
            upstream.set_downstream(operator)

        return operator


================================================
FILE: examples/liminal-getting-started/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/liminal-getting-started/hello_world.json
================================================
{
  "": [
    "Licensed to the Apache Software Foundation (ASF) under one",
    "or more contributor license agreements.  See the NOTICE file",
    "distributed with this work for additional information",
    "regarding copyright ownership.  The ASF licenses this file",
    "to you under the Apache License, Version 2.0 (the",
    "\"License\"); you may not use this file except in compliance",
    "with the License.  You may obtain a copy of the License at",
    "",
    "  http://www.apache.org/licenses/LICENSE-2.0",
    "",
    "Unless required by applicable law or agreed to in writing,",
    "software distributed under the License is distributed on an",
    "\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY",
    "KIND, either express or implied.  See the License for the",
    "specific language governing permissions and limitations",
    "under the License."
  ],
  "hello": "world"
}


================================================
FILE: examples/liminal-getting-started/helloworld/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/liminal-getting-started/helloworld/hello_world.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import json
import os

# Greet, then dump the process environment so it is visible in the task output.
print('Hello world!\n')
print('Environment:')
print(os.environ)

# Persist a small JSON document under /mnt/gettingstartedvol.
output_path = '/mnt/gettingstartedvol/hello_world_output.json'
payload = {'hello': 1, 'world': 2}
with open(output_path, 'w') as output_file:
    json.dump(payload, output_file)


================================================
FILE: examples/liminal-getting-started/liminal.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
name: GettingStartedPipeline
volumes:
  - volume: gettingstartedvol
    claim_name: gettingstartedvol-pvc
    local:
      path: .
variables:
  myvar: myval
images:
  - image: python_hello_world_example_image
    type: python
    source: helloworld
  - image: liminal_getting_started_server_image
    type: python_server
    source: myserver
    endpoints:
      - endpoint: /myendpoint1
        module: my_server
        function: myendpoint1func
pipelines:
  - pipeline: getting_started_pipeline
    owner: Bosco Albert Baracus
    start_date: 1970-01-01
    timeout_minutes: 10
    schedule: 0 * 1 * *
    default_arg_loaded: check
    default_array_loaded: [2, 3, 4]
    default_object_loaded:
      key1: val1
      key2: val2
    metrics:
      namespace: TestNamespace
      backends: []
    tasks:
      - task: python_hello_world_example
        type: python
        image: python_hello_world_example_image
        env_vars:
          env1: '{{myvar}}'
          env2: foo
        mounts:
          - mount: mymount
            volume: gettingstartedvol
            path: /mnt/gettingstartedvol
        cmd: python -u hello_world.py
        executors: 2
        # cmd: python -u hello_world.py
      - task: python_hello_world_output_task
        type: python
        description: task with input from other task's output
        image: python_hello_world_example_image
        env_vars:
          env1: a
          env2: b
        mounts:
          - mount: mymount
            volume: gettingstartedvol
            path: /mnt/gettingstartedvol
        cmd: python -u hello_world.py
services:
  - service: liminal_getting_started_python_server
    description: my python server
    image: liminal_getting_started_server_image


================================================
FILE: examples/liminal-getting-started/myserver/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/liminal-getting-started/myserver/my_server.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


def myendpoint1func():
    """Return the constant string ``'1'`` as the endpoint response."""
    response = '1'
    return response


================================================
FILE: examples/liminal-getting-started/pip.conf
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/liminal-getting-started/requirements.txt
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/sagemaker_example/README.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

# SageMaker

Enable access to AWS STS AssumeRole:

Go to IAM->Role of your user and add the following under Trust Relationships:
```
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Federated": "arn:aws:iam::<ACCOUNT_ID>:saml-provider/Okta"
            },
            "Action": "sts:AssumeRoleWithSAML",
            "Condition": {
                "StringEquals": {
                    "SAML:aud": "https://signin.aws.amazon.com/saml"
                }
            }
        },
        {
            "Sid": "",
            "Effect": "Allow",
            "Principal": {
                "Service": "sagemaker.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}
```


================================================
FILE: examples/sagemaker_example/archetype/liminal.yaml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---

# superliminal for local development
name: InfraSageMaker
owner: Bosco Albert Baracus
type: super
secrets:
  - secret: aws
    local_path_file: "~/.aws/credentials"
executors:
  - executor: k8s
    type: kubernetes
variables:
  output_root_dir: "/mnt/smvolume"
  input_root_dir: ''
images:
  - image: my_sm_image
    source: .
    type: python
    no_cache: false
task_defaults:
  python:
    executor: k8s
    image: my_sm_image
    application_source: '{{application}}'
    env_vars:
      AWS_CONFIG_FILE: "/secret/credentials"
      COMM_PATH: "{{output_root_dir}}"
    secrets:
      - secret: aws
        remote_path: "/secret"
    mounts:
      - mount: mymount
        volume: smvolume
        path: /mnt/smvolume
volumes:
  - volume: smvolume
    claim_name: smvolume-pvc
    local:
      path: .


================================================
FILE: examples/sagemaker_example/data_preparation/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/sagemaker_example/data_preparation/data_preparation.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import argparse
import os
from pathlib import Path

import pandas as pd
from data_preparation.data_uploader import get_uploader
from sklearn.model_selection import train_test_split

# File name under which load() writes the test split before uploading it.
TEST_CSV = "diamonds_test.csv"

# File name under which load() writes the training split before uploading it.
TRAIN_CSV = "diamonds_train.csv"

# Name of the target column that transform() separates from the features.
LABEL_COLUMN = 'price'

# Default value of the --input_uri command-line argument.
DATASET_PUBLIC_URL = "https://www.openml.org/data/get_csv/21792853/dataset"


def transform(data):
    """Split ``data`` into train and test frames that both keep the label column.

    Args:
        data: DataFrame holding the feature columns plus ``LABEL_COLUMN``.

    Returns:
        A ``(train, test)`` tuple of DataFrames from a 70/30 split made with a
        fixed random seed (``random_state=42``).
    """
    features = data.drop(columns=LABEL_COLUMN)
    labels = data[LABEL_COLUMN]

    split = train_test_split(features, labels, test_size=0.3, random_state=42)
    features_train, features_test, labels_train, labels_test = split

    # Re-attach the label to a copy of each feature split.
    train = features_train.copy()
    train[LABEL_COLUMN] = labels_train

    test = features_test.copy()
    test[LABEL_COLUMN] = labels_test
    return train, test


def extract(input_uri):
    """Read the CSV located at ``input_uri`` into a pandas DataFrame."""
    frame = pd.read_csv(input_uri)
    return frame


def load(train, test, output_uri_base, data_uploader):
    """Write both splits to CSV files and pass them to ``data_uploader``.

    Args:
        train: DataFrame written to ``TRAIN_CSV`` in the working directory.
        test: DataFrame written to ``TEST_CSV`` in the working directory.
        output_uri_base: Base location; ``train`` and ``test`` are appended to it.
        data_uploader: Object exposing ``upload(local_path, output_uri_base)``.

    Returns:
        A ``(train_path, test_path)`` tuple holding whatever ``upload`` returned
        for each split.
    """
    # Both CSV files are written before the first upload starts.
    train.to_csv(TRAIN_CSV)
    test.to_csv(TEST_CSV)

    train_target = os.path.join(output_uri_base, "train")
    uploaded_train = data_uploader.upload(TRAIN_CSV, train_target)

    test_target = os.path.join(output_uri_base, "test")
    uploaded_test = data_uploader.upload(TEST_CSV, test_target)

    return uploaded_train, uploaded_test


def data_pipeline(input_uri, output_uri_base, data_uploader):
    """Run extract, transform and load in sequence.

    Returns:
        The ``(train_path, test_path)`` tuple produced by ``load``.
    """
    train, test = transform(extract(input_uri))
    return load(train, test, output_uri_base, data_uploader)


if __name__ == "__main__":
    # Default output: a file:// URI pointing at the "data" directory two levels above this file.
    default_output_uri = f"file://{Path(__file__).parent.parent.absolute().joinpath('data')}"

    cli = argparse.ArgumentParser()
    cli.add_argument("--input_uri", default=DATASET_PUBLIC_URL)
    cli.add_argument(
        "--output_uri_base",
        default=default_output_uri,
        help="a uri starting with 's3', 'file' or a relative path which will be treated as sagemaker prefix",
    )
    options = cli.parse_args()

    # The uploader implementation is chosen from the scheme of the output URI.
    uploader = get_uploader(options.output_uri_base)
    data_pipeline(options.input_uri, options.output_uri_base, uploader)


================================================
FILE: examples/sagemaker_example/data_preparation/data_uploader.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import shutil
from abc import ABC, abstractmethod
from urllib.parse import urlparse

import boto3
import sagemaker


def get_uploader(output_uri_base):
    """Return a new uploader instance matching the scheme of ``output_uri_base``.

    ``s3`` selects ``S3DataUploader`` and ``file`` selects ``LocalDataUploader``;
    every other scheme (including none) selects ``SagemakerDataUploader``.
    """
    scheme = urlparse(output_uri_base).scheme
    uploader_by_scheme = {
        's3': S3DataUploader,
        "file": LocalDataUploader,
    }
    uploader_cls = uploader_by_scheme.get(scheme, SagemakerDataUploader)
    return uploader_cls()


class DataUploader(ABC):
    """Abstract interface for objects that upload a local file to a destination."""

    def __init__(self):
        """Initialize the uploader; the base class keeps no state."""

    @abstractmethod
    def upload(self, local_path, output_uri_base):
        """Upload ``local_path`` to the location described by ``output_uri_base``."""


class LocalDataUploader(DataUploader):
    """Uploader that copies files into a directory on the local filesystem."""

    # Not used by any method of this class.
    s3_client = None

    def upload(self, local_path, output_uri_base):
        """Copy ``local_path`` into the directory named by the URI's path component.

        The directory is created when missing.

        Returns:
            The destination directory path.
        """
        target_dir = urlparse(output_uri_base).path
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy2(local_path, target_dir)
        return target_dir


class S3DataUploader(DataUploader):
    """Uploader that stores files in S3 below an ``s3://<bucket>/<prefix>`` base URI."""

    s3_client = None

    def __init__(self):
        self.s3_client = boto3.client('s3')

    def upload(self, local_path, output_uri_base):
        """Upload ``local_path`` below ``output_uri_base`` and return its S3 URI.

        Args:
            local_path: Path of the local file to upload.
            output_uri_base: ``s3://<bucket>/<prefix>`` URI. The bucket is taken
                from the network location and the prefix from the path.

        Returns:
            The ``s3://<bucket>/<key>`` URI of the uploaded object, so callers
            receive a usable location just as they do from the other uploaders.
        """
        o = urlparse(output_uri_base)
        bucket = o.netloc
        # urlparse keeps the leading "/" of the path; stripping it keeps the object
        # key from starting with an empty path segment.
        prefix = o.path.strip('/')
        filename = os.path.basename(local_path)
        # Place the file under the base prefix instead of using the prefix itself
        # as the object key.
        key = f"{prefix}/{filename}" if prefix else filename
        self.s3_client.upload_file(local_path, bucket, key)
        return f"s3://{bucket}/{key}"


class SagemakerDataUploader(DataUploader):
    """Uploader that delegates to a SageMaker session's ``upload_data``."""

    # Class-level placeholders; each is replaced per instance in __init__.
    sm_client = None
    sm_session = None
    region = None
    default_bucket = None

    def __init__(self):
        """Create the SageMaker client and session and cache region and default bucket."""
        self.sm_client = boto3.client("sagemaker")
        session = sagemaker.Session()
        self.sm_session = session
        self.region = session.boto_session.region_name
        self.default_bucket = session.default_bucket()

    def upload(self, local_path, output_uri_base):
        """Upload ``local_path`` through the SageMaker session.

        The bucket comes from the URI's network location and falls back to the
        session's default bucket; the URI's path is passed as the key prefix.

        Returns:
            Whatever ``Session.upload_data`` returns.
        """
        parsed = urlparse(output_uri_base)
        target_bucket = parsed.netloc if parsed.netloc else self.default_bucket
        return self.sm_session.upload_data(
            path=local_path,
            bucket=target_bucket,
            key_prefix=parsed.path,
        )


================================================
FILE: examples/sagemaker_example/data_train/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/sagemaker_example/data_train/train.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import argparse
import os
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from data_preparation.data_preparation import LABEL_COLUMN, TEST_CSV, TRAIN_CSV
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Columns that train() selects as model inputs from the train and test frames.
feature_column_names = ['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'x', 'y', 'z']


# Feature columns that `preprocessor` one-hot encodes.
categorical_cols = ['cut', 'clarity', 'color']

# File name under which train() persists the fitted pipeline with joblib.
MODEL_JOBLIB_FILENAME = "model.joblib"

# One-hot encodes the categorical columns; all remaining columns pass through unchanged.
preprocessor = ColumnTransformer(
    transformers=[('categorical', OneHotEncoder(), categorical_cols)], remainder='passthrough'
)


def train(train_df, test_df, n_jobs, model_dir):
    """Fit the diamond price pipeline, print its test error and persist it.

    Args:
        train_df: DataFrame with the feature columns and ``LABEL_COLUMN`` used for fitting.
        test_df: DataFrame with the same columns used for validation.
        n_jobs: Forwarded to ``LinearRegression(n_jobs=...)``.
        model_dir: Directory in which the fitted pipeline is saved.

    Returns:
        The path of the persisted joblib file.
    """
    train_features = train_df[feature_column_names]
    train_labels = train_df[LABEL_COLUMN]

    print(train_features.head(3))
    test_features = test_df[feature_column_names]
    test_labels = test_df[LABEL_COLUMN]

    # Preprocessing and regression are fitted together as one pipeline.
    regressor = LinearRegression(n_jobs=n_jobs)
    model_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', regressor)])
    model_pipeline.fit(train_features, train_labels)

    print(model_pipeline.predict(train_features))

    print("validating model")
    test_predictions = model_pipeline.predict(test_features)
    abs_err = np.abs(test_predictions - test_labels)
    print(f"Test prediction error:{abs_err} ")

    # Persist the whole fitted pipeline, preprocessing included.
    path = os.path.join(model_dir, MODEL_JOBLIB_FILENAME)
    joblib.dump(model_pipeline, path)
    print("model persisted at " + path)
    return path


if __name__ == '__main__':
    # Local fallback locations live in the "data" directory two levels above this file.
    data_path = Path(__file__).parent.parent.absolute().joinpath('data')
    parser = argparse.ArgumentParser()
    # joblib.dump() opens its target as an ordinary file, so the local fallback must be
    # a filesystem path; a "file://" URI would be treated as a relative path and fail.
    parser.add_argument('--model-dir', type=str, default=os.getenv('SM_MODEL_DIR', str(data_path)))
    # Parse as int so a value supplied on the command line is not handed to the
    # estimator as a string.
    parser.add_argument('--n-jobs', type=int, default=2)
    parser.add_argument(
        "--train", type=str, default=os.getenv("SM_CHANNEL_TRAIN", f"file://{data_path.joinpath('train')}")
    )
    parser.add_argument(
        "--test", type=str, default=os.getenv("SM_CHANNEL_TEST", f"file://{data_path.joinpath('test')}")
    )
    parser.add_argument("--train-file", type=str, default=TRAIN_CSV)
    parser.add_argument("--test-file", type=str, default=TEST_CSV)

    args = parser.parse_args()

    train_df = pd.read_csv(os.path.join(args.train, args.train_file))
    test_df = pd.read_csv(os.path.join(args.test, args.test_file))

    train(train_df=train_df, test_df=test_df, n_jobs=args.n_jobs, model_dir=args.model_dir)


================================================
FILE: examples/sagemaker_example/inference.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import argparse
import os
from io import BytesIO

import joblib
import numpy as np
import pandas as pd

# Column names assigned to incoming arrays in input_fn() and selected again in predict_fn().
feature_column_names = ['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'x', 'y', 'z']


# Categorical feature columns; not referenced by the functions in this module.
categorical_cols = ['cut', 'clarity', 'color']

# File name of the serialized model that model_fn() loads from the model directory.
MODEL_JOBLIB_FILENAME = "model.joblib"


def model_fn(model_dir):
    """Load and return the joblib-serialized model stored in ``model_dir``."""
    model_path = os.path.join(model_dir, MODEL_JOBLIB_FILENAME)
    return joblib.load(model_path)


def input_fn(input_data, content_type):
    """Decode a serialized NumPy payload into a DataFrame of feature columns.

    Args:
        input_data: Raw bytes of an array saved with ``np.save``.
        content_type: MIME type of the payload; only ``application/x-npy`` is accepted.

    Returns:
        A DataFrame whose columns are ``feature_column_names``.

    Raises:
        ValueError: If ``content_type`` is anything other than ``application/x-npy``.
    """
    if content_type != "application/x-npy":
        raise ValueError(
            f"content type {content_type} is not supported by this inference endpoint. Please send a legal application/x-npy payload"
        )

    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays found in the
    # payload; confirm that only trusted clients can reach this function.
    payload = BytesIO(input_data)
    array = np.load(payload, allow_pickle=True)
    return pd.DataFrame(data=array, columns=feature_column_names)


def predict_fn(input_data, model):
    """Return ``model.predict`` applied to the feature columns of ``input_data``."""
    features = input_data[feature_column_names]
    return model.predict(features)


def df_to_inference_input(data=None):
    """Build an inference input from the first ten rows of a DataFrame.

    The rows are serialized with ``np.save`` and decoded again through
    ``input_fn`` so the result follows the same path as an
    ``application/x-npy`` request.

    Args:
        data: DataFrame containing ``feature_column_names``. When omitted, the
            module-level ``df`` set by the ``__main__`` block is used, which
            keeps existing zero-argument calls working.

    Returns:
        The DataFrame produced by ``input_fn`` for the serialized rows.
    """
    if data is None:
        # Backward-compatible fallback: `df` only exists when this file runs as a script.
        data = df
    sample = data[feature_column_names].head(10)
    buffer = BytesIO()
    np.save(buffer, sample.to_numpy(), allow_pickle=True)
    return input_fn(buffer.getvalue(), "application/x-npy")


if __name__ == "__main__":
    # Command-line options: where the serialized model lives and which CSV to score.
    cli = argparse.ArgumentParser()
    cli.add_argument("--model-dir", type=str, default=os.getenv("SM_MODEL_DIR", "../data"))
    cli.add_argument("--data-path", type=str, default="../data/test/diamonds_test.csv")
    options = cli.parse_args()

    model = model_fn(options.model_dir)
    # `df` must remain a module-level name: df_to_inference_input() reads it as a global.
    df = pd.read_csv(options.data_path)
    input_data = df_to_inference_input()
    predictions = predict_fn(input_data, model)
    print(predictions)


================================================
FILE: examples/sagemaker_example/liminal.yaml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
name: SageMakerExample
super: InfraSageMaker
owner: Bosco Albert Baracus
pipelines:
  - pipeline: sagemaker_diamonds_train_and_inference
    start_date: 1970-01-01
    timeout_minutes: 45
    schedule: 0 * 1 * *
    tasks:
      - task: data_preprocessing
        type: python
        description: prepare the data for training
        cmd: python -u liminal_sm.py --action data_prep --output_uri_base liminal-sm-example/data
      - task: train
        type: python
        description: train model
        cmd: python -u liminal_sm.py --action train --base_job_name liminal-base-training-job --train_instance_type ml.m5.large --n_jobs 5
      - task: deploy
        type: python
        description: Deploy model
        cmd: python -u liminal_sm.py --action deploy --model_name liminal-sm-diamonds-model --deploy_instance_type ml.m5.large
      - task: validate
        type: python
        description: validate model post deployment
        cmd: python -u liminal_sm.py --action validate


================================================
FILE: examples/sagemaker_example/liminal_sm.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import tempfile

from sagemaker.deserializers import NumpyDeserializer
from sagemaker.predictor import Predictor
from sagemaker.serializers import NumpySerializer
from sm_ops import create_sm_args_parser, sm_data_prep, sm_deploy, sm_train, sm_validate

# Directory through which the pipeline steps exchange small message files.
# mkdtemp() runs only when COMM_PATH is unset or empty, so no stray temporary
# directory is created when the environment already provides a location.
COMM_PATH = os.getenv("COMM_PATH") or tempfile.mkdtemp()
os.makedirs(COMM_PATH, exist_ok=True)


def read_message(name):
    """Return the whitespace-stripped lines of the message file ``name`` in ``COMM_PATH``."""
    message_path = os.path.join(COMM_PATH, name)
    with open(message_path) as message_file:
        return [line.strip() for line in message_file]


def forward_message(name, lines):
    """Write ``lines`` to the message file ``name`` under COMM_PATH, one entry per line.

    A value that is not a list is treated as a single-line message. Any
    existing file of the same name is overwritten.
    """
    entries = lines if isinstance(lines, list) else [lines]
    payload = '\n'.join(entries) + '\n'
    with open(os.path.join(COMM_PATH, name), 'w') as message_file:
        message_file.write(payload)


if __name__ == '__main__':
    # Pipeline steps in execution order; 'all' (the last entry) runs every step in sequence.
    choices = ['data_prep', 'train', 'deploy', 'validate', 'all']
    parser = create_sm_args_parser()
    parser.add_argument("--action", choices=choices, default='all')
    args = parser.parse_args()
    steps = [args.action]
    if args.action == 'all':
        steps = choices[:-1]
    # Each step reads the outputs of earlier steps from message files and
    # writes its own output to a message file named after the step.
    for step in steps:
        if step == 'data_prep':
            train, test = sm_data_prep(args.input_uri, args.output_uri_base)
            forward_message('data_prep', [train, test])
        elif step == 'train':
            lines = read_message('data_prep')
            train, test = lines[0], lines[1]
            artifact = sm_train(
                train,
                test,
                base_job_name=args.base_job_name,
                instance_type=args.train_instance_type,
                n_jobs=args.n_jobs,
            )
            forward_message('train', artifact)
        elif step == 'deploy':
            artifact = read_message('train')[0]
            predictor = sm_deploy(
                artifact=artifact, model_name=args.model_name, instance_type=args.deploy_instance_type
            )
            # `Predictor.endpoint` is a deprecated alias in SageMaker SDK v2;
            # `endpoint_name` is the supported attribute.
            forward_message('deploy', predictor.endpoint_name)
        else:
            # 'validate' is the only remaining value argparse lets through.
            endpoint = read_message('deploy')[0]
            # Only the test location is needed here; the unpacking still
            # requires the message to hold exactly two lines.
            _, test = read_message('data_prep')
            predictor = Predictor(
                endpoint_name=endpoint, serializer=NumpySerializer(), deserializer=NumpyDeserializer()
            )
            result = sm_validate(predictor, test)
            print(f"avg abs error:{result}")


================================================
FILE: examples/sagemaker_example/requirements.txt
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

sagemaker==2.57.0
scikit-learn==0.23.2
s3fs==2022.5.0
protobuf==3.20.*


================================================
FILE: examples/sagemaker_example/sm_ops.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import argparse
import os
import time

import boto3
import numpy as np
import pandas as pd
import sagemaker
from data_preparation.data_preparation import (
    DATASET_PUBLIC_URL,
    LABEL_COLUMN,
    data_pipeline,
)
from data_preparation.data_uploader import SagemakerDataUploader
from data_train.train import feature_column_names
from sagemaker.sklearn.estimator import SKLearn
from sagemaker.sklearn.model import SKLearnModel

# Framework version passed to both the SKLearn estimator and the SKLearnModel below.
FRAMEWORK_VERSION = "0.23-1"

# boto3 SageMaker client, created once at import time; used to look up training-job details.
sm_boto3 = boto3.client("sagemaker")


def get_role():
    """Return the role to run SageMaker jobs with.

    Uses ``sagemaker.get_execution_role()``; when that raises ``ValueError``
    the value of the ``SM_ROLE`` environment variable is returned instead
    (``None`` if it is unset).
    """
    try:
        return sagemaker.get_execution_role()
    except ValueError:
        return os.getenv("SM_ROLE")


def sm_train(train_path, test_path, base_job_name="Liminal-sm-training-job", instance_type="ml.m5.large", n_jobs=1):
    """Fit an SKLearn estimator on the given channels and return the model artifact location.

    ``train_path`` and ``test_path`` are passed to ``fit`` as the "train" and
    "test" channels; ``n_jobs`` is forwarded to the training script as the
    ``n-jobs`` hyperparameter. The returned value is the ``S3ModelArtifacts``
    entry of the finished training job's description.
    """
    hyperparameters = {
        "n-jobs": n_jobs,
    }
    estimator = SKLearn(
        entry_point="data_train/train.py",
        source_dir="./",
        role=get_role(),
        instance_count=1,
        instance_type=instance_type,
        framework_version=FRAMEWORK_VERSION,
        base_job_name=base_job_name,
        hyperparameters=hyperparameters,
    )
    channels = {"train": train_path, "test": test_path}
    estimator.fit(channels)

    # Look the job up by name to find where its model artifact was stored.
    job_name = estimator.latest_training_job.name
    job_description = sm_boto3.describe_training_job(TrainingJobName=job_name)
    artifact = job_description["ModelArtifacts"]["S3ModelArtifacts"]

    print("Model artifact persisted at " + artifact)
    return artifact


def sm_deploy(artifact, model_name="Liminal-sm-demo-model", instance_type="ml.m5.large"):
    """Create an SKLearnModel from ``artifact`` and deploy it on a single instance.

    The model is named ``<model_name>-<UTC timestamp>`` so repeated
    deployments do not collide. Blocks until deployment finishes
    (``wait=True``) and returns the predictor produced by ``model.deploy``.
    """
    # The "-" separator lives in the f-string below; keeping it out of the
    # strftime format avoids a double hyphen ("name--2022-...") in the name.
    timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
    model = SKLearnModel(
        name=f"{model_name}-{timestamp}",
        entry_point="inference.py",
        model_data=artifact,
        role=get_role(),
        framework_version=FRAMEWORK_VERSION,
    )

    predictor = model.deploy(instance_type=instance_type, initial_instance_count=1, wait=True)
    return predictor


def sm_validate(predictor, test_path):
    """Score ``predictor`` on the CSV at ``test_path`` and return the average absolute error.

    A ``test_path`` that does not start with "s3" is prefixed with "s3://"
    before it is read. The feature columns are sent to the predictor and the
    predictions are compared against the label column.
    """
    s3_uri = test_path if test_path.startswith("s3") else f"s3://{test_path}"

    test_frame = pd.read_csv(s3_uri)
    features = test_frame[feature_column_names].to_numpy()
    predictions = predictor.predict(data=features)
    print("validating model")
    abs_errors = np.abs(predictions - test_frame[LABEL_COLUMN])
    avg_err = np.average(abs_errors)
    print(f"average abs error:{avg_err}")
    print(predictions)
    return avg_err


def sm_data_prep(input_uri, output_uri_base):
    """Run the data pipeline with a SagemakerDataUploader and return its result.

    A falsy ``input_uri`` falls back to ``DATASET_PUBLIC_URL``.
    """
    source_uri = input_uri if input_uri else DATASET_PUBLIC_URL
    uploader = SagemakerDataUploader()
    return data_pipeline(input_uri=source_uri, output_uri_base=output_uri_base, data_uploader=uploader)


def sm_main(args):
    """Run the whole flow in one process: data prep, training, deployment, validation.

    ``args`` is the namespace produced by the parser from
    ``create_sm_args_parser()``.
    """
    train_path, test_path = sm_data_prep(args.input_uri, args.output_uri_base)
    artifact = sm_train(
        train_path,
        test_path,
        base_job_name=args.base_job_name,
        instance_type=args.train_instance_type,
        n_jobs=args.n_jobs,
    )
    predictor = sm_deploy(artifact, model_name=args.model_name, instance_type=args.deploy_instance_type)
    sm_validate(predictor, test_path)


def create_sm_args_parser():
    """Build the argument parser shared by the SageMaker example scripts.

    Every option has a default, so the parser accepts an empty command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_uri", default=DATASET_PUBLIC_URL)
    parser.add_argument("--output_uri_base", default="liminal-sm-example")
    parser.add_argument("--base_job_name", default="Liminal-sm-training-job")
    parser.add_argument("--train_instance_type", default="ml.m5.large")
    # type=int keeps the parsed value an int whether it comes from the default
    # or from the command line, and rejects non-numeric input early.
    parser.add_argument("--n_jobs", type=int, default=1)
    parser.add_argument("--model_name", default="Liminal-sm-demo-model")
    parser.add_argument("--deploy_instance_type", default="ml.m5.large")
    return parser


if __name__ == '__main__':
    # Script entry point: parse the command line and run the complete flow.
    sm_main(create_sm_args_parser().parse_args())


================================================
FILE: examples/spark-app-demo/k8s/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: examples/spark-app-demo/k8s/archetype/liminal.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
# superliminal for local development
name: InfraSpark
owner: Bosco Albert Baracus
type: super
executors:
  - executor: k8s
    type: kubernetes
variables:
  output_root_dir: /mnt/gettingstartedvol
  input_root_dir: ''
images:
  - image: my_spark_image
    source: .
    type: spark
    no_cache: true
task_defaults:
  spark:
    executor: k8s
    image: my_spark_image
    executors: 2
    application_source: '{{application}}'
    mounts:
      - mount: mymount
        volume: gettingstartedvol
        path: /mnt/gettingstartedvol


================================================
FILE: examples/spark-app-demo/k8s/data/iris.csv
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# source: https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data
sepallength,sepalwidth,petallength,petalwidth,class
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica


================================================
FILE: examples/spark-app-demo/k8s/data_cleanup.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import argparse

import pyspark.sql.functions as F
from pyspark.ml import Pipeline
from pyspark.ml.feature import StandardScaler, StringIndexer, VectorAssembler
from pyspark.ml.functions import vector_to_array
from pyspark.sql import SparkSession


def transform(data):
    """Standard-scale every column but the last and index the last column as ``label``.

    The result has one column per scaled input column (keeping its original
    name) followed by the ``label`` column.
    """
    feature_names = data.columns[:-1]
    stages = [
        VectorAssembler(inputCols=feature_names, outputCol="features"),
        StandardScaler(inputCol="features", outputCol="scaled_features", withStd=True, withMean=True),
        StringIndexer(inputCol=data.columns[-1], outputCol='label'),
    ]
    fitted_pipeline = Pipeline(stages=stages).fit(data)
    scaled = fitted_pipeline.transform(data)

    # Unpack the scaled vector back into one named column per feature.
    with_array = scaled.withColumn("feature_arr", vector_to_array("scaled_features"))
    feature_columns = [F.col("feature_arr")[position].alias(name) for position, name in enumerate(feature_names)]

    return with_array.select(feature_columns + ['label'])


def extract(spark, input_uri):
    """Read the CSV at ``input_uri`` with a header row and an inferred schema, skipping ``#`` comment lines."""
    reader_options = {"header": True, "inferSchema": True, "comment": "#"}
    return spark.read.csv(input_uri, **reader_options)


def load(data, output_uri):
    """Write ``data`` to ``output_uri`` as CSV with a header, coalesced to one partition, overwriting existing output."""
    single_partition = data.coalesce(1)
    writer = single_partition.write.mode("overwrite")
    writer.csv(output_uri, header=True)


def data_pipeline(input_uri, output_uri):
    """Extract the CSV at ``input_uri``, transform it and write the result to ``output_uri``.

    The Spark session is stopped when the pipeline ends, including when one of
    the steps raises.
    """
    spark = SparkSession.builder.appName("Prepare Iris Data").getOrCreate()
    try:
        raw_data = extract(spark, input_uri)
        data = transform(raw_data)
        load(data, output_uri)
    finally:
        # Stop the session even on failure so it is not left running.
        spark.stop()


if __name__ == "__main__":
    # Command-line entry point; both flags are optional and default to None.
    cli = argparse.ArgumentParser()
    for flag in ("--input_uri", "--output_uri"):
        cli.add_argument(flag)
    cli_args = cli.parse_args()
    data_pipeline(cli_args.input_uri, cli_args.output_uri)


================================================
FILE: examples/spark-app-demo/k8s/liminal.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
name: MyFirstLiminalSparkApp
super: InfraSpark
owner: Bosco Albert Baracus
variables:
  training_data_path: '{{output_root_dir}}/iris/'
  application: data_cleanup.py
images:
  - image: myorg/mydatascienceapp
    type: python_server
    source: .
    endpoints:
      - endpoint: /predict
        module: serving
        function: predict
      - endpoint: /healthcheck
        module: serving
        function: healthcheck
      - endpoint: /version
        module: serving
        function: version
task_defaults:
  python:
    mounts:
      - mount: mymount
        volume: gettingstartedvol
        path: /mnt/gettingstartedvol
pipelines:
  - pipeline: my_first_spark_pipeline
    start_date: 1970-01-01
    timeout_minutes: 45
    schedule: 0 * 1 * *
    tasks:
      - task: data_preprocessing
        type: spark
        description: prepare the data for training
        application_arguments:
          - --input_uri
          - '{{input_root_dir}}data/iris.csv'
          - --output_uri
          - '{{training_data_path}}'
      - task: train
        type: python
        description: train model
        image: myorg/mydatascienceapp
        cmd: python -u training.py --action train --input_uri '{{training_data_path}}'
        env:
          MOUNT_PATH: /mnt/gettingstartedvol
      - task: validate
        type: python
        description: validate model and deploy
        image: myorg/mydatascienceapp
        cmd: python -u training.py --action validate --input_uri '{{training_data_path}}'
        env:
          MOUNT_PATH: /mnt/gettingstartedvol
volumes:
  - volume: gettingstartedvol
    claim_name: gettingstartedvol-pvc
    local:
      path: .
services:
  - service:
    name: my_datascience_server
    type: python_server
    description: my ds server
    image: myorg/mydatascienceapp
    source: .
    endpoints:
      - endpoint: /predict
        module: serving
        function: predict
      - endpoint: /healthcheck
        module: serving
        function: healthcheck


================================================
FILE: examples/spark-app-demo/k8s/manifests/spark-app-demo.yaml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---

apiVersion: v1
kind: Pod
metadata:
  name: spark-app-demo
spec:
  volumes:
    - name: task-pv-storage
      persistentVolumeClaim:
        claimName: gettingstartedvol-pvc
  containers:
    - name: task-pv-container
      imagePullPolicy: Never
      image: myorg/mydatascienceapp
      lifecycle:
        postStart:
          exec:
            command: [/bin/bash, -c, apt update && apt install curl -y]
      ports:
        - containerPort: 80
          name: http-server
      volumeMounts:
        - mountPath: /mnt/gettingstartedvol
          name: task-pv-storage


================================================
FILE: examples/spark-app-demo/k8s/model_store.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import glob
import os
import pickle
import time

# Root directory of the model store; overridable through the MOUNT_PATH environment variable.
MOUNT_PATH = os.environ.get('MOUNT_PATH', '/mnt/gettingstartedvol')
# Names of the model-store environments.
PRODUCTION = 'production'
CANDIDATE = 'candidate'

# Lifetime of a cached model, in seconds.
_ONE_HOUR = 60 * 60


class ModelStore:
    """File-system model store laid out as ``MOUNT_PATH/<env>/<version>/model.p``.

    Models are stored as pickle files. The most recently loaded model is
    cached on the instance and refreshed at most once per hour unless a
    reload is forced.
    """

    def __init__(self, env):
        self.env = env
        self._latest_model = None
        self._latest_version = None
        self._last_check = time.time()

    def load_latest_model(self, force=False):
        """Return ``(model, version)`` for the newest model in this environment.

        The cached model is reused unless nothing has been loaded yet, the
        cache is older than one hour, or ``force`` is true.

        Raises:
            IndexError: if the environment contains no model file.
        """
        cache_expired = time.time() - self._last_check > _ONE_HOUR
        if self._latest_model is None or cache_expired or force:
            self._latest_model, self._latest_version = self._download_latest_model()
            # Restart the expiry window; otherwise every call made more than
            # an hour after construction would reload the model from disk.
            self._last_check = time.time()

        return self._latest_model, self._latest_version

    def save_model(self, model, version):
        """Pickle ``model`` to ``MOUNT_PATH/<env>/<version>/model.p``, creating directories as needed."""
        key = 'model.p'
        path = f'{MOUNT_PATH}/{self.env}/{version}'

        os.makedirs(path, exist_ok=True)
        # Context manager so the file is flushed and closed deterministically.
        with open(f'{path}/{key}', 'wb') as model_file:
            pickle.dump(model, model_file)

    def _download_latest_model(self):
        """Load the model whose path sorts last and return it with its version directory name."""
        objects = glob.glob(f'{MOUNT_PATH}/{self.env}/**/*')
        models = sorted((obj for obj in objects if obj.endswith('.p')), reverse=True)
        # Deliberately an IndexError when the store is empty.
        latest_key = models[0]
        # The version is the name of the directory that holds the pickle file.
        version = os.path.basename(os.path.dirname(latest_key))
        print(f'Loading model version {version}')
        # NOTE(security): pickle.load can execute arbitrary code; the mounted
        # volume must only ever contain trusted model files.
        with open(latest_key, 'rb') as model_file:
            return pickle.load(model_file), version


================================================
FILE: examples/spark-app-demo/k8s/requirements.txt
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

scikit-learn==0.24.1
apache-liminal==0.0.2
pyspark==3.1.3


================================================
FILE: examples/spark-app-demo/k8s/serving.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import json

import model_store
from model_store import ModelStore

# Store the endpoints below load models from (the production environment).
_MODEL_STORE = ModelStore(model_store.PRODUCTION)
# Key that predict() looks up in the request JSON.
_PETAL_WIDTH = 'petal_width'


def predict(input_json):
    """Score the ``petal_width`` entry of the JSON request with the latest model.

    Returns a JSON string with ``result`` (the second probability of the first
    prediction, as text) and ``version``. On failure a plain text message is
    returned instead: a dedicated one for ``IndexError`` (presumably raised
    when no model has been stored yet), and ``'Failure'`` for anything else.
    """
    try:
        payload = json.loads(input_json)
        model, version = _MODEL_STORE.load_latest_model()
        features = list(payload[_PETAL_WIDTH])
        probabilities = model.predict_proba([features])
        result = str(probabilities[0][1])
        return json.dumps({"result": result, "version": version})

    except IndexError:
        return 'Failure: the model is not ready yet'

    except Exception as error:
        print(error)
        return 'Failure'


def healthcheck(self):
    """Liveness endpoint: return a fixed status message; the argument is ignored."""
    status = 'Server is up!'
    return status


def version(self):
    """Return the version of the latest model, or the error text if it cannot be loaded.

    The argument is ignored.
    """
    try:
        # Local name differs from the function name to avoid shadowing it.
        _, model_version = _MODEL_STORE.load_latest_model()
        print(f'version={model_version}')
        return model_version
    except Exception as e:
        # Return the message rather than the exception object so the result
        # is text on the failure path too.
        return str(e)


================================================
FILE: examples/spark-app-demo/k8s/training.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import os
import sys
import time

import model_store
import numpy as np
import pandas as pd
from model_store import ModelStore
from sklearn import datasets, model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Store that train_model() saves to and validate_model() loads from.
_CANDIDATE_MODEL_STORE = ModelStore(model_store.CANDIDATE)
# Store that validate_model() saves a model to once it passes validation.
_PRODUCTION_MODEL_STORE = ModelStore(model_store.PRODUCTION)

import argparse
import csv

import numpy as np


def load_iris_from_csv_file(f):
    """Read a CSV with a header row into a DataFrame with fixed dtypes.

    Every column except the last is cast to float64 and the ``label`` column
    to int32.
    """
    frame = pd.read_csv(f, header=0).reset_index(drop=True)
    dtypes = dict.fromkeys(frame.columns[:-1], np.float64)
    dtypes['label'] = np.int32
    return frame.astype(dtypes)


def get_dataset(d):
    """Return the path of the first ``.csv`` file found under directory ``d``, or ``None`` if there is none."""
    print(f"searching for csv files in {d}")
    for root, _dirs, files in os.walk(d):
        for file in files:
            if file.endswith(".csv"):
                # Join with the directory currently being walked, not the
                # top-level one, so matches in sub-directories resolve to a
                # path that actually exists.
                return os.path.join(root, file)
    return None


def load_and_split(input_uri):
    """Load the CSV dataset found under ``input_uri`` and split it into train and test frames.

    Returns the ``(train, test)`` pair produced by ``train_test_split`` with a
    20% test share and a fixed random state, so repeated calls on the same
    data give the same split.

    Raises:
        ValueError: if no CSV file is found under ``input_uri``.
    """
    csv_file = get_dataset(input_uri)
    if csv_file is None:
        # Fail with a clear message instead of handing None to the CSV reader.
        raise ValueError(f"no csv dataset found in {input_uri}")
    print(f"found {csv_file} dataset")

    iris = load_iris_from_csv_file(csv_file)
    return train_test_split(iris, test_size=0.2, random_state=8)


def train_model(input_uri):
    """Train a logistic regression on the training split and save it to the candidate store.

    Prints the 5-fold cross-validation accuracy on the training split. The
    model version is the current Unix time rounded to whole seconds.
    """
    train_set, _test_set = load_and_split(input_uri)
    labels = train_set.pop("label")
    features = train_set.loc[:, train_set.columns]

    classifier = LogisticRegression(max_iter=500)
    classifier.fit(features, labels)

    cv_scores = model_selection.cross_val_score(classifier, features, labels, cv=5, scoring='accuracy')
    print(f'Accuracy in cross validation: {cv_scores.mean()*100} % ({cv_scores.std()} std)')
    version = round(time.time())

    print(f'Saving model with version {version} to candidate model store.')
    _CANDIDATE_MODEL_STORE.save_model(classifier, version)


def validate_model(input_uri):
    """
    Validate the latest candidate model and promote it to the production store.

    The candidate must return a numpy array from ``predict`` and reach an accuracy
    of at least 0.85 on the test split.

    :param input_uri: directory containing the CSV dataset
    :raises ValueError: if the model's prediction is not a numpy array or its
        accuracy is under the threshold
    """
    candidate, version = _CANDIDATE_MODEL_STORE.load_latest_model()
    print(f'Validating model with version {version} to candidate model store.')

    # Smoke test: the model must be able to predict on a 4-feature sample.
    smoke_prediction = candidate.predict([[1, 1, 1, 1]])
    if not isinstance(smoke_prediction, np.ndarray):
        raise ValueError('Invalid model')

    _unused_train_set, holdout = load_and_split(input_uri)
    y = holdout.pop("label")
    X = holdout.loc[:, holdout.columns]

    accuracy = candidate.score(X, y)
    print(f'model accuracy {accuracy*100}')
    if accuracy < 0.85:
        raise ValueError('model accuracy under threshold (0.85  ). Model is not promoted to production')

    print(f'Deploying model with version {version} to production model store.')
    _PRODUCTION_MODEL_STORE.save_model(candidate, version)


if __name__ == '__main__':
    # Command line entry point: --action selects the step, --input_uri the dataset dir.
    cli = argparse.ArgumentParser()
    cli.add_argument("--action", choices=['train', 'validate'])
    cli.add_argument("--input_uri")
    options = cli.parse_args()

    # Anything other than an explicit 'train' (including no --action) runs validation.
    selected_step = train_model if options.action == 'train' else validate_model
    selected_step(options.input_uri)


================================================
FILE: install.sh
================================================
#!/bin/sh
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Resolve the directory containing this script. BASH_SOURCE is used when running under
# bash; plain POSIX sh (e.g. dash) falls back to $0, because the "${BASH_SOURCE[0]}"
# array syntax is a bashism that fails there with "Bad substitution".
DIR="$( cd "$( dirname "${BASH_SOURCE:-$0}" )" >/dev/null 2>&1 && pwd )"

# Remove any previously installed version; -y answers the confirmation prompt.
pip uninstall -y apache-liminal

cd "$DIR" || exit

# Drop artifacts of earlier builds.
rm -rf build
rm -rf dist
# -f: stay quiet when no wheel has been copied to the liminal home yet.
rm -f "${LIMINAL_HOME:-${HOME}/liminal_home}"/*.whl

python setup.py sdist bdist_wheel

pip install dist/*.whl

# Keep a copy of the freshly built wheel in the liminal home directory.
mkdir -p "${LIMINAL_HOME:-${HOME}/liminal_home}"

cp dist/*.whl "${LIMINAL_HOME:-${HOME}/liminal_home}"

# Return to the directory the script was started from.
cd - || exit


================================================
FILE: liminal/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from liminal import settings

# Runs as a side effect of importing the ``liminal`` package. What initialize() sets
# up is defined in liminal.settings (not visible from this module).
settings.initialize()


================================================
FILE: liminal/build/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/build/image/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/build/image/python/Dockerfile
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Use an official Python runtime as a parent image.
# This file is a template: the python placeholder on the FROM line is replaced by the
# liminal image builder with the slim image of the configured python version
# (python:<version>-slim) before docker build runs.
FROM {{python}}

# Install the native build toolchain (build-essential, make, gcc) with apt-get,
# presumably so that pip can compile packages that ship C extensions.
RUN apt-get update && apt-get install -y --reinstall build-essential \
                                                   make \
                                                   gcc

# Set the working directory to /app
WORKDIR /app

# Order of operations is important here for docker's caching & incremental build performance.    !
# Be careful when changing this code.                                                            !

# Copy only requirements.txt first, so the dependency layers below are rebuilt only
# when the requirements change and not on every source change.
COPY ./requirements.txt /app/

# Upgrade pip, then install the requirements. The mount placeholder on the second RUN
# is substituted by the image builder: with a BuildKit secret mount exposing the
# configured pip.conf at /etc/pip.conf when pip_conf is set in the image config, and
# with nothing otherwise.
RUN python -m pip install --upgrade pip
RUN {{mount}} pip install -r requirements.txt

# Copy the current directory contents into the container at /app
RUN echo "Copying source code.."
COPY . /app/


================================================
FILE: liminal/build/image/python/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/build/image/python/python.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os

from liminal.build.python import BasePythonImageBuilder


class PythonImageBuilder(BasePythonImageBuilder):
    """
    Image builder for python images, based on the Dockerfile template that sits
    next to this module.
    """

    def __init__(self, config, base_path, relative_source_path, tag):
        super().__init__(
            config=config,
            base_path=base_path,
            relative_source_path=relative_source_path,
            tag=tag,
        )

    @staticmethod
    def _dockerfile_path():
        """
        Path to the Dockerfile in this module's directory
        """
        module_dir = os.path.dirname(__file__)
        return os.path.join(module_dir, 'Dockerfile')


================================================
FILE: liminal/build/image/python_server/Dockerfile
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Use an official Python runtime as a parent image.
# This file is a template: the python placeholder on the FROM line is replaced by the
# liminal image builder with the slim image of the configured python version
# (python:<version>-slim) before docker build runs.
FROM {{python}}

# Disabled: installation of build-essential (kept for reference).
#RUN apt-get install -y --reinstall build-essential

# Set the working directory to /app
WORKDIR /app

# Order of operations is important here for docker's caching & incremental build performance.    !
# Be careful when changing this code.                                                            !

# Install any needed packages specified in python_server_requirements.txt and requirements.txt
RUN pip install --upgrade pip

# Server dependencies first: this file is supplied by the image builder and is the
# same for every service, so its layer can be reused between builds.
COPY ./python_server_requirements.txt /app/
RUN pip install -r python_server_requirements.txt

# User requirements. The mount placeholder is substituted by the image builder: with a
# BuildKit secret mount exposing the configured pip.conf at /etc/pip.conf when pip_conf
# is set in the image config, and with nothing otherwise.
COPY ./requirements.txt /app/
RUN {{mount}} pip install -r requirements.txt

# Copy the current directory contents into the container at /app
RUN echo "Copying source code.."
COPY . /app/

# Start the liminal python server (python -u: unbuffered output). The script and the
# service.yml it reads are added to the build context by the image builder.
CMD python -u liminal_python_server.py


================================================
FILE: liminal/build/image/python_server/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/build/image/python_server/liminal_python_server.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import yaml
from flask import Blueprint, Flask, request


def __get_module(kls):
    """
    Import and return the module named by the dotted path ``kls``.

    ``__import__`` returns the top-level package for a dotted name, so the
    remaining path components are resolved as attributes to reach the leaf module.
    """
    _top_level, *submodules = kls.split('.')
    resolved = __import__(kls)
    for submodule in submodules:
        resolved = getattr(resolved, submodule)
    return resolved


def __get_endpoint_function(endpoint_config):
    """
    Resolve the handler configured for an endpoint.

    :param endpoint_config: endpoint configuration with a ``module`` key (dotted
        module path) and a ``function`` key (attribute name inside that module)
    :return: the attribute named by ``function`` in the imported module
    :raises AttributeError: if the module has no such attribute
    """
    module = __get_module(endpoint_config['module'])
    # getattr() is the supported way to look up an attribute by name; calling
    # __getattribute__ directly bypasses a module-level __getattr__ hook.
    return getattr(module, endpoint_config['function'])


if __name__ == '__main__':
    # Service definition, read from the current working directory.
    with open('service.yml') as service_file:
        config = yaml.safe_load(service_file)

    # Map each configured endpoint path, minus its first character (presumably the
    # leading '/'), to the function that handles it.
    endpoints = {}
    for endpoint_config in config['endpoints']:
        route = endpoint_config['endpoint'][1:]
        endpoints[route] = __get_endpoint_function(endpoint_config)

    blueprint = Blueprint('liminal_python_server_blueprint', __name__)

    @blueprint.route('/favicon.ico', methods=('GET', 'POST'))
    def no_content():
        # Favicon requests get an empty 204 response.
        return '', 204

    @blueprint.route('/', defaults={'endpoint': ''}, methods=('GET', 'POST'))
    @blueprint.route('/<endpoint>', methods=('GET', 'POST'))
    def show(endpoint):
        # Dispatch the raw request body to the configured handler, if there is one.
        if endpoint not in endpoints:
            return 'Page not found.', 404
        handler = endpoints[endpoint]
        return handler(request.get_data())

    app = Flask(__name__)
    app.register_blueprint(blueprint)

    app.run(host='0.0.0.0', threaded=False, port=80)


================================================
FILE: liminal/build/image/python_server/python_server.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os

import yaml

from liminal.build.python import BasePythonImageBuilder


class PythonServerImageBuilder(BasePythonImageBuilder):
    """
    Image builder for python server images: adds the liminal python server script,
    its requirements file and a ``service.yml`` dump of the config to the build.
    """

    def __init__(self, config, base_path, relative_source_path, tag):
        super().__init__(
            config=config,
            base_path=base_path,
            relative_source_path=relative_source_path,
            tag=tag,
        )

    @staticmethod
    def _dockerfile_path():
        """
        Path to the Dockerfile in this module's directory
        """
        module_dir = os.path.dirname(__file__)
        return os.path.join(module_dir, 'Dockerfile')

    @staticmethod
    def _additional_files_from_paths():
        """
        Paths of the server script and of its requirements file
        """
        server_files = ('liminal_python_server.py', 'python_server_requirements.txt')
        return [os.path.join(os.path.dirname(__file__), file_name) for file_name in server_files]

    def _additional_files_from_filename_content_pairs(self):
        """
        Pairs of the parent builder, followed by service.yml holding the yaml dump of the config
        """
        pairs = list(super()._additional_files_from_filename_content_pairs())
        pairs.append(('service.yml', yaml.safe_dump(self.config)))
        return pairs


================================================
FILE: liminal/build/image/python_server/python_server_requirements.txt
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

Flask
pyyaml


================================================
FILE: liminal/build/image/spark/Dockerfile
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# Pinned Bitnami Spark base image.
FROM bitnami/spark:3.1.2-debian-10-r23

# Run the following build steps as root.
USER root

# Set the working directory to /app
WORKDIR /app

# Copy the whole build context (source code and requirements.txt) into /app
COPY . /app/

# Install the requirements. The mount placeholder is substituted by the image builder:
# with a BuildKit secret mount exposing the configured pip.conf at /etc/pip.conf when
# pip_conf is set in the image config, and with nothing otherwise.
RUN {{mount}} pip install -r requirements.txt


================================================
FILE: liminal/build/image/spark/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/build/image/spark/spark.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import os

from liminal.build.python import BasePythonImageBuilder


class SparkImageBuilder(BasePythonImageBuilder):
    """
    Image builder for spark images, based on the Dockerfile that sits next to this module.
    """

    def __init__(self, config, base_path, relative_source_path, tag):
        super().__init__(
            config=config,
            base_path=base_path,
            relative_source_path=relative_source_path,
            tag=tag,
        )

    @staticmethod
    def _dockerfile_path():
        """
        Path to the Dockerfile in this module's directory
        """
        here = os.path.dirname(__file__)
        return os.path.join(here, 'Dockerfile')


================================================
FILE: liminal/build/image_builder.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os
import shutil
import subprocess
import tempfile
import time


class ImageBuilder:
    """
    Builds an image from source code

    The source directory is copied into a temporary build context, additional files
    (Dockerfile etc.) are written into it and ``docker build`` is run on the result.
    Subclasses provide the Dockerfile through ``_dockerfile_path`` and may customize
    the build through the other ``_``-prefixed hooks.
    """

    __NO_CACHE = 'no_cache'
    # Maximum number of seconds a docker build may take.
    __BUILD_TIMEOUT_SECONDS = 960

    def __init__(self, config, base_path, relative_source_path, tag):
        """
        :param config: task/service config
        :param base_path: directory containing liminal yml
        :param relative_source_path: source path relative to liminal yml
        :param tag: image tag
        """
        self.base_path = base_path
        self.relative_source_path = relative_source_path
        self.tag = tag
        self.config = config

    def build(self):
        """
        Builds source code into an image.

        :return: the output of the docker build command, one log line per line
        :raises subprocess.CalledProcessError: if docker build exits with a non-zero code
        :raises subprocess.TimeoutExpired: if docker build is still running at the timeout
        """
        logging.info(f'[ ] Building image: {self.tag}')

        temp_dir = self.__temp_dir()
        try:
            self.__copy_source_code(temp_dir)
            self._write_additional_files(temp_dir)

            docker_build_command = self.__docker_build_command(temp_dir)
            logging.info(docker_build_command)
            docker_build_out = self.__run_docker_build(docker_build_command)
        finally:
            # Always drop the temporary build context, also when the build fails.
            shutil.rmtree(temp_dir, ignore_errors=True)

        logging.info(f'[X] Building image: {self.tag} (Success).')

        return '\n'.join(docker_build_out)

    def __docker_build_command(self, temp_dir):
        """
        Assembles the docker build shell command for the build context in temp_dir.
        """
        no_cache = ''
        if self.__NO_CACHE in self.config and self.config[self.__NO_CACHE]:
            no_cache = '--no-cache=true'

        # Fall back to the usual install location when docker is not on the PATH.
        docker = 'docker' if shutil.which('docker') is not None else '/usr/local/bin/docker'
        docker_build_command = f'{docker} build {no_cache} ' + f'--tag {self.tag} '

        docker_build_command += f'--progress=plain {self._build_flags()} '

        docker_build_command += f'{temp_dir}'

        if self._use_buildkit():
            docker_build_command = f'DOCKER_BUILDKIT=1 {docker_build_command}'

        return docker_build_command

    def __run_docker_build(self, docker_build_command):
        """
        Runs the build command, logging its output live.

        :return: list of the output lines
        :raises subprocess.CalledProcessError: on a non-zero exit code
        :raises subprocess.TimeoutExpired: if the command exceeds the build timeout
        """
        docker_build_out = []
        deadline = time.time() + self.__BUILD_TIMEOUT_SECONDS
        # NOTE(review): the command is a shell string built from the tag and config
        # values, so these must come from trusted liminal configuration.
        build_process = subprocess.Popen(
            docker_build_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
        )
        logging.info('=' * 80)
        try:
            # Read stdout line by line to show the build output live. Reading until
            # EOF (instead of until the process has exited) keeps the last lines.
            while time.time() < deadline:
                output = build_process.stdout.readline()
                if not output:
                    break
                stdout_log_str = output.strip().decode('utf-8', errors='replace')
                docker_build_out.append(stdout_log_str)
                logging.info(stdout_log_str)
        finally:
            build_process.stdout.close()
        logging.info('=' * 80)

        try:
            # Wait for whatever is left of the time budget (never a negative timeout).
            return_code = build_process.wait(timeout=max(0.0, deadline - time.time()))
        except subprocess.TimeoutExpired:
            # Do not leave the build running in the background.
            build_process.kill()
            build_process.wait()
            raise

        if return_code != 0:
            # A failed build must not be reported as success.
            raise subprocess.CalledProcessError(
                return_code, docker_build_command, output='\n'.join(docker_build_out)
            )

        return docker_build_out

    def __copy_source_code(self, temp_dir):
        self.__copy_dir(os.path.join(self.base_path, self.relative_source_path), temp_dir)

    def _write_additional_files(self, temp_dir):
        """
        Writes the Dockerfile and all additional files into the build context.
        """
        for file in [self._dockerfile_path()] + self._additional_files_from_paths():
            self.__copy_file(file, temp_dir)

        for filename, content in self._additional_files_from_filename_content_pairs():
            with open(os.path.join(temp_dir, filename), 'w') as file:
                file.write(content)

    def __temp_dir(self):
        temp_dir = tempfile.mkdtemp()
        # Delete dir for shutil.copytree to work
        self.__remove_dir(temp_dir)
        return temp_dir

    @staticmethod
    def __remove_dir(temp_dir):
        shutil.rmtree(temp_dir)

    @staticmethod
    def __copy_dir(source_path, destination_path):
        shutil.copytree(source_path, destination_path)

    @staticmethod
    def __copy_file(source_file_path, destination_file_path):
        shutil.copy2(source_file_path, destination_file_path)

    @staticmethod
    def _dockerfile_path():
        """
        Path to Dockerfile
        """
        raise NotImplementedError()

    @staticmethod
    def _additional_files_from_paths():
        """
        List of paths to additional files
        """
        return []

    def _additional_files_from_filename_content_pairs(self):
        """
        File name and content pairs to create files from
        """
        return []

    def _build_flags(self):
        """
        Additional build flags to add to docker build command.
        """
        return ''

    def _use_buildkit(self):
        """
        overwrite with True to use docker buildkit
        """
        return False


================================================
FILE: liminal/build/liminal_apps_builder.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os

from liminal.core.config.config import ConfigUtil
from liminal.core.util import class_util, extensible, files_util


def build_liminal_apps(path):
    """
    Build images for liminal apps in path.

    For every loaded liminal config, each entry under ``images`` that declares a
    ``source`` is built with the image builder registered for its ``type``.
    Entries without a ``source`` are skipped with a warning.

    :param path: path the liminal configs are loaded from
    :raises ValueError: if an image declares a type no builder is registered for
    """
    loaded_configs = ConfigUtil(path).safe_load(is_render_variables=True, soft_merge=True)

    for app_config in loaded_configs:
        pipeline_source_file = files_util.resolve_pipeline_source_file(app_config['name'])
        base_path = os.path.dirname(pipeline_source_file)
        if 'images' not in app_config:
            continue

        for image in app_config['images']:
            image_name = image['image']

            if 'source' not in image:
                logging.warning(f'No source configured for image {image_name}.')
                continue

            image_type = image['type']
            builder_class = __get_image_builder_class(image_type)
            if not builder_class:
                raise ValueError(f'No such image type: {image_type}')
            __build_image(base_path, image, builder_class)


def __build_image(base_path, builder_config, builder):
    """
    Instantiate ``builder`` for the image described by ``builder_config`` and run its build.

    :param base_path: directory the image source path is relative to
    :param builder_config: image config; its ``source`` and ``image`` keys are used
    :param builder: image builder class (or factory) to instantiate
    """
    source_path = builder_config['source']
    image_tag = builder_config['image']
    builder(
        config=builder_config,
        base_path=base_path,
        relative_source_path=source_path,
        tag=image_tag,
    ).build()


def __get_image_builder_class(task_type):
    """
    Look up the image builder registered for ``task_type``; None when there is none.
    """
    return image_builder_types.get(task_type)


# Image builder implementations are loaded once, when this module is imported.
logging.info(f'Loading image builder implementations..')

# Registry consulted by __get_image_builder_class(); it is queried with the image
# ``type`` and ``.get()`` is called on it, so it is presumably a mapping of
# type name -> builder class.
image_builder_types = extensible.load_image_builders()

logging.info(f'Finished loading image builder implementations: {image_builder_types}')
# NOTE(review): no loading code follows this message in the visible part of the
# module -- looks like a leftover; confirm before removing.
logging.info(f'Loading service image builder implementations..')


================================================
FILE: liminal/build/python.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os

from liminal.build.image_builder import ImageBuilder


class PythonImageVersions:
    """
    Resolves the base python docker image for a requested python version.
    """

    @property
    def default_version(self):
        """
        Version used when no python version is requested.
        """
        return '3.7'

    @property
    def supported_versions(self):
        """
        The accepted ``major.minor`` versions.
        """
        return '3.6', '3.7', '3.8', '3.9'

    def get_image_name(self, python_version):
        """
        :param python_version: The python version that would be installed in
            the docker image. For example '3.8', '3.8.1' etc. A falsy value
            selects the default version.
        :type python_version: str
        :return: The name of the base (slim) python image
        :rtype: str
        :raises ValueError: if the first three characters of the version are not
            one of the supported versions
        """
        requested = str(python_version) if python_version else self.default_version
        # Only the leading "X.Y" part decides whether the version is supported.
        major_minor = requested[:3]
        if major_minor in self.supported_versions:
            return f'python:{requested}-slim'
        raise ValueError(
            f'liminal supports the following python versions: '
            f'{self.supported_versions} but {requested} '
            f'were passed'
        )


class BasePythonImageBuilder(ImageBuilder):
    """
    Base class for building python images.

    Subclasses provide the Dockerfile template through ``_dockerfile_path``;
    this class fills in its ``{{mount}}`` and ``{{python}}`` placeholders from
    the ``pip_conf`` and ``python_version`` config keys.
    """

    # Config keys read by this builder.
    __PIP_CONF = 'pip_conf'
    __PYTHON_VERSION = 'python_version'

    def __init__(self, config, base_path, relative_source_path, tag, base_image=PythonImageVersions()):
        """
        :param config: image config; ``pip_conf`` and ``python_version`` are
            the keys this class reads from it.
        :param base_path: forwarded unchanged to ``ImageBuilder``.
        :param relative_source_path: forwarded unchanged to ``ImageBuilder``.
        :param tag: forwarded unchanged to ``ImageBuilder``.
        :param base_image: object whose ``get_image_name(python_version)``
            returns the base image name to put in the Dockerfile.
        """
        super().__init__(config, base_path, relative_source_path, tag)
        self._base_image = base_image

    @staticmethod
    def _dockerfile_path():
        """Path of the Dockerfile template; must be provided by subclasses."""
        raise NotImplementedError()

    def _write_additional_files(self, temp_dir):
        """
        Make sure ``temp_dir`` holds a ``requirements.txt`` (creating an empty
        one when it is missing) before delegating to the parent class.
        """
        requirements_file_path = os.path.join(temp_dir, 'requirements.txt')
        if not os.path.exists(requirements_file_path):
            with open(requirements_file_path, 'w'):
                pass

        super()._write_additional_files(temp_dir)

    def _additional_files_from_filename_content_pairs(self):
        """
        :return: a single ``('Dockerfile', content)`` pair where ``content`` is
            the template from ``_dockerfile_path`` with its placeholders filled.
        :rtype: list[tuple[str, str]]
        """
        with open(self._dockerfile_path()) as original:
            data = original.read()

        data = self.__mount_pip_conf(data)
        data = self.__add_python_base_version(data)

        return [('Dockerfile', data)]

    def __mount_pip_conf(self, data):
        """
        Resolve the ``{{mount}}`` placeholder of the Dockerfile template.

        With ``pip_conf`` configured, the experimental Dockerfile syntax header
        is prepended and the placeholder becomes a secret mount of the pip
        config; otherwise the placeholder (and the space after it) is removed.
        """
        new_data = data

        if self.__PIP_CONF in self.config:
            new_data = '# syntax = docker/dockerfile:1.0-experimental\n' + data
            new_data = new_data.replace('{{mount}}', '--mount=type=secret,id=pip_config,dst=/etc/pip.conf \\\n')
        else:
            new_data = new_data.replace('{{mount}} ', '')

        return new_data

    def __add_python_base_version(self, data):
        """Replace ``{{python}}`` with the base image for the configured python version."""
        python_version = self.config.get(self.__PYTHON_VERSION)
        base_image = self._base_image.get_image_name(python_version)
        return data.replace('{{python}}', base_image)

    def _build_flags(self):
        """
        :return: the ``--secret`` flag exposing the configured pip config file
            to the build, or an empty string when ``pip_conf`` is not set.
        :rtype: str
        """
        if self.__PIP_CONF in self.config:
            return f'--secret id=pip_config,src={self.config[self.__PIP_CONF]}'
        else:
            return ''

    def _use_buildkit(self):
        """
        :return: True when ``pip_conf`` is configured (the secret mount needs
            BuildKit), False otherwise.
        :rtype: bool
        """
        # Always answer with a real bool instead of falling through to None.
        return self.__PIP_CONF in self.config


================================================
FILE: liminal/core/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/core/config/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/core/config/config.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os
import traceback

from liminal.core import environment
from liminal.core.config.defaults import base, default_configs
from liminal.core.util import dict_util, files_util


class ConfigUtil:
    """
    Load and enrich config files under configs_path.

    A config whose ``type`` is ``super`` is a superliminal; every other config
    is treated as a subliminal. Subliminals are enriched with the content of
    their superliminal chain, which ends at the built-in ``base`` config.
    """

    # Keys and well-known values used in liminal config dicts.
    __BASE = 'base'
    __PIPELINES = 'pipelines'
    __SUPER = 'super'
    __TYPE = 'type'
    __SUB = 'sub'
    __SERVICES = 'services'
    __TASKS = 'tasks'
    __PIPELINE_DEFAULTS = 'pipeline_defaults'
    __TASK_DEFAULTS = 'task_defaults'
    __BEFORE_TASKS = 'before_tasks'
    __AFTER_TASKS = 'after_tasks'
    __EXECUTORS = 'executors'
    __IMAGES = 'images'

    def __init__(self, configs_path):
        """
        :param configs_path: location handed to ``files_util.load`` to read the
            config files from.
        """
        self.configs_path = configs_path
        self.config_files = files_util.load(configs_path)
        self.base = base.BASE
        # Cache of the enriched subliminals, filled by the first safe_load() call.
        self.loaded_subliminals = []
        self.snapshot_path = os.path.join(environment.get_airflow_home_dir(), '../liminal_config_files')

    def safe_load(self, is_render_variables, soft_merge=False):
        """
        Enrich every subliminal config with its superliminal chain.

        A config that fails to load is logged and skipped, so one broken file
        does not prevent the others from loading. A non-empty result is cached
        and returned as-is by later calls.

        :param is_render_variables: whether ``{{variable}}`` placeholders are
            rendered (forwarded to the variable substitution step).
        :param soft_merge: when True a missing superliminal only logs a
            warning instead of raising.
        :returns list of config files after enrich with defaults and supers
        """
        if self.loaded_subliminals:
            return self.loaded_subliminals

        configs = self.config_files.values()
        enriched_configs = []

        for subliminal in [config for config in configs if self.__is_subliminal(config)]:
            name = subliminal.get('name')
            logging.info(f'Loading yml {name}')
            # noinspection PyBroadException
            try:
                superliminal = self.__get_superliminal(subliminal, soft_merge)
                enriched_config = self.__merge_configs(subliminal, superliminal, is_render_variables, soft_merge)
                enriched_configs.append(enriched_config)
            except Exception:
                logging.error(f'Failed to load yml {name}')
                traceback.print_exc()

        self.loaded_subliminals = enriched_configs

        return self.loaded_subliminals

    def __merge_configs(self, subliminal, superliminal, is_render_variables, soft_merge):
        """
        Merge ``subliminal`` with ``superliminal``, after first merging the
        superliminal with its own superliminal chain (recursively).

        :returns: ``subliminal`` untouched when there is no superliminal,
            otherwise the merged config.
        """
        if not superliminal:
            return subliminal

        # Shallow copies: top-level keys are reassigned below.
        sub = subliminal.copy()
        supr = superliminal.copy()

        merged_superliminal = self.__merge_configs(
            supr, self.__get_superliminal(supr, soft_merge), is_render_variables, soft_merge
        )

        sub[self.__EXECUTORS] = self.__merge_section(sub, merged_superliminal, self.__EXECUTORS)
        sub[self.__IMAGES] = self.__merge_section(sub, merged_superliminal, self.__IMAGES)

        if self.__is_subliminal(sub):
            return self.__merge_sub_and_super(sub, merged_superliminal, is_render_variables)
        else:
            return self.__merge_superliminals(sub, merged_superliminal)

    def __get_superliminal(self, liminal, soft_merge):
        """
        Find the superliminal of ``liminal``.

        :returns: ``{}`` for the base config itself, the base config when no
            ``super`` is named, otherwise the config registered under that
            name (``None`` when it is missing and ``soft_merge`` is set).
        :raises FileNotFoundError: when the named superliminal is missing and
            ``soft_merge`` is False.
        """
        superliminal = {}
        if not self.__is_base_config(liminal):
            superliminal_name = liminal.get(self.__SUPER, '')
            if not superliminal_name:
                superliminal = self.base
            else:
                superliminal = self.__get_config(superliminal_name)
                if not superliminal:
                    supr_is_missing_msg = (
                        f"superliminal '{superliminal_name}' " + f"is missing from '{self.configs_path}'"
                    )
                    if soft_merge:
                        logging.warning(supr_is_missing_msg)
                    else:
                        raise FileNotFoundError(supr_is_missing_msg)

        return superliminal

    def __get_base_config(self):
        """Return the built-in base config."""
        return self.base

    def __is_base_config(self, config):
        """Tell whether ``config`` is the base config, judged by its ``name``."""
        return config.get('name', '') == self.__BASE

    def __is_subliminal(self, config):
        """
        Tell whether ``config`` is a subliminal (its ``type`` is not ``super``).

        Side effect: a subliminal gets its ``type`` set to ``sub`` explicitly.
        """
        is_subliminal = config.get(self.__TYPE, self.__SUB) != self.__SUPER
        if is_subliminal:
            config[self.__TYPE] = self.__SUB
        return is_subliminal

    def __get_config(self, config_name):
        """Return the loaded config registered under ``config_name``, or None."""
        return self.config_files.get(config_name)

    def __merge_sub_and_super(self, sub, supr, is_render_variables):
        """
        Enrich a subliminal with an (already merged) superliminal: apply the
        pipeline and service defaults, add the superliminal's top-level keys
        the subliminal lacks, then apply variable substitution.
        """
        merged_pipelines = list()

        for pipeline in sub.get(self.__PIPELINES, {}):
            final_pipeline = self.__apply_pipeline_defaults(sub, supr, pipeline)
            merged_pipelines.append(final_pipeline)

        sub[self.__PIPELINES] = merged_pipelines
        sub[self.__SERVICES] = default_configs.apply_service_defaults(sub, supr)

        # Non-recursive merge: on a shared top-level key the subliminal value wins.
        sub = dict_util.merge_dicts(supr.copy(), sub)

        return default_configs.apply_variable_substitution(sub, supr, is_render_variables)

    def __merge_superliminals(self, super1, super2):
        """
        Merge superliminal ``super1`` with its own superliminal ``super2``.

        ``before_tasks`` become super2's followed by super1's; ``after_tasks``
        become super1's followed by super2's. Everything else goes through a
        recursive ``merge_dicts(super1, super2)``.
        """
        super1_pipeline_defaults = super1.get(self.__PIPELINE_DEFAULTS, {}).copy()
        super2_pipeline_defaults = super2.get(self.__PIPELINE_DEFAULTS, {}).copy()

        super1[self.__PIPELINE_DEFAULTS] = super1_pipeline_defaults
        super1[self.__PIPELINE_DEFAULTS][self.__BEFORE_TASKS] = super2_pipeline_defaults.pop(
            self.__BEFORE_TASKS, []
        ) + super1_pipeline_defaults.pop(self.__BEFORE_TASKS, [])

        super2[self.__PIPELINE_DEFAULTS] = super2_pipeline_defaults
        super1[self.__PIPELINE_DEFAULTS][self.__AFTER_TASKS] = super1_pipeline_defaults.pop(
            self.__AFTER_TASKS, []
        ) + super2_pipeline_defaults.pop(self.__AFTER_TASKS, [])

        # merge supers tasks
        return dict_util.merge_dicts(super1, super2, True)

    def snapshot_final_liminal_configs(self):
        """Dump the enriched subliminal configs under ``snapshot_path``."""
        files_util.dump_liminal_configs(liminal_configs=self.loaded_subliminals, path=self.snapshot_path)

    def __merge_section(self, subliminal, superliminal, section):
        """
        Merge a list section (``executors`` / ``images``) of both configs.

        The section name minus its last character ('executors' -> 'executor')
        is the key that identifies an item inside the list.
        """
        return self.__deep_list_keyword_merge(section[:-1], subliminal.get(section, []), superliminal.get(section, []))

    @staticmethod
    def __apply_pipeline_defaults(subliminal, superliminal, pipeline):
        """Delegate to ``default_configs.apply_pipeline_defaults``."""
        return default_configs.apply_pipeline_defaults(subliminal, superliminal, pipeline)

    @staticmethod
    def __deep_list_keyword_merge(unique_key_name, subliminal_list_conf, superliminal_list_conf):
        """
        Merge two lists of dicts, pairing items by their ``unique_key_name`` value.

        :returns: list of merged items; items found in only one list are kept.
        """
        subliminal_key_map = {item[unique_key_name]: item for item in subliminal_list_conf}
        superliminal_key_map = {item[unique_key_name]: item for item in superliminal_list_conf}

        # NOTE(review): the superliminal map is passed first to the recursive merge -
        # confirm that is the intended precedence for items present in both lists.
        return list(dict_util.merge_dicts(superliminal_key_map, subliminal_key_map, recursive=True).values())


================================================
FILE: liminal/core/config/defaults/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/core/config/defaults/base/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import os

from liminal.core.util import files_util

# The built-in "base" config: the entry keyed 'base' among the configs that
# files_util.load finds in this package's directory. A shallow copy is kept so
# that top-level changes made to BASE do not touch the loaded mapping.
BASE = files_util.load(os.path.dirname(__file__))['base'].copy()


================================================
FILE: liminal/core/config/defaults/base/liminal.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
# Built-in base superliminal: the root of every config's superliminal chain.
name: base
type: super
# Executors are identified by their `executor` name when merged with other configs.
executors:
  - executor: default_k8s
    type: kubernetes
  - executor: airflow_executor
    type: airflow
# Values merged into every service of a subliminal config.
service_defaults:
  description: add defaults parameters for all services
# Per task type defaults; a task picks up the entry whose key equals its `type`.
task_defaults:
  description: add defaults parameters for all tasks separate by task type
  python:
    executor: default_k8s
  spark:
    executor: default_k8s
  job_end:
    executor: airflow_executor
  job_start:
    executor: airflow_executor
# Values merged into every pipeline; before_tasks / after_tasks are placed
# before / after the pipeline's own tasks.
pipeline_defaults:
  description: add defaults parameters for all pipelines
  before_tasks:
    - task: start
      type: job_start
  after_tasks:
    - task: end
      type: job_end


================================================
FILE: liminal/core/config/defaults/default_configs.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from liminal.core.util import dict_util
from liminal.core.util.dict_util import merge_dicts

# Config dict keys used by the functions in this module.
__SERVICES = "services"
__TASKS = "tasks"
__BEFORE_TASKS = "before_tasks"
__AFTER_TASKS = "after_tasks"


def apply_variable_substitution(subliminal, superliminal, is_render_variables=False):
    """
    Combine the ``variables`` of both configs and optionally render them.

    The variables are merged recursively, a subliminal value taking precedence
    over the superliminal value of the same name.

    :param subliminal: subliminal config
    :param superliminal: superliminal config
    :param is_render_variables: when True, every {{variable.key}} placeholder
        in subliminal is replaced and a new dict is returned; when False the
        merged variables are only stored on subliminal, placeholders untouched.
    :returns: the rendered copy of subliminal, or subliminal itself
    """
    variables_key = "variables"
    variables = dict_util.merge_dicts(
        subliminal.get(variables_key, {}), superliminal.get(variables_key, {}), True
    )

    if not is_render_variables:
        subliminal[variables_key] = variables
        return subliminal

    # First pass: scalar variables are stringified and may reference other variables.
    for name in variables:
        value = variables[name]
        if not isinstance(value, (dict, list)):
            variables[name] = dict_util.replace_placholders_in_string(str(value), variables)

    # Second pass also reaches placeholders nested inside dict / list variables.
    variables = dict_util.replace_placeholders(variables, variables)
    return dict_util.replace_placeholders(subliminal, variables)


def apply_service_defaults(subliminal, superliminal):
    """Enrich the subliminal services with the service defaults of both configs.

    :param subliminal: subliminal config
    :param superliminal: superliminal config
    :returns: list with one enriched dict per entry of subliminal's services;
        a service's own values win over subliminal.service_defaults, which win
        over superliminal.service_defaults
    """
    defaults_key = "service_defaults"
    service_defaults = merge_dicts(
        subliminal.get(defaults_key, {}), superliminal.get(defaults_key, {}), recursive=True
    )

    enriched_services = []
    for service in subliminal.get(__SERVICES, {}):
        enriched_services.append(merge_dicts(service, service_defaults, True))
    return enriched_services


def apply_pipeline_defaults(subliminal, superliminal, pipeline):
    """Enrich a pipeline with the pipeline defaults of both configs.

    The superliminal's before_tasks / after_tasks are taken out of its
    defaults and handed to apply_task_defaults instead of being merged into
    the pipeline as plain keys.

    :param subliminal: subliminal config
    :param superliminal: superliminal config
    :param pipeline: to apply defaults on

    :returns: enriched pipeline with superliminal.pipeline_defaults & subliminal.pipeline_defaults
    """
    defaults_key = "pipeline_defaults"
    sub_defaults = subliminal.get(defaults_key, {}).copy()
    super_defaults = superliminal.get(defaults_key, {}).copy()

    before_tasks = super_defaults.pop(__BEFORE_TASKS, [])
    after_tasks = super_defaults.pop(__AFTER_TASKS, [])

    enriched_pipeline = merge_dicts(pipeline, merge_dicts(sub_defaults, super_defaults, True), True)

    return apply_task_defaults(
        subliminal,
        superliminal,
        enriched_pipeline,
        superliminal_before_tasks=before_tasks,
        superliminal_after_tasks=after_tasks,
    )


def apply_task_defaults(subliminal, superliminal, pipeline, superliminal_before_tasks, superliminal_after_tasks):
    """Replace the pipeline's 'tasks' with their enriched version.

    :param subliminal: subliminal config
    :param superliminal: superliminal config
    :param pipeline: the pipeline whose 'tasks' list (empty when missing) is enriched
    :param superliminal_before_tasks: superliminal tasks placed before the pipeline's tasks
    :param superliminal_after_tasks: superliminal tasks placed after the pipeline's tasks

    :returns: the same pipeline dict, its 'tasks' key reassigned
    """
    own_tasks = pipeline.get(__TASKS, [])
    pipeline[__TASKS] = __apply_task_defaults(
        subliminal, superliminal, own_tasks, superliminal_before_tasks, superliminal_after_tasks
    )
    return pipeline


def __apply_task_defaults(
    subliminal, superliminal, subliminal_tasks, superliminal_before_tasks, superliminal_after_tasks
):
    """Merge the task_defaults of both configs (subliminal values win) and enrich the tasks with them."""
    defaults_key = "task_defaults"
    task_defaults = merge_dicts(
        subliminal.get(defaults_key, {}), superliminal.get(defaults_key, {}), recursive=True
    )
    return __enrich_tasks(subliminal_tasks, superliminal_before_tasks, superliminal_after_tasks, task_defaults)


def __enrich_tasks(sub_tasks, super_before_tasks, super_after_tasks, task_defaults):
    """
    Build the final task list: before-tasks, then the subliminal tasks, then
    after-tasks, each task enriched with the defaults registered for its 'type'.
    """
    enriched = []
    for task in super_before_tasks + sub_tasks + super_after_tasks:
        defaults_for_type = task_defaults.get(task.get('type', ''), {})
        enriched.append(merge_dicts(task, defaults_for_type, recursive=True))
    return enriched


================================================
FILE: liminal/core/environment.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os
import subprocess

DEFAULT_DAGS_ZIP_NAME = 'liminal.zip'
# Liminal home used when neither environment variable below provides one.
DEFAULT_LIMINAL_HOME = os.path.expanduser('~/liminal_home')
# Sub directory (of the dags folder / liminal home) returned by get_dags_dir().
DEFAULT_PIPELINES_SUBDIR = "pipelines"
# Environment variable names read (and in part written) by this module.
LIMINAL_HOME_PARAM_NAME = "LIMINAL_HOME"
LIMINAL_HOME_CLOUD_PARAM_NAME = "AIRFLOW__CORE__LIMINAL_HOME"
LIMINAL_VERSION_PARAM_NAME = 'LIMINAL_VERSION'


def get_liminal_home():
    """
    Resolve the liminal home directory.

    AIRFLOW__CORE__LIMINAL_HOME wins whenever it is defined (even if empty).
    Otherwise LIMINAL_HOME is used; when that is unset or empty the default
    location is registered in the environment and returned.
    """
    env = os.environ
    if LIMINAL_HOME_CLOUD_PARAM_NAME in env:
        return env.get(LIMINAL_HOME_CLOUD_PARAM_NAME)

    configured_home = env.get(LIMINAL_HOME_PARAM_NAME)
    if configured_home:
        return configured_home

    logging.info("no environment parameter called LIMINAL_HOME detected")
    logging.info(f"registering {DEFAULT_LIMINAL_HOME} as the LIMINAL_HOME directory")
    env[LIMINAL_HOME_PARAM_NAME] = DEFAULT_LIMINAL_HOME
    return DEFAULT_LIMINAL_HOME


def get_dags_dir():
    """Return the pipelines sub directory of the dags folder (or of liminal home)."""
    # Resolved up front, as before: the lookup may register LIMINAL_HOME.
    liminal_home = get_liminal_home()
    # inside airflow the configured dags folder takes precedence
    dags_root = os.environ.get("AIRFLOW__CORE__DAGS_FOLDER", liminal_home)
    return os.path.join(dags_root, DEFAULT_PIPELINES_SUBDIR)


def get_liminal_version():
    """
    Return the liminal version to use, resolving and caching it on first use.

    When LIMINAL_VERSION is unset or empty, it is set to (in this order):
    a ``file://`` URL of the first ``.whl`` file listed in the liminal home
    directory, the part after `` @ `` of the matching ``pip freeze`` line, or
    that ``pip freeze`` line itself.

    :return: the value of LIMINAL_VERSION
    :rtype: str
    """
    result = os.environ.get(LIMINAL_VERSION_PARAM_NAME, None)
    if not result:
        # With shell=True the command is passed as one string, the form the
        # subprocess documentation recommends.
        output = subprocess.run(
            'pip freeze | grep \'apache-liminal\'', capture_output=True, env=os.environ, shell=True
        )
        pip_res = output.stdout.decode('UTF-8').strip()
        liminal_home = get_liminal_home()
        # The liminal home directory may not exist (yet); treat that as
        # "no wheel files" rather than failing in os.listdir.
        if os.path.isdir(liminal_home):
            whl_files = [file for file in os.listdir(liminal_home) if file.endswith(".whl")]
        else:
            whl_files = []
        if whl_files:
            value = 'file://' + os.path.join(liminal_home, whl_files[0])
        elif ' @ ' in pip_res:
            # "name @ location" line: keep only the location
            value = pip_res[pip_res.index(' @ ') + 3 :]
        else:
            value = pip_res
        logging.info(f'LIMINAL_VERSION not set. Setting it to currently installed version: {value}')
        os.environ[LIMINAL_VERSION_PARAM_NAME] = value
    return os.environ.get(LIMINAL_VERSION_PARAM_NAME, 'apache-liminal')


def get_airflow_home_dir():
    """Return the configured airflow dags folder, falling back to liminal home."""
    # Resolved up front, as before: the lookup may register LIMINAL_HOME.
    liminal_home = get_liminal_home()
    # inside airflow the configured dags folder takes precedence
    return os.path.join(os.environ.get("AIRFLOW__CORE__DAGS_FOLDER", liminal_home))


================================================
FILE: liminal/core/util/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/core/util/class_util.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import importlib.util
import pkgutil


def find_subclasses_in_packages(packages, parent_class):
    """
    Finds all (direct and indirect) subclasses of the given parent class that
    are defined within the given packages.

    Every package is imported first so that its classes are registered.
    :return: map of module name (last dotted segment) -> class
    """
    for package in packages:
        import_module(package)

    found = set()
    pending = [parent_class]
    while pending:
        current = pending.pop()
        for candidate in current.__subclasses__():
            if candidate in found:
                continue
            pending.append(candidate)
            # keep the class only if its module belongs to one of the packages
            if any(package in candidate.__module__ for package in packages):
                found.add(candidate)

    return {cls.__module__.split(".")[-1]: cls for cls in found}


def import_module(package, recursive=True):
    """
    Import all submodules of a package, skipping any named
    'liminal_python_server'.

    :param package: package (name or actual module)
    :type package: str | module

    :param recursive: also import the submodules of sub packages (default: True)
    :type recursive: bool

    :return: map of full module name -> imported module
    :rtype: dict[str, types.ModuleType]
    """
    if isinstance(package, str):
        package = importlib.import_module(package)

    imported = {}
    for _finder, short_name, is_package in pkgutil.walk_packages(package.__path__):
        if short_name == 'liminal_python_server':
            continue
        qualified_name = f'{package.__name__}.{short_name}'
        imported[qualified_name] = importlib.import_module(qualified_name)
        if is_package and recursive:
            imported.update(import_module(qualified_name))
    return imported


def __get_class(the_module, the_class):
    """
    Import ``the_module`` (a dotted name) and return its ``the_class`` attribute.
    """
    # __import__ returns the top-level package, so the remaining segments of
    # the dotted name are walked as attributes before reaching the class.
    target = __import__(the_module)
    attribute_path = the_module.split('.')[1:]
    attribute_path.append(the_class)
    for attribute in attribute_path:
        target = getattr(target, attribute)
    return target


================================================
FILE: liminal/core/util/dict_util.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import re
from collections import OrderedDict

from sqlalchemy.util import OrderedSet

from liminal.runners.airflow.config import standalone_variable_backend


def merge_dicts(dict1, dict2, recursive=False):
    """
    Merge two dicts into a new one.

    Mind the precedence, which differs between the two modes:
    non-recursive - a plain top-level merge where dict2 wins on shared keys;
    recursive - nested dicts are merged and dict1 wins on every other conflict.

    :returns the merged dict
    """
    if recursive:
        return __merge_dicts(dict1, dict2)

    merged = dict(dict1)
    merged.update(dict2)
    return merged


def __merge_dicts(dict1, dict2):
    """
    Recursive merge: keys come out in dict1's order followed by the keys only
    dict2 has. Values that are dicts on both sides are merged; for any other
    shared key the dict1 value is kept.
    """
    merged = OrderedDict()
    for key in OrderedSet(dict1.keys()).union(dict2.keys()):
        if key not in dict1:
            merged[key] = dict2[key]
            continue

        own_value = dict1[key]
        if key in dict2 and isinstance(own_value, dict) and isinstance(dict2[key], dict):
            merged[key] = dict(__merge_dicts(own_value, dict2[key]))
        else:
            merged[key] = own_value
    return merged


# Matches a {{ name }} placeholder (whitespace around the name is optional);
# group 1 captures the name: letters, digits, '.', '_' and '-'.
__PLACE_HOLDER_PATTERN = r"{{\s*([a-zA-Z0-9._-]+)\s*}}"


def replace_placeholders(dct, variables):
    """
    Return a new dict equal to dct with every {{variable.key}} placeholder
    replaced by the matching value from variables.
    """
    rendered = {}
    for key, value in __replace_placeholders(dct, variables):
        rendered[key] = value
    return rendered


def replace_placholders_in_string(string_value, variables, pattern=__PLACE_HOLDER_PATTERN):
    """
    Replace every match of pattern in string_value with the value resolved
    for its captured name (matching is case-insensitive).
    """

    def substitute(match):
        return __repl(match, variables)

    return re.sub(pattern, substitute, string_value, flags=re.IGNORECASE)


def __replace_placeholders(dct, variables):
    """
    Yield the (key, value) pairs of dct with placeholders replaced in string
    values and, recursively, inside dict and list values. Other values are
    passed through unchanged.
    """
    for key, value in dct.items():
        if isinstance(value, str):
            rendered = replace_placholders_in_string(value, variables)
        elif isinstance(value, dict):
            rendered = dict(__replace_placeholders(value, variables))
        elif isinstance(value, list):
            rendered = list(__replace_placeholder_in_list(value, variables))
        else:
            rendered = value
        yield key, rendered


def __replace_placeholder_in_list(lst, variables):
    """
    Yield the items of lst with placeholders replaced in string items and,
    recursively, inside dict and list items. Other items are passed through
    unchanged.
    """
    for v in lst:
        if isinstance(v, str):
            yield replace_placholders_in_string(v, variables)
        elif isinstance(v, dict):
            yield dict(__replace_placeholders(v, variables))
        elif isinstance(v, list):
            # Materialize the nested result: yielding the bare generator would
            # put a generator object where the caller expects a list.
            yield list(__replace_placeholder_in_list(v, variables))
        else:
            yield v


def __repl(matched, variables):
    """
    Resolve one placeholder match.

    :param matched: regex match whose group(1) is the variable name
    :param variables: mapping of variable name -> value
    :returns: the value from variables when the name is defined there,
        otherwise whatever the variable backend returns for it, with the
        original placeholder text as the default
    """
    key = matched.group(1)
    # Only consult the variable backend when the name is not defined locally.
    if key in variables:
        return variables[key]
    return __try_backend_variables(key, default=matched.group(0))


def __try_backend_variables(key, default):
    """Look key up in the standalone variable backend, passing default along as the fallback."""
    backend_value = standalone_variable_backend.get_variable(key, default)
    return backend_value


================================================
FILE: liminal/core/util/env_util.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os


def is_running_on_jenkins():
    """
    Tell whether the JENKINS_URL environment variable is set to a non-empty value.

    :returns: True when JENKINS_URL is set and non-empty, False otherwise
    """
    # bool() so callers always get a real boolean rather than the URL string or ''
    return bool(os.getenv('JENKINS_URL'))


================================================
FILE: liminal/core/util/extensible.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import sys

from liminal.build.image_builder import ImageBuilder
from liminal.core.util import class_util
from liminal.runners.airflow.model import executor
from liminal.runners.airflow.model.task import Task

__PLUGINS = 'plugins'


def __generate_extra_paths(plugin_type, extra_paths):
    """
    Build the list of additional package paths to search for extensions.

    :param plugin_type: plugin sub-package name, e.g. 'executors'
    :param extra_paths: caller supplied paths, may be None
    :returns: a new list holding ``extra_paths`` followed by ``plugins.<plugin_type>``
        when that exact string is an entry of ``sys.path``
    """
    plugin_package = f'{__PLUGINS}.{plugin_type}'
    configured = extra_paths or []
    if plugin_package in sys.path:
        return configured + [plugin_package]
    return configured + []


def load_executors(extra_paths=None):
    """
    Load all Executor extensions.

    Searches the built-in ``liminal.runners.airflow.executors`` package followed by
    the paths produced by ``__generate_extra_paths('executors', extra_paths)``.

    :param extra_paths: optional list of additional package paths
    :returns: result of ``class_util.find_subclasses_in_packages`` for ``executor.Executor``
    """
    search_packages = ['liminal.runners.airflow.executors']
    search_packages.extend(__generate_extra_paths('executors', extra_paths))
    return class_util.find_subclasses_in_packages(search_packages, executor.Executor)


def load_tasks(extra_paths=None):
    """
    Load all Task extensions.

    Searches the built-in ``liminal.runners.airflow.tasks`` package followed by
    the paths produced by ``__generate_extra_paths('tasks', extra_paths)``.

    :param extra_paths: optional list of additional package paths
    :returns: result of ``class_util.find_subclasses_in_packages`` for ``Task``
    """
    search_packages = ['liminal.runners.airflow.tasks']
    search_packages.extend(__generate_extra_paths('tasks', extra_paths))
    return class_util.find_subclasses_in_packages(search_packages, Task)


def load_image_builders(extra_paths=None):
    """
    Load all ImageBuilder extensions.

    Searches the built-in ``liminal.build.image`` package followed by the paths
    produced by ``__generate_extra_paths('images', extra_paths)``.

    :param extra_paths: optional list of additional package paths
    :returns: result of ``class_util.find_subclasses_in_packages`` for ``ImageBuilder``
    """
    search_packages = ['liminal.build.image']
    search_packages.extend(__generate_extra_paths('images', extra_paths))
    return class_util.find_subclasses_in_packages(search_packages, ImageBuilder)


================================================
FILE: liminal/core/util/files_util.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os

import yaml

# { path -> { name -> config_file } }, filled by load()
cached_files = dict()
# { config name -> path of the liminal.y[a]ml file it was loaded from }, filled by load()
cached_source_files = dict()


def resolve_pipeline_source_file(config_name):
    """
    Look up the file a config was loaded from.

    :param config_name: value of the config's ``name`` field
    :returns: path recorded in ``cached_source_files`` for that name
    :raises KeyError: when no config with that name has been recorded
    """
    source_path = cached_source_files[config_name]
    return source_path


def load(path):
    """
    Load every liminal.y[a]ml file found under ``path``, caching the result per path.

    Each loaded config is also recorded in ``cached_source_files`` under its ``name``.

    :param path: config path
    :returns dict of {name -> loaded config file} from all liminal.y[a]ml files under given path
    :raises KeyError: when a config file has no ``name`` field
    """

    # Membership test rather than truthiness: an empty result ({}) is a valid cached
    # value and must not trigger a fresh directory walk on every call.
    if path in cached_files:
        return cached_files[path]

    config_entities = {}

    for file_path in find_config_files(path):
        with open(file_path) as data:
            config_file = yaml.safe_load(data)
        config_entities[config_file['name']] = config_file
        cached_source_files[config_file['name']] = file_path

    cached_files[path] = config_entities
    return config_entities


def find_config_files(path):
    """
    Walk ``path`` recursively and collect the liminal config files in it.

    The searched path and every match are logged at INFO level.

    :param path: config path
    :returns list of all liminal.y[a]ml files under config path
    """
    config_names = ('liminal.yml', 'liminal.yaml')
    logging.info(path)
    matches = []
    for root, _dirs, filenames in os.walk(path):
        for filename in filenames:
            if os.path.basename(filename) not in config_names:
                continue
            full_path = os.path.join(root, filename)
            logging.info(full_path)
            matches.append(full_path)
    return matches


def dump_liminal_configs(liminal_configs, path):
    """
    Write each liminal config to ``<path>/<config name>.yml``.

    :param liminal_configs: iterable of config dicts, each with a ``name`` field
    :param path: target directory; created (including missing parents) when absent
    """
    # makedirs(exist_ok=True) also creates missing parents and avoids the
    # check-then-create race of exists() + mkdir().
    os.makedirs(path, exist_ok=True)

    logging.info(f"Starting to dump liminal configs into {path}")

    for liminal_config in liminal_configs:
        dump_liminal_config(liminal_config, f'{path}/{liminal_config["name"]}.yml')


def dump_liminal_config(liminal_config, file_path):
    """
    Write one liminal config to ``file_path`` as block-style YAML.

    :param liminal_config: config dict with a ``name`` field (used in the log message)
    :param file_path: destination file, opened for writing
    """
    with open(file_path, 'w') as out:
        logging.info(f"Dumping {liminal_config['name']} into {file_path}")
        yaml.dump(liminal_config, stream=out, default_flow_style=False)


================================================
FILE: liminal/docker/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/kubernetes/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/kubernetes/secret_util.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import base64
import logging
import os
import sys
from pathlib import Path
from time import sleep

from kubernetes import client, config
from kubernetes.client import V1Secret

# noinspection PyBroadException
try:
    # Best effort: load the kube config when the module is imported
    config.load_kube_config()
except Exception:
    # Loading failed: report on stdout and keep importing the module
    msg = "Kubernetes is not running\n"
    sys.stdout.write(f"INFO: {msg}")

# NOTE(review): the logger is named 'volume_util' although this module is secret_util —
# looks like a copy/paste leftover; confirm before renaming.
_LOG = logging.getLogger('volume_util')
# Secret names added by create_secret() and removed again by delete_local_secret()
_LOCAL_VOLUMES = set()
# Shared CoreV1Api client used by every function in this module
_kubernetes = client.CoreV1Api()


def get_secret_configs(liminal_config):
    """
    Return the ``secrets`` section of a liminal config, filling in local file paths.

    Entries that name a ``secret`` but have no ``local_path_file`` are updated in
    place: the path ``<cwd>/credentials-<secret>.txt`` is created if missing and
    stored as ``local_path_file``.

    :param liminal_config: liminal config dict
    :returns: the (possibly mutated) list under ``secrets``, or [] when absent
    """
    secret_entries = liminal_config.get('secrets', [])

    for entry in secret_entries:
        if 'secret' not in entry or 'local_path_file' in entry:
            continue
        placeholder = f"{os.getcwd()}/credentials-{entry['secret']}.txt"
        # Append mode creates the file when absent and leaves existing content alone
        with open(placeholder, 'a'):
            pass
        entry['local_path_file'] = placeholder
    return secret_entries


def create_local_secrets(liminal_config):
    """
    Call ``create_secret`` for every secret entry of the given liminal config.

    :param liminal_config: liminal config dict
    """
    for secret_entry in get_secret_configs(liminal_config):
        logging.info(f'Creating local kubernetes secret if needed: {secret_entry}')
        create_secret(secret_entry)


def create_secret(conf, namespace='default') -> None:
    """
    Create the kubernetes secret described by ``conf`` unless it already exists.

    Names that were handled once are remembered in ``_LOCAL_VOLUMES`` so later
    calls return without contacting the cluster.

    :param conf: secret config; ``secret`` holds the secret name
    :param namespace: namespace to look in and create the secret in
    """
    name = conf['secret']

    _LOG.info(f'Requested secret {name}')

    # Already handled in this process: nothing to look up. Returning here also avoids
    # reading `matching_secrets` before it has been assigned.
    if name in _LOCAL_VOLUMES:
        return

    matching_secrets = _kubernetes.list_namespaced_secret(
        namespace, field_selector=f'metadata.name={name}'
    ).to_dict()['items']

    if not matching_secrets:
        _create_secret(namespace, conf, name)
        sleep(5)

    # Remember the name whether the secret was just created or was already present
    _LOCAL_VOLUMES.add(name)


def _create_secret(namespace, conf, name):
    """
    Create a kubernetes secret whose ``credentials`` entry holds a local file's content.

    :param namespace: namespace to create the secret in
    :param conf: secret config; ``local_path_file`` is the file to read (``~`` is expanded)
    :param name: secret name
    """
    _LOG.info(f'Creating kubernetes secret {name} with spec {conf}')

    credentials_file = Path(os.path.expanduser(conf['local_path_file']))
    # The file text is base64 encoded and stored as an ASCII string
    encoded_credentials = base64.b64encode(credentials_file.read_text().encode('ascii')).decode('ascii')

    secret = V1Secret(
        api_version='v1',
        kind='Secret',
        metadata={
            'name': name,
            'labels': {"apache/incubator-liminal": "liminal.apache.org"},
        },
        data={'credentials': encoded_credentials},
    )

    _kubernetes.create_namespaced_secret(namespace, secret)


def delete_local_secrets(liminal_config, base_dir):
    """
    Call ``delete_local_secret`` for every secret entry of the given liminal config.

    :param liminal_config: liminal config dict
    :param base_dir: unused; kept so existing callers keep working
    """
    # get_secret_configs() takes the config only; passing base_dir raised TypeError
    secrets_config = get_secret_configs(liminal_config)

    for secret_config in secrets_config:
        logging.info(f'Delete local secret if needed: {secret_config}')
        # delete_local_secret() expects the secret name, not the whole config entry
        delete_local_secret(secret_config['secret'])


def delete_local_secret(name, namespace='default'):
    """
    Delete the named kubernetes secret when it exists and forget it locally.

    :param name: secret name
    :param namespace: namespace to look in and delete from
    """
    listing = _kubernetes.list_namespaced_secret(namespace, field_selector=f'metadata.name={name}')

    if len(listing.to_dict()['items']) > 0:
        _LOG.info(f'Deleting secret {name}')
        _kubernetes.delete_namespaced_secret(name, namespace)

    # discard() is a no-op when the name was never recorded
    _LOCAL_VOLUMES.discard(name)


================================================
FILE: liminal/kubernetes/volume_util.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os
import sys
from time import sleep

from kubernetes import client, config
from kubernetes.client import V1PersistentVolume, V1PersistentVolumeClaim

# noinspection PyBroadException
try:
    # Best effort: load the kube config when the module is imported
    config.load_kube_config()
except Exception:
    # Loading failed: report on stdout and keep importing the module
    msg = "Kubernetes is not running\n"
    sys.stdout.write(f"INFO: {msg}")

_LOG = logging.getLogger('volume_util')
# Volume names added by create_local_volume() and removed again by delete_local_volume()
_LOCAL_VOLUMES = set()
# Shared CoreV1Api client used by every function in this module
_kubernetes = client.CoreV1Api()


def get_volume_configs(liminal_config, base_dir):
    """
    Return the ``volumes`` section of a liminal config with local paths anchored at base_dir.

    Entries with a ``local`` section are updated in place when their path starts
    with ``.``: paths starting with ``..`` are joined onto ``base_dir`` unchanged,
    other dot-prefixed paths lose the leading ``.`` and any separators after it
    before being joined. Paths not starting with ``.`` are left as they are.

    :param liminal_config: liminal config dict
    :param base_dir: directory that relative local paths are resolved against
    :returns: the (possibly mutated) list under ``volumes``, or [] when absent
    """
    volumes_config = liminal_config.get('volumes', [])

    for volume_config in volumes_config:
        if 'local' not in volume_config:
            continue
        path = volume_config['local']['path']
        if path.startswith('..'):
            path = os.path.join(base_dir, path)
        elif path.startswith('.'):
            # Drop the separators following the dot as well: os.path.join() discards
            # base_dir when the second component is absolute, turning './x' into '/x'.
            path = os.path.join(base_dir, path[1:].lstrip('/' + os.sep))
        volume_config['local']['path'] = path
    return volumes_config


def create_local_volumes(liminal_config, base_dir):
    """
    Call ``create_local_volume`` for every volume entry of the given liminal config.

    :param liminal_config: liminal config dict
    :param base_dir: directory handed to ``get_volume_configs`` for path resolution
    """
    for volume_entry in get_volume_configs(liminal_config, base_dir):
        logging.info(f'Creating local kubernetes volume if needed: {volume_entry}')
        create_local_volume(volume_entry)


def create_local_volume(conf, namespace='default') -> None:
    """
    Make sure the persistent volume described by ``conf`` and its claim exist.

    Creation is retried every 5 seconds until the volume, and then the claim,
    shows up in the cluster listing. Handled names are remembered in
    ``_LOCAL_VOLUMES`` so later calls return immediately.

    :param conf: volume config; ``volume`` is the volume name, optional ``claim_name``
        overrides the default claim name ``<volume>-pvc``
    :param namespace: namespace the claim is created in
    """
    name = conf['volume']

    _LOG.info(f'Requested volume {name}')

    if name in _LOCAL_VOLUMES:
        return

    while len(_list_persistent_volumes(name)) == 0:
        _create_local_volume(conf, name)
        sleep(5)

    pvc_name = conf.get('claim_name', f'{name}-pvc')

    while len(_list_persistent_volume_claims(pvc_name)) == 0:
        _create_persistent_volume_claim(pvc_name, name, namespace)
        sleep(5)

    _LOCAL_VOLUMES.add(name)


def delete_local_volumes(liminal_config, base_dir):
    """
    Call ``delete_local_volume`` for every volume entry of the given liminal config.

    :param liminal_config: liminal config dict
    :param base_dir: directory handed to ``get_volume_configs`` for path resolution
    """
    for volume_entry in get_volume_configs(liminal_config, base_dir):
        logging.info(f'Delete local kubernetes volume if needed: {volume_entry}')
        delete_local_volume(volume_entry['volume'])


def delete_local_volume(name, namespace='default'):
    """
    Delete the claim ``<name>-pvc`` and the persistent volume ``name`` when they exist.

    After each delete request the function polls once per second until the
    object no longer shows up in the cluster listing.

    :param name: persistent volume name
    :param namespace: namespace the claim is deleted from
    """
    pvc_name = f'{name}-pvc'

    if len(_list_persistent_volume_claims(pvc_name)) > 0:
        _LOG.info(f'Deleting persistent volume claim {pvc_name}')
        _kubernetes.delete_namespaced_persistent_volume_claim(pvc_name, namespace)

        # Pause between polls instead of querying the API server in a tight loop
        while len(_list_persistent_volume_claims(pvc_name)) > 0:
            sleep(1)

    if len(_list_persistent_volumes(name)) > 0:
        _LOG.info(f'Deleting persistent volume {name}')
        _kubernetes.delete_persistent_volume(name)

        while len(_list_persistent_volumes(name)) > 0:
            sleep(1)

    _LOCAL_VOLUMES.discard(name)


def _list_persistent_volume_claims(name):
    """
    List the persistent volume claims with the given name across all namespaces.

    :param name: claim name used in the ``metadata.name`` field selector
    :returns: the ``items`` list of the listing converted to a dict
    """
    selector = f'metadata.name={name}'
    listing = _kubernetes.list_persistent_volume_claim_for_all_namespaces(field_selector=selector)
    return listing.to_dict()['items']


def _list_persistent_volumes(name):
    """
    List the persistent volumes with the given name.

    :param name: volume name used in the ``metadata.name`` field selector
    :returns: the ``items`` list of the listing converted to a dict
    """
    selector = f'metadata.name={name}'
    listing = _kubernetes.list_persistent_volume(field_selector=selector)
    return listing.to_dict()['items']


def _create_persistent_volume_claim(pvc_name, volume_name, namespace):
    """
    Create a persistent volume claim bound to the given volume.

    The claim requests 100Gi of ``local-storage`` with ``ReadWriteOnce`` access.

    :param pvc_name: name of the claim to create
    :param volume_name: name of the persistent volume the claim refers to
    :param namespace: namespace to create the claim in
    """
    _LOG.info(f'Creating persistent volume claim {pvc_name} with volume {volume_name}')

    claim = V1PersistentVolumeClaim(
        api_version='v1',
        kind='PersistentVolumeClaim',
        metadata={'name': pvc_name},
        spec={
            'volumeName': volume_name,
            'volumeMode': 'Filesystem',
            'storageClassName': 'local-storage',
            'accessModes': ['ReadWriteOnce'],
            'resources': {'requests': {'storage': '100Gi'}},
        },
    )

    _kubernetes.create_namespaced_persistent_volume_claim(namespace, claim)


def _create_local_volume(conf, name):
    """
    Create a persistent volume from default settings overlaid with ``conf``.

    :param conf: volume config; every key in it is copied into the volume spec and
        overrides the default of the same name
    :param name: persistent volume name
    """
    _LOG.info(f'Creating persistent volume {name} with spec {conf}')

    hostname_rule = {'key': 'kubernetes.io/hostname', 'operator': 'NotIn', 'values': ['']}
    defaults = {
        'capacity': {'storage': '100Gi'},
        'volumeMode': 'Filesystem',
        'accessModes': ['ReadWriteOnce'],
        'persistentVolumeReclaimPolicy': 'Retain',
        'storageClassName': 'local-storage',
        'nodeAffinity': {'required': {'nodeSelectorTerms': [{'matchExpressions': [hostname_rule]}]}},
    }

    # Entries from conf win over the defaults
    spec = {**defaults, **conf}

    volume = V1PersistentVolume(api_version='v1', kind='PersistentVolume', metadata={'name': name}, spec=spec)
    _kubernetes.create_persistent_volume(volume)


================================================
FILE: liminal/logging/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/logging/logging_setup.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os
from logging.handlers import RotatingFileHandler

from liminal.core import environment

# Sub-directory of the liminal home directory that holds the log file
LOGS_DIR = 'logs'
# Name of the rotating log file inside LOGS_DIR
LOG_FILENAME = 'liminal.log'
# Passed to RotatingFileHandler as maxBytes
MAX_FILE_SIZE = 10485760  # 10 MB


def logging_initialization():
    """
    Attach a rotating file handler to the root logger and apply the liminal log format.

    The log file lives at ``<liminal home>/logs/liminal.log`` (directory created when
    missing) and is rotated at MAX_FILE_SIZE with 3 backups. The root level is set
    to INFO and the formatter is applied to every handler on the root logger.
    """
    logs_dir = os.path.join(environment.get_liminal_home(), LOGS_DIR)
    os.makedirs(logs_dir, exist_ok=True)

    root_logger = logging.getLogger()
    root_logger.addHandler(
        RotatingFileHandler(os.path.join(logs_dir, LOG_FILENAME), maxBytes=MAX_FILE_SIZE, backupCount=3)
    )
    root_logger.setLevel(logging.INFO)

    log_formatter = logging.Formatter(
        '[%(asctime)s] [%(filename)s:%(lineno)d] %(levelname)s - %(message)s', '%m-%d %H:%M:%S'
    )
    for handler in root_logger.handlers:
        handler.setFormatter(log_formatter)

    logging.info('Logging initialization completed')


================================================
FILE: liminal/monitoring/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/runners/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/runners/airflow/__init__.py
================================================
##
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import datetime
from unittest.mock import MagicMock

import pytz


class DummyDagRun:
    """Minimal stand-in for a dag run: a UTC start date, no conf and a fixed run id."""

    def __init__(self) -> None:
        # Timezone-aware "now" in UTC; datetime.utcnow() is deprecated in recent Python versions
        self.start_date = datetime.now(tz=pytz.utc)
        self.conf = None
        self.run_id = "run_id"

    @staticmethod
    def get_task_instances():
        """Return an empty list of task instances."""
        return []

    def get_task_instance(self, _):
        """Return this object itself, whatever task id is requested."""
        return self


from datetime import datetime

# Not referenced inside this module; presumably used by importers to compose task ids — verify
TASK_ID_SEPARATOR = '.'


class DummyDag:
    """
    Stand-in object with fixed dummy attributes and a ``context`` dict.

    The same instance is registered in ``context`` under ``dag``, ``task`` and ``ti``.
    """

    def __init__(self, dag_id, task_id):
        self.dag_id = dag_id
        self.task_id = task_id

        # Fixed dummy values
        self.try_number = 0
        self.is_subdag = False
        self.execution_date = '2017-05-21T00:00:00'
        self.dag_run_id = 'dag_run_id'
        self.owner = ['owner1', 'owner2']
        self.email = ['email1@test.com']
        self.task = MagicMock(name='task', owner=self.owner, email=self.email)

        now_ts = datetime.now().timestamp()
        self.context = dict(dag=self, task=self, ti=self, ts=now_ts, dag_run=DummyDagRun())

    def get_dagrun(self):
        """Return the dag run stored in the context."""
        return self.context['dag_run']

    @staticmethod
    def xcom_push(key, value):
        """Return the pushed pair as a ``(key, value)`` tuple; nothing is stored."""
        return (key, value)


================================================
FILE: liminal/runners/airflow/config/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/runners/airflow/config/standalone_variable_backend.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os
from os import environ
from sqlite3 import OperationalError

from airflow.models import Variable

LIMINAL_STAND_ALONE_MODE_KEY = "LIMINAL_STAND_ALONE_MODE"


# noinspection PyBroadException
def get_variable(key, default_val):
    """
    Read a variable from the environment (local mode) or from Airflow variables.

    :param key: variable name
    :param default_val: value returned when the variable cannot be found or the
        Airflow lookup fails
    :returns: the variable value, or ``default_val``
    """
    if liminal_local_mode():
        return os.environ.get(key, default_val)

    try:
        return Variable.get(key, default_var=default_val)
    except OperationalError as e:
        # Only the exception class is logged for this error type
        logging.warning(
            f'Failed to find variable {key} in Airflow variables table.'
            f' Error: {e.__class__.__module__}.{e.__class__.__name__}'
        )
    except Exception as e:
        logging.warning(f'Failed to find variable {key} in Airflow variables table. Error: {e}')

    # Both failure paths fall back to the default (the OperationalError path used to return None)
    return default_val


def liminal_local_mode():
    """
    Tell whether the stand-alone mode environment variable is set to "true".

    Surrounding whitespace and letter case are ignored; an unset variable counts as "False".
    """
    raw_flag = environ.get(LIMINAL_STAND_ALONE_MODE_KEY, "False")
    normalized = raw_flag.strip().lower()
    return normalized == "true"


================================================
FILE: liminal/runners/airflow/dag/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import os

from liminal.core import environment as env
from liminal.runners.airflow.dag import liminal_register_dags

BASE_PATH = os.path.join(env.get_airflow_home_dir(), env.DEFAULT_PIPELINES_SUBDIR)


def register_dags():
    """
    Register the DAGs for the liminal configs found under BASE_PATH.

    :returns: whatever ``liminal_register_dags.register_dags(BASE_PATH)`` returns
    """
    registered = liminal_register_dags.register_dags(BASE_PATH)
    return registered


================================================
FILE: liminal/runners/airflow/dag/liminal_dags.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import traceback

from airflow import DAG

import liminal.runners.airflow.dag as liminal

# register_dags() returns (pipeline, dag) pairs, which are unpacked below
pipelines = liminal.register_dags()

for pipeline, dag in pipelines:
    try:
        # Bind each DAG to a module-level name equal to its pipeline name
        # (presumably so Airflow's DAG discovery picks it up — confirm)
        globals()[pipeline] = dag
    except Exception:
        # Print the traceback and continue with the remaining pipelines
        traceback.print_exc()


================================================
FILE: liminal/runners/airflow/dag/liminal_register_dags.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
import os
import traceback
from datetime import datetime, timedelta

from airflow import DAG

from liminal.core.config.config import ConfigUtil
from liminal.core.util import extensible
from liminal.runners.airflow.executors import airflow

# Pipeline config key read by __default_args(); treated as False when absent
__DEPENDS_ON_PAST = 'depends_on_past'


# noinspection PyBroadException
def register_dags(configs_path):
    """
    Registers pipelines in liminal yml files found in given path (recursively) as airflow DAGs.

    A failure while processing one liminal config is logged and printed; the
    remaining configs are still processed.

    :param configs_path: path searched for liminal configs
    :returns: list of ``(pipeline name, DAG)`` tuples, one per registered pipeline
    """
    logging.info(f'Registering DAGs from path: {configs_path}')
    config_util = ConfigUtil(configs_path)
    configs = config_util.safe_load(is_render_variables=False)

    # The snapshot step is skipped when POD_NAMESPACE is "jenkins"
    if os.getenv('POD_NAMESPACE') != "jenkins":
        config_util.snapshot_final_liminal_configs()

    dags = []
    logging.info(f'found {len(configs)} liminal configs in path: {configs_path}')
    for config in configs:
        name = config['name'] if 'name' in config else None
        try:
            if not name:
                raise ValueError('liminal.yml missing field `name`')

            logging.info(f"Registering DAGs for {name}")

            owner = config.get('owner')

            # `always_run` makes tasks run once their upstream is done, whatever the outcome
            trigger_rule = 'all_success'
            if 'always_run' in config and config['always_run']:
                trigger_rule = 'all_done'

            executors = __initialize_executors(config)

            default_executor = airflow.AirflowExecutor("default_executor", liminal_config=config, executor_config={})

            for pipeline in config['pipelines']:
                default_args = __default_args(pipeline)
                dag = __initialize_dag(default_args, pipeline, owner)

                # Tasks are chained: each task receives the result of applying the previous one
                parent = None

                for task in pipeline['tasks']:
                    task_type = task['type']
                    task_instance = get_task_class(task_type)(
                        task_id=task['task'],
                        dag=dag,
                        parent=parent,
                        trigger_rule=trigger_rule,
                        liminal_config=config,
                        pipeline_config=pipeline,
                        task_config=task,
                        variables=config.get('variables', {}),
                    )

                    executor_id = task.get('executor')
                    if executor_id:
                        executor = executors[executor_id]
                    else:
                        # Plain f-string placeholders: the former `${...}` form printed a stray `$`
                        logging.info(
                            f"Did not find `executor` in {task['task']} config."
                            f" Using the default executor ({type(default_executor)})"
                            f" instead."
                        )
                        executor = default_executor

                    parent = executor.apply_task_to_dag(task=task_instance)

                logging.info(f'registered DAG {dag.dag_id}: {dag.tasks}')

                dags.append((pipeline['pipeline'], dag))

        except Exception:
            logging.error(f'Failed to register DAGs for {name}')
            traceback.print_exc()

    return dags


def __initialize_executors(liminal_config):
    """
    Instantiate every executor declared in the liminal config.

    :param liminal_config: liminal configuration; its optional ``executors``
        entry is iterated, and each item must provide ``type`` and ``executor``
    :returns: dict mapping each item's ``executor`` value to the instance
        created from the class registered for its ``type``
    """
    executors_by_id = {}
    for definition in liminal_config.get('executors', {}):
        executor_class = get_executor_class(definition['type'])
        executor_id = definition['executor']
        executors_by_id[executor_id] = executor_class(executor_id, liminal_config, definition)
    return executors_by_id


def __initialize_dag(default_args, pipeline, owner):
    """
    Build the Airflow DAG for a single pipeline.

    :param default_args: default arguments for the DAG. This dict is modified in
        place: ``owner`` may be added, and the ``tasks``, ``schedule``,
        ``monitoring`` and ``schedule_interval`` keys are removed before it is
        handed to the DAG.
    :param pipeline: pipeline configuration; ``pipeline`` (used as the dag id) and
        ``timeout_minutes`` are required, ``start_date`` is optional
    :param owner: owner to record in ``default_args`` when it does not already
        define one; ignored when falsy
    :returns: the created DAG
    """
    pipeline_name = pipeline['pipeline']

    # `schedule_interval` wins; `schedule` is only consulted when it is missing or falsy.
    schedule_interval = default_args.get('schedule_interval', None)
    if not schedule_interval:
        schedule_interval = default_args.get('schedule', None)

    if owner and 'owner' not in default_args:
        default_args['owner'] = owner

    # A missing start_date stays None. The previous fallback was a `time` object,
    # which datetime.combine() rejects as its first argument (TypeError).
    start_date = pipeline.get('start_date')
    if start_date is not None and not isinstance(start_date, datetime):
        # Plain dates are promoted to a datetime at midnight.
        start_date = datetime.combine(start_date, datetime.min.time())

    # These keys are consumed here (or elsewhere) and are not passed on as default args.
    default_args.pop('tasks', None)
    default_args.pop('schedule', None)
    default_args.pop('monitoring', None)
    default_args.pop('schedule_interval', None)

    dag = DAG(
        dag_id=pipeline_name,
        default_args=default_args,
        dagrun_timeout=timedelta(minutes=pipeline['timeout_minutes']),
        start_date=start_date,
        schedule_interval=schedule_interval,
        catchup=False,
    )

    return dag


def __default_args(pipeline):
    """
    Derive DAG default arguments from a pipeline configuration.

    :param pipeline: pipeline configuration; must contain ``start_date``
    :returns: a new dict holding every pipeline entry, with ``start_date``
        combined with midnight into a datetime and the depends-on-past key
        defaulting to ``False`` when the pipeline does not set it
    """
    args = dict(pipeline)
    args['start_date'] = datetime.combine(pipeline['start_date'], datetime.min.time())
    args.setdefault(__DEPENDS_ON_PAST, False)
    return args


# Module-level registries, populated once when this module is imported.
# They back the get_task_class / get_executor_class lookups defined below.
logging.info(f'Loading task implementations..')

# Mapping used to resolve a task's `type` to its implementation.
tasks_by_liminal_name = extensible.load_tasks()

logging.info(f'Finished loading task implementations: {tasks_by_liminal_name.keys()}')

# Mapping used to resolve an executor's `type` to its implementation.
executors_by_liminal_name = extensible.load_executors()

logging.info(f'Finished loading executor implementations: {executors_by_liminal_name.keys()}')


def get_task_class(task_type):
    """
    Look up the task implementation registered under ``task_type``.

    :param task_type: key into ``tasks_by_liminal_name``
    :returns: the value registered for that key
    :raises KeyError: if nothing is registered under ``task_type``
    """
    task_class = tasks_by_liminal_name[task_type]
    return task_class


def get_executor_class(executor_type):
    """
    Look up the executor implementation registered under ``executor_type``.

    :param executor_type: key into ``executors_by_liminal_name``
    :returns: the value registered for that key
    :raises KeyError: if nothing is registered under ``executor_type``
    """
    executor_class = executors_by_liminal_name[executor_type]
    return executor_class


================================================
FILE: liminal/runners/airflow/executors/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/runners/airflow/executors/airflow.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from liminal.runners.airflow.model import executor
from liminal.runners.airflow.tasks import airflow


class AirflowExecutor(executor.Executor):
    """
    Executor that lets the task attach itself to the DAG.
    """

    supported_task_types = [airflow.AirflowTask]

    def _apply_executor_task_to_dag(self, **kwargs):
        """
        Delegate to the task's own ``apply_task_to_dag``.

        :param kwargs: must contain ``task``
        :returns: whatever the task's ``apply_task_to_dag`` returns
        """
        airflow_task = kwargs['task']
        return airflow_task.apply_task_to_dag()


================================================
FILE: liminal/runners/airflow/executors/emr.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from airflow.providers.amazon.aws.operators.emr_add_steps import EmrAddStepsOperator
from airflow.providers.amazon.aws.sensors.emr_step import EmrStepSensor

from liminal.runners.airflow.model import executor
from liminal.runners.airflow.tasks import hadoop


class EMRExecutor(executor.Executor):
    """
    Executor that adds a task to the DAG as an EMR step plus a sensor watching that step.
    """

    supported_task_types = [hadoop.HadoopTask]

    def __init__(self, executor_id, liminal_config, executor_config):
        """
        Read the EMR settings from the executor config.

        Optional keys: ``aws_conn_id`` (default ``aws_default``), ``cluster_state``
        (default ``['RUNNING', 'WAITING']``), ``cluster_id`` and ``cluster_name``.
        """
        super().__init__(executor_id, liminal_config, executor_config)
        settings = self.executor_config
        self.aws_conn_id = settings.get('aws_conn_id', 'aws_default')
        self.cluster_states = settings.get('cluster_state', ['RUNNING', 'WAITING'])
        self.job_flow_id = settings.get('cluster_id', None)
        self.job_flow_name = settings.get('cluster_name', None)

    def _apply_executor_task_to_dag(self, **kwargs):
        """
        Create an add-step operator and a step sensor for the task and chain them.

        :param kwargs: must contain ``task``
        :returns: the sensor operator, i.e. the last operator created for the task
        """
        task = kwargs['task']
        upstream = task.parent

        self._validate_task_type(task)

        # assuming emr already exists
        command_args = [str(part) for part in task.get_runnable_command()]
        submit_operator = EmrAddStepsOperator(
            task_id=f'{task.task_id}_add_step',
            job_flow_id=self.job_flow_id,
            job_flow_name=self.job_flow_name,
            aws_conn_id=self.aws_conn_id,
            steps=self.__generate_emr_step(task.task_id, command_args),
            cluster_states=self.cluster_states,
        )
        add_step = executor.add_variables_to_operator(submit_operator, task)

        if upstream:
            upstream.set_downstream(add_step)

        # The sensor pulls the cluster id and the step id from the add-step operator's XCom.
        add_step_id = add_step.task_id
        watch_operator = EmrStepSensor(
            task_id=f'{task.task_id}_watch_step',
            job_flow_id="{{ task_instance.xcom_pull('" + add_step_id + "', key='job_flow_id') }}",
            step_id="{{ task_instance.xcom_pull('" + add_step_id + "', key='return_value')[0] }}",
            aws_conn_id=self.aws_conn_id,
        )
        emr_sensor_step = executor.add_variables_to_operator(watch_operator, task)

        add_step.set_downstream(emr_sensor_step)

        return emr_sensor_step

    def __generate_emr_step(self, task_id, args):
        """
        Build the single-element step list for the add-step operator.

        Entries of the executor config's ``properties`` may override ``Name``,
        but ``HadoopJarStep`` is always set last and therefore wins.
        """
        step = {'Name': task_id}
        step.update(self.executor_config.get('properties', {}))
        step['HadoopJarStep'] = {'Jar': 'command-runner.jar', 'Args': args}
        return [step]


================================================
FILE: liminal/runners/airflow/executors/kubernetes.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import copy
import datetime
import logging
import os

from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import (
    KubernetesPodOperator,
)
from kubernetes.client import V1Volume, V1VolumeMount
from kubernetes.client import models as k8s

from liminal.core.util import env_util
from liminal.runners.airflow.config.standalone_variable_backend import get_variable
from liminal.runners.airflow.model import executor
from liminal.runners.airflow.tasks import containerable
from liminal.runners.airflow.tasks.containerable import ContainerTask

_LOG = logging.getLogger(__name__)


class KubernetesPodExecutor(executor.Executor):
    """
    Executor that adds container tasks to the DAG as ``KubernetesPodOperator`` instances.

    The volume list is computed once, at construction time, from the ``volumes`` and
    ``secrets`` sections of the liminal config and reused for every operator created.
    """

    supported_task_types = [containerable.ContainerTask]

    def __init__(self, task_id, liminal_config, executor_config):
        """
        :param task_id: forwarded to the base class as the executor id
        :param liminal_config: full liminal configuration
        :param executor_config: this executor's configuration; must contain ``executor``
        """
        super().__init__(task_id, liminal_config, executor_config)

        self.task_name = self.executor_config['executor']
        self.volumes = self._volumes()

    def _apply_executor_task_to_dag(self, **kwargs):
        """
        Create the pod operator for the task and attach it after the task's parent.

        :param kwargs: must contain ``task``
        :returns: the (variable-resolving) pod operator
        """
        task = kwargs['task']
        parent = task.parent

        self._validate_task_type(task)

        pod_task = executor.add_variables_to_operator(
            KubernetesPodOperator(trigger_rule=task.trigger_rule, **self.__kubernetes_kwargs(task)), task
        )

        if parent:
            parent.set_downstream(pod_task)

        return pod_task

    def _volumes(self):
        """
        Build the ``V1Volume`` list from the liminal config.

        Each ``volumes`` entry becomes a persistent-volume-claim volume. When no
        ``claim_name`` is given and the entry has a ``local`` key, the claim name
        defaults to ``<volume>-pvc``. Each ``secrets`` entry becomes a secret volume
        named after the secret.

        :returns: list of ``V1Volume``
        """
        volumes_config = self.liminal_config.get('volumes', [])
        secrets_config = self.liminal_config.get('secrets', [])
        volumes = []
        for volume_config in volumes_config:
            name = volume_config['volume']
            claim_name = volume_config.get('claim_name')
            if not claim_name and 'local' in volume_config:
                claim_name = f'{name}-pvc'
            volume = V1Volume(name=name, persistent_volume_claim={'claimName': claim_name})
            volumes.append(volume)

        for secret_config in secrets_config:
            name = secret_config['secret']
            secret = V1Volume(name=name, secret={'secretName': name})
            volumes.append(secret)

        return volumes

    @staticmethod
    def _in_cluster_from_env():
        """
        Read ``AIRFLOW__KUBERNETES__IN_CLUSTER`` as a boolean.

        Environment values are always strings, so ``"False"`` or ``"0"`` would be
        truthy if passed through unchanged. Unset, empty and explicit false-like
        values give ``False``; any other value gives ``True``.
        """
        raw_value = os.environ.get('AIRFLOW__KUBERNETES__IN_CLUSTER', '')
        return raw_value.strip().lower() not in ('', '0', 'f', 'false', 'n', 'no', 'off')

    def __kubernetes_kwargs(self, task: ContainerTask):
        """
        Assemble the keyword arguments for ``KubernetesPodOperator``.

        Works on a deep copy of the executor config. Keys consumed explicitly are
        popped from the copy; whatever remains is merged over the computed values,
        so extra executor settings are passed straight to the operator.

        :param task: container task to run
        :returns: dict of operator keyword arguments
        """
        config = copy.deepcopy(self.executor_config)

        kubernetes_kwargs = {
            'task_id': task.task_id,
            'image': task.image,
            'arguments': task.arguments,
            'namespace': os.environ.get('AIRFLOW__KUBERNETES__NAMESPACE', 'default'),
            'name': task.task_id.replace('_', '-'),
            # Parsed to a real bool; the raw env string would be truthy even for "False".
            'in_cluster': self._in_cluster_from_env(),
            'image_pull_policy': get_variable('image_pull_policy', default_val='IfNotPresent'),
            'get_logs': config.pop('get_logs', True),
            'is_delete_operator_pod': config.pop('is_delete_operator_pod', True),
            'startup_timeout_seconds': config.pop('startup_timeout_seconds', 1200),
            'env_vars': [k8s.V1EnvVar(name=x, value=v) for x, v in task.env_vars.items()],
            'do_xcom_push': task.task_config.get('do_xcom_push', False),
            'image_pull_secrets': config.pop('image_pull_secrets', 'regcred'),
            'volumes': self.volumes,
            'config_file': os.environ.get('AIRFLOW__KUBERNETES__CONFIG_FILE'),
            'cluster_context': os.environ.get('AIRFLOW__KUBERNETES__CLUSTER_CONTEXT', None),
            'cmds': task.cmds,
            # Task mounts first, then one mount per task secret.
            'volume_mounts': [
                V1VolumeMount(
                    name=mount['volume'],
                    mount_path=mount['path'],
                    sub_path=mount.get('sub_path'),
                    read_only=mount.get('read_only', False),
                )
                for mount in task.mounts
            ]
            + [
                V1VolumeMount(
                    name=secret['secret'],
                    mount_path=secret['remote_path'],
                    sub_path=secret.get('sub_path'),
                    read_only=secret.get('read_only', False),
                )
                for secret in task.secrets
            ],
        }

        # These executor-config keys must not override the values computed above.
        config.pop('in_cluster', None)
        config.pop('volumes', None)
        config.pop('volume_mounts', None)
        config.pop('executor', None)
        config.pop('type', None)

        kubernetes_kwargs.update(config)

        if env_util.is_running_on_jenkins():
            kubernetes_kwargs['affinity'] = self.__jenkins_kubernetes_affinity()
            kubernetes_kwargs['namespace'] = 'jenkins'

        # Without a DAG there is nothing to supply a start date, so set a fixed one.
        if not task.dag:
            kubernetes_kwargs.update(
                {
                    'start_date': datetime.datetime(1970, 1, 1),
                }
            )

        return kubernetes_kwargs

    @staticmethod
    def __jenkins_kubernetes_affinity():
        """
        Pod-affinity spec used on Jenkins: require a host that runs a pod labelled
        ``liminal=unittest`` in the ``jenkins`` namespace.
        """
        return {
            "podAffinity": {
                "requiredDuringSchedulingIgnoredDuringExecution": [
                    {
                        "labelSelector": {
                            "matchExpressions": [{"key": "liminal", "operator": "In", "values": ["unittest"]}]
                        },
                        "namespaces": ["jenkins"],
                        "topologyKey": "kubernetes.io/hostname",
                    }
                ]
            }
        }


================================================
FILE: liminal/runners/airflow/model/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/runners/airflow/model/executor.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from abc import ABC, abstractmethod

from airflow.models import BaseOperator

from liminal.runners.airflow.operators.operator_with_variable_resolving import (
    OperatorWithVariableResolving,
)


def add_variables_to_operator(operator, task) -> BaseOperator:
    """
    Wrap an operator so it carries the task's DAG, config and variables.

    :param operator: Airflow operator
    :type operator: BaseOperator
    :param task: Task instance providing ``dag``, ``task_config`` and ``variables``
    :returns: OperatorWithVariableResolving wrapping given operator
    """
    wrapper_kwargs = {
        'dag': task.dag,
        'task_config': task.task_config,
        'variables': task.variables,
        'liminal_task_instance': task,
        'operator': operator,
    }
    return OperatorWithVariableResolving(**wrapper_kwargs)


class Executor(ABC):
    """
    Base class for executors.

    Subclasses list the task types they accept in ``supported_task_types`` and
    implement ``_apply_executor_task_to_dag``.
    """

    # list of task types supported by the executor
    supported_task_types = []

    def __init__(self, executor_id, liminal_config, executor_config):
        """
        :param executor_id: identifier of this executor
        :param liminal_config: full liminal configuration
        :param executor_config: configuration of this executor
        """
        self.liminal_config = liminal_config
        self.executor_id = executor_id
        self.executor_config = executor_config

    def apply_task_to_dag(self, **kwargs):
        """
        Validate the task's type, then hand it to the executor-specific implementation.

        :param kwargs: must contain ``task``
        :returns: whatever ``_apply_executor_task_to_dag`` returns
        :raises AssertionError: if the task is not an instance of a supported type
        """
        task = kwargs['task']

        self._validate_task_type(task)

        return self._apply_executor_task_to_dag(task=task)

    @abstractmethod
    def _apply_executor_task_to_dag(self, **kwargs):
        """
        Executor-specific logic; receives the validated task as ``task``.
        """
        pass

    def _validate_task_type(self, task):
        """
        Ensure ``task`` is an instance of one of ``supported_task_types``.

        :raises AssertionError: if it is not (including when no types are declared)
        """
        # Raised explicitly rather than via `assert` so the check survives `python -O`;
        # the exception type and message are unchanged for existing callers.
        if not isinstance(task, tuple(self.supported_task_types)):
            raise AssertionError(f'supported task types: {self.supported_task_types}')


================================================
FILE: liminal/runners/airflow/model/task.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Base task.
"""
import json
from abc import ABC

from airflow.models import BaseOperator

from liminal.runners.airflow.model import executor


class Task(ABC):
    """
    Base class for tasks; holds the task's identity, DAG position and configuration.
    """

    def __init__(
        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None
    ):
        # identity and position in the DAG
        self.task_id = task_id
        self.dag = dag
        self.parent = parent
        self.trigger_rule = trigger_rule
        # configuration at each level, plus variables
        self.liminal_config = liminal_config
        self.pipeline_config = pipeline_config
        self.task_config = task_config
        self.variables = variables

    def serialize(self) -> str:
        """
        Dump the constructor arguments as JSON.

        ``dag`` is always written as ``null``; values JSON cannot encode are
        converted with ``str``.

        :returns: JSON string representation of this task
        """
        payload = dict(
            task_id=self.task_id,
            dag=None,
            parent=self.parent,
            trigger_rule=self.trigger_rule,
            liminal_config=self.liminal_config,
            pipeline_config=self.pipeline_config,
            task_config=self.task_config,
            variables=self.variables,
        )
        return json.dumps(payload, default=str)

    def _add_variables_to_operator(self, operator) -> BaseOperator:
        """
        Wrap ``operator`` for this task via ``executor.add_variables_to_operator``.

        :param operator: Airflow operator
        :type operator: BaseOperator
        :returns: OperatorWithVariableResolving wrapping given operator
        """
        wrapped = executor.add_variables_to_operator(operator, self)
        return wrapped


================================================
FILE: liminal/runners/airflow/operators/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/runners/airflow/operators/cloudformation.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""
This module contains CloudFormation create/delete stack operators.
Can be removed when Airflow 2.0.0 is released.
"""
from typing import List

from airflow.models import BaseOperator
from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
from airflow.sensors.base import BaseSensorOperator
from botocore.exceptions import ClientError


# noinspection PyAbstractClass
class CloudFormationHook(AwsBaseHook):
    """
    Interact with AWS CloudFormation.

    :param region_name: region passed to the CloudFormation client; ``None`` leaves
        the choice to the base hook
    :type region_name: str
    """

    def __init__(self, region_name=None, *args, **kwargs):
        self.region_name = region_name
        self.conn = None
        # This signature captures `region_name`, so it has to be forwarded explicitly;
        # otherwise the base initializer only ever sees its own default.
        # NOTE(review): relies on AwsBaseHook.__init__ accepting `region_name` - confirm
        # against the installed amazon provider version.
        super().__init__(*args, client_type='cloudformation', region_name=region_name, **kwargs)

    def get_conn(self):
        """
        Create a CloudFormation client, remember it on ``self.conn`` and return it.
        """
        self.conn = self.get_client_type('cloudformation', self.region_name)
        return self.conn


class BaseCloudFormationOperator(BaseOperator):
    """
    Common base for operators that perform a single CloudFormation call.

    :param params: parameters to be passed to CloudFormation.
    :type params: dict
    :param aws_conn_id: aws connection to use
    :type aws_conn_id: str
    """

    template_fields: List[str] = []
    template_ext = ()
    ui_color = '#1d472b'
    ui_fgcolor = '#FFF'

    def __init__(self, params, aws_conn_id='aws_default', *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Assigned after the base initializer has run.
        self.aws_conn_id = aws_conn_id
        self.params = params

    def execute(self, context):
        """
        Log the parameters and run ``cloudformation_op`` with a fresh client.
        """
        self.log.info('Parameters: %s', self.params)

        hook = CloudFormationHook(aws_conn_id=self.aws_conn_id)
        client = hook.get_conn()
        self.cloudformation_op(client)

    def cloudformation_op(self, cloudformation):
        """
        The actual CloudFormation call; subclasses must override this.

        :param cloudformation: client obtained from ``CloudFormationHook.get_conn``
        """
        raise NotImplementedError()


class CloudFormationCreateStackOperator(BaseCloudFormationOperator):
    """
    Operator that calls ``create_stack`` on the CloudFormation client.

    :param params: parameters to be passed to CloudFormation. For possible arguments see:
            https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudformation.html#CloudFormation.Client.create_stack
    :type params: dict
    :param aws_conn_id: aws connection to use
    :type aws_conn_id: str
    """

    template_fields: List[str] = []
    template_ext = ()
    ui_color = '#6b9659'

    def __init__(self, params, aws_conn_id='aws_default', *args, **kwargs):
        super().__init__(*args, params=params, aws_conn_id=aws_conn_id, **kwargs)

    def cloudformation_op(self, cloudformation):
        """
        Create the stack, expanding ``self.params`` into keyword arguments.
        """
        stack_arguments = self.params
        cloudformation.create_stack(**stack_arguments)


class CloudFormationDeleteStackOperator(BaseCloudFormationOperator):
    """
    Operator that calls ``delete_stack`` on the CloudFormation client.

    :param params: parameters to be passed to CloudFormation. For possible arguments see:
            https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudformation.html#CloudFormation.Client.delete_stack
    :type params: dict
    :param aws_conn_id: aws connection to use
    :type aws_conn_id: str
    """

    template_fields: List[str] = []
    template_ext = ()
    ui_color = '#1d472b'
    ui_fgcolor = '#FFF'

    def __init__(self, params, aws_conn_id='aws_default', *args, **kwargs):
        super().__init__(*args, params=params, aws_conn_id=aws_conn_id, **kwargs)

    def cloudformation_op(self, cloudformation):
        """
        Delete the stack, expanding ``self.params`` into keyword arguments.
        """
        stack_arguments = self.params
        cloudformation.delete_stack(**stack_arguments)


class BaseCloudFormationSensor(BaseSensorOperator):
    """
    Sensor that polls a CloudFormation stack until it reaches a given status.

    :param stack_name: The name of the stack to wait for
    :type stack_name: str
    :param complete_status: stack status that makes the sensor succeed
    :type complete_status: str
    :param in_progress_status: stack status that makes the sensor keep waiting
    :type in_progress_status: str
    :param aws_conn_id: ID of the Airflow connection where credentials and extra configuration are
        stored
    :type aws_conn_id: str
    :param poke_interval: Time in seconds that the job should wait between each try
    :type poke_interval: int
    """

    def __init__(
        self,
        stack_name,
        complete_status,
        in_progress_status,
        aws_conn_id='aws_default',
        poke_interval=30,
        *args,
        **kwargs,
    ):
        super().__init__(*args, poke_interval=poke_interval, **kwargs)
        self.stack_name = stack_name
        self.complete_status = complete_status
        self.in_progress_status = in_progress_status
        self.aws_conn_id = aws_conn_id
        # created lazily by get_hook()
        self.hook = None

    def poke(self, context):
        """
        Check the stack's current status.

        :returns: ``True`` when the stack is in ``complete_status`` (or does not exist and
            that is allowed), ``False`` while it is in ``in_progress_status``
        :raises ValueError: for any other status, or when the stack does not exist and
            that is not allowed
        """
        cloudformation = self.get_hook().get_conn()

        self.log.info('Poking for stack %s', self.stack_name)

        try:
            description = cloudformation.describe_stacks(StackName=self.stack_name)
            stack_status = description['Stacks'][0]['StackStatus']
            if stack_status == self.complete_status:
                return True
            if stack_status == self.in_progress_status:
                return False
            raise ValueError(f'Stack {self.stack_name} in bad state: {stack_status}')
        except ClientError as e:
            # Only the "stack does not exist" client error is interpreted here.
            if 'does not exist' not in str(e):
                raise e
            if self.allow_non_existing_stack_status():
                return True
            raise ValueError(f'Stack {self.stack_name} does not exist')

    def get_hook(self):
        """
        Return the CloudFormationHook for this sensor, creating it on first use.
        """
        if self.hook:
            return self.hook

        self.hook = CloudFormationHook(aws_conn_id=self.aws_conn_id)
        return self.hook

    def allow_non_existing_stack_status(self):
        """
        Whether a "stack does not exist" response counts as success; ``False`` here.
        """
        return False


class CloudFormationCreateStackSensor(BaseCloudFormationSensor):
    """
    Sensor that succeeds on ``CREATE_COMPLETE`` and keeps waiting on ``CREATE_IN_PROGRESS``.

    :param stack_name: The name of the stack to wait for (templated)
    :type stack_name: str
    :param aws_conn_id: ID of the Airflow connection where credentials and extra configuration are
        stored
    :type aws_conn_id: str
    :param poke_interval: Time in seconds that the job should wait between each try
    :type poke_interval: int
    """

    template_fields = ['stack_name']
    ui_color = '#C5CAE9'

    def __init__(self, stack_name, aws_conn_id='aws_default', poke_interval=30, *args, **kwargs):
        super().__init__(
            *args,
            stack_name=stack_name,
            aws_conn_id=aws_conn_id,
            poke_interval=poke_interval,
            complete_status='CREATE_COMPLETE',
            in_progress_status='CREATE_IN_PROGRESS',
            **kwargs,
        )


class CloudFormationDeleteStackSensor(BaseCloudFormationSensor):
    """
    Sensor that succeeds on ``DELETE_COMPLETE`` and keeps waiting on ``DELETE_IN_PROGRESS``.

    A stack that no longer exists is also treated as success.

    :param stack_name: The name of the stack to wait for (templated)
    :type stack_name: str
    :param aws_conn_id: ID of the Airflow connection where credentials and extra configuration are
        stored
    :type aws_conn_id: str
    :param poke_interval: Time in seconds that the job should wait between each try
    :type poke_interval: int
    """

    template_fields = ['stack_name']
    ui_color = '#C5CAE9'

    def __init__(self, stack_name, aws_conn_id='aws_default', poke_interval=30, *args, **kwargs):
        super().__init__(
            *args,
            stack_name=stack_name,
            aws_conn_id=aws_conn_id,
            poke_interval=poke_interval,
            complete_status='DELETE_COMPLETE',
            in_progress_status='DELETE_IN_PROGRESS',
            **kwargs,
        )

    def allow_non_existing_stack_status(self):
        """
        A missing stack counts as successfully deleted.
        """
        return True


================================================
FILE: liminal/runners/airflow/operators/job_status_operator.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import datetime
from typing import Any

import pytz
from airflow.contrib.hooks.aws_hook import AwsHook
from airflow.exceptions import AirflowException
from airflow.lineage import apply_lineage
from airflow.models import BaseOperator
from airflow.utils.db import provide_session
from airflow.utils.decorators import apply_defaults
from airflow.utils.state import State


class JobStatusOperator(BaseOperator):
    """
    Base operator that reports the metrics produced by ``metrics`` to each configured backend.
    """

    template_ext = ()

    @apply_defaults
    def __init__(self, backends, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # created lazily by get_cloudwatch()
        self.cloudwatch = None
        self.backends = backends

    def execute(self, context):
        """
        Send every metric to every backend.

        :raises AirflowException: if a backend has no entry in ``report_functions``
        """
        for backend in self.backends:
            if backend not in self.report_functions:
                raise AirflowException(f'No such metrics backend: {backend}')
            for metric in self.metrics(context):
                self.report_functions[backend](self, metric)

    def metrics(self, context):
        """
        Return the metrics to report; subclasses must override this.
        """
        raise NotImplementedError

    def send_metric_to_cloudwatch(self, metric):
        """
        Hand one metric to the CloudWatch hook.
        """
        self.get_cloudwatch().put_metric_data(metric)

    # Backend name -> plain function taking (operator, metric); it must be defined
    # after the functions it references.
    report_functions = {'cloudwatch': send_metric_to_cloudwatch}

    def get_cloudwatch(self):
        """
        Return the CloudWatch hook, creating it on first use.
        """
        if self.cloudwatch:
            return self.cloudwatch
        self.cloudwatch = CloudWatchHook()
        return self.cloudwatch


class JobStartOperator(JobStatusOperator):
    """
    Reports a single ``JobStarted`` metric with value 1, tagged with the application name.
    """

    ui_color = '#c5e5e8'

    def __init__(self, namespace, application_name, backends, *args, **kwargs):
        super().__init__(*args, backends=backends, **kwargs)
        self.application_name = application_name
        self.namespace = namespace

    def metrics(self, context):
        """
        :returns: one-element list holding the ``JobStarted`` metric
        """
        tags = [Tag('ApplicationName', self.application_name)]
        return [Metric(self.namespace, 'JobStarted', 1, tags)]


class JobEndOperator(JobStatusOperator):
    """
    Reports job end metrics and fails the DAG run when another task failed.

    Two metrics are reported, both tagged with the application name:
    ``JobResult`` (1 when every other task instance of the DAG run ended in
    state success or skipped, 0 otherwise) and ``JobDuration`` (seconds
    elapsed since the DAG run's start date, rounded).

    :param namespace: namespace the metrics are published under.
    :param application_name: value of the ``ApplicationName`` tag.
    :param backends: names of the metrics backends to report to.
    """

    ui_color = '#6d8fad'

    def __init__(self, namespace, application_name, backends, *args, **kwargs):
        super().__init__(backends=backends, *args, **kwargs)
        self.namespace = namespace
        self.application_name = application_name
        self.__job_result = 0

    def execute(self, context):
        # The job result has to be computed first because metrics() reports it.
        self.__calculate_job_result(context)
        super().execute(context)

    def metrics(self, context):
        """Return the ``JobResult`` and ``JobDuration`` metrics for this run."""
        duration = round(
            (pytz.utc.localize(datetime.utcnow()) - context['ti'].get_dagrun().start_date).total_seconds()
        )

        self.log.info('Elapsed time: %s' % duration)

        self.log.info(f'dag final job result: {self.__job_result}')

        return [
            Metric(self.namespace, 'JobResult', self.__job_result, [Tag('ApplicationName', self.application_name)]),
            Metric(self.namespace, 'JobDuration', duration, [Tag('ApplicationName', self.application_name)]),
        ]

    def __log_and_get_state(self, task_instance):
        """Log the final state of a task instance and return that state."""
        state = task_instance.state

        self.log.info(f'Task {task_instance.task_id} finished with state = {state}')

        return state

    def __calculate_job_result(self, context):
        """Set the job result to 1 when all other tasks succeeded or were skipped, else 0."""
        self.log.info('scanning task instances states.. ')
        task_instances = context['dag_run'].get_task_instances()
        # This operator's own task instance is still running, so it is left out.
        task_states = [
            self.__log_and_get_state(task_instance)
            for task_instance in task_instances
            if task_instance.task_id != context['task_instance'].task_id
        ]

        self.__job_result = 0
        if all(state in (State.SUCCESS, State.SKIPPED) for state in task_states):
            self.__job_result = 1

    @apply_lineage
    @provide_session
    def post_execute(self, context: Any, result: Any = None, session=None):
        """
        Mark the DAG run as failed when the computed job result is 0.

        :param context: Airflow task context; ``context['ti']`` is used to look up the DAG run.
        :param result: result of ``execute`` (unused).
        :param session: database session supplied by ``provide_session``.
        """
        if self.__job_result == 0:
            self.log.info("Failing this DAG run due to task failure.")

            dag_run = context['ti'].get_dagrun()
            # Use a time-zone aware UTC timestamp (same form as in metrics()):
            # Airflow's DagRun date columns do not accept naive datetimes.
            dag_run.end_date = pytz.utc.localize(datetime.utcnow())
            dag_run.state = State.FAILED

            session.merge(dag_run)


# noinspection PyAbstractClass
class CloudWatchHook(AwsHook):
    """
    Hook used to publish metrics to AWS CloudWatch.
    """

    def __init__(self, region_name=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.region_name = region_name
        # The client is created once, at construction time.
        self.conn = self.get_client_type('cloudwatch', self.region_name)

    def get_conn(self):
        return self.conn

    def put_metric_data(self, metric):
        """
        Publish one metric data point; its tags become CloudWatch dimensions.

        :param metric: object exposing ``namespace``, ``name``, ``value`` and ``tags``.
        """
        value = metric.value
        client = self.get_conn()

        dimensions = []
        for tag in metric.tags:
            dimensions.append({'Name': tag.name, 'Value': tag.value})

        namespace = metric.namespace
        datum = {
            'MetricName': metric.name,
            'Dimensions': dimensions,
            'Timestamp': datetime.utcnow(),
            'Value': value,
            'Unit': 'None',
        }
        client.put_metric_data(Namespace=namespace, MetricData=[datum])

        self.log.info(f'Published metric: {metric.name} with value: {value}')


class Metric:
    """
    A single metric data point.

    :param namespace: namespace the metric is published under.
    :type namespace: str
    :param name: metric name.
    :type name: str
    :param value: metric value.
    :type value: float
    :param tags: tags attached to the metric; each one exposes ``name`` and ``value``.
    :type tags: List[Tag]
    """

    def __init__(self, namespace, name, value, tags):
        self.namespace, self.name = namespace, name
        self.value, self.tags = value, tags


class Tag:
    """
    Name/value pair attached to a metric.
    """

    def __init__(self, name, value):
        self.name, self.value = name, value


================================================
FILE: liminal/runners/airflow/operators/operator_with_variable_resolving.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import inspect
import logging
import re
from datetime import datetime
from typing import Any, Dict, Optional, Set

import jinja2
from airflow.models import BaseOperator
from airflow.settings import Session
from jinja2 import Environment

from liminal.runners.airflow.config import standalone_variable_backend

_VAR_REGEX = '(.*){{([^}]*)}}(.*)'

_BASE_OPERATOR_ATTRIBUTES = list(inspect.signature(BaseOperator.__init__).parameters.keys())


class OperatorWithVariableResolving(BaseOperator):
    """
    Operator delegator that handles liminal variable substitution at run time.

    Wraps another operator (the delegate), registers itself under the
    delegate's ``task_id`` and forwards the operator life-cycle calls to the
    delegate. Right before the delegate executes, its attributes are rendered
    twice: first with a :class:`LiminalEnvironment` (liminal variables), then
    with the delegate's default rendering.
    """

    def __init__(self, dag, task_config: dict, variables: dict = None, liminal_task_instance=None, **kwargs):
        """
        :param dag: DAG this wrapper is attached to.
        :param task_config: liminal task configuration, used for variable resolution.
        :param variables: liminal variables; copied, an empty dict is used when falsy.
        :param liminal_task_instance: liminal task; only its ``serialize()`` result is kept.
        :param kwargs: must contain ``operator``, the operator to delegate to.
        :raises KeyError: when ``operator`` is missing from ``kwargs``.
        """
        self.operator_delegate: BaseOperator = kwargs.pop('operator')
        self.liminal_task_instance = liminal_task_instance.serialize() if liminal_task_instance else None
        if variables:
            self.variables = variables.copy()
        else:
            self.variables = {}
        self.task_config = task_config
        # The wrapper takes the delegate's place in the DAG under the same task id.
        super().__init__(task_id=self.operator_delegate.task_id, dag=dag)
        self._LOG = logging.getLogger(self.__class__.__name__)

    def execute(self, context):
        """
        Resolve variables in the delegate's attributes, then execute the delegate.

        :param context: Airflow task context.
        :returns: whatever the delegate's ``execute`` returns.
        """
        attributes = self._get_operator_delegate_attributes()
        self._LOG.info(f'task_config: {self.task_config}')
        self._LOG.info(f'variables: {self.variables}')
        # Make every delegate-specific attribute a template field so it gets rendered.
        self.operator_delegate.template_fields = set(list(self.operator_delegate.template_fields) + attributes)
        # First pass resolves liminal variables, second pass is the delegate's default rendering.
        self.operator_delegate.render_template_fields(context, LiminalEnvironment(self.variables, self.task_config))
        self.operator_delegate.render_template_fields(context)

        if 'ti' in context:
            context['ti'].xcom_push(key="liminal_task_instance", value=self.liminal_task_instance)

        return self.operator_delegate.execute(context)

    def post_execute(self, context, result=None):
        """Forward to the delegate's ``post_execute``."""
        self.operator_delegate.post_execute(context, result)

    def _get_operator_delegate_attributes(self):
        """
        Return the names of the delegate's own public attributes.

        Excluded are ``BaseOperator.__init__`` parameters, anything ``BaseOperator``
        itself defines, names starting with an underscore and a few explicitly
        listed names.
        """
        return [
            attr
            for attr in dir(self.operator_delegate)
            if attr not in _BASE_OPERATOR_ATTRIBUTES
            and attr not in dir(BaseOperator)
            and not attr.startswith('_')
            and attr not in ('args', 'kwargs', 'lineage_data', 'subdag', 'template_fields')
        ]

    def pre_execute(self, context: Any):
        """Forward to the delegate's ``pre_execute``."""
        return self.operator_delegate.pre_execute(context)

    def on_kill(self) -> None:
        """Forward to the delegate's ``on_kill``."""
        self.operator_delegate.on_kill()

    def render_template_fields(self, context: Dict, jinja_env: Optional[jinja2.Environment] = None) -> None:
        """
        Intentionally does nothing.

        NOTE(review): presumably this keeps Airflow's own rendering of the wrapper
        from touching anything, since the delegate's fields are rendered inside
        ``execute`` - confirm against the Airflow task runner.
        """
        pass

    def render_template(
        self,
        content: Any,
        context: Dict,
        jinja_env: Optional[jinja2.Environment] = None,
        seen_oids: Optional[Set] = None,
    ) -> Any:
        """
        Render ``content`` through the delegate in two passes.

        The first pass uses a :class:`LiminalEnvironment`; its result is rendered
        again with the given ``jinja_env`` and ``seen_oids``.
        """
        value = self.operator_delegate.render_template(
            content, context, LiminalEnvironment(self.variables, self.task_config)
        )
        return self.operator_delegate.render_template(value, context, jinja_env, seen_oids)

    def get_template_env(self) -> jinja2.Environment:
        """Return the delegate's template environment."""
        return self.operator_delegate.get_template_env()

    def prepare_template(self) -> None:
        """Forward to the delegate's ``prepare_template``."""
        self.operator_delegate.prepare_template()

    def resolve_template_files(self) -> None:
        """Forward to the delegate's ``resolve_template_files``."""
        self.operator_delegate.resolve_template_files()

    def clear(
        self,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None,
        upstream: bool = False,
        downstream: bool = False,
        session: Session = None,
    ):
        """Forward to the delegate's ``clear`` (arguments are passed positionally)."""
        return self.operator_delegate.clear(start_date, end_date, upstream, downstream, session)

    def run(
        self,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None,
        ignore_first_depends_on_past: bool = True,
        ignore_ti_state: bool = False,
        mark_success: bool = False,
    ) -> None:
        """Forward to the delegate's ``run`` (arguments are passed positionally)."""
        self.operator_delegate.run(start_date, end_date, ignore_first_depends_on_past, ignore_ti_state, mark_success)


class LiminalEnvironment(Environment):
    """
    Jinja2 environment stand-in that resolves ``{{tag}}`` liminal variables.

    ``from_string`` stores the string and returns the environment itself, which
    then plays the role of the template: ``render`` replaces each tag with the
    first value found in, in order, the DAG run conf, the liminal variables
    and the standalone variable backend. Tags that cannot be resolved are left
    untouched.

    :param variables: liminal variables (copied).
    :param task_config: optional task configuration. Its ``variables`` entry
        is either a dict merged into the variables, or a string that is
        rendered and used as a key into ``variables`` whose value is merged.
    """

    def __init__(self, variables, task_config=None):
        super().__init__()
        self.val = None
        self.variables = variables.copy()
        logging.info(f'variables: {variables}')
        if task_config and 'variables' in task_config:
            task_variables = task_config['variables']
            if isinstance(task_variables, dict):
                self.variables.update(task_variables)
            elif isinstance(task_variables, str):
                variables_key = self.from_string(task_variables).render()
                if variables_key in variables:
                    self.variables.update(variables[variables_key])

    def from_string(self, val, **kwargs):
        """Remember ``val`` as the string to render and return ``self`` as its template."""
        self.val = val
        return self

    def render(self, *_, **kwargs):
        """
        Implements jinja2.environment.Template.render

        Values from ``kwargs['dag_run'].conf`` (when ``dag_run`` is given) take
        precedence over the liminal variables.
        """
        conf = kwargs['dag_run'].conf if 'dag_run' in kwargs else {}
        return self.__render(self.val, conf, set())

    def __render(self, val: str, dag_run_conf: dict, unresolved_tags: set):
        """
        Recursively substitute the tags found in ``val``.

        :param val: string to render.
        :param dag_run_conf: DAG run configuration, looked up first.
        :param unresolved_tags: tags already known to have no value; used to
            stop the recursion once only unresolved tags are left.
        :returns: ``val`` with every resolvable tag replaced.
        """
        # DOTALL lets the prefix/suffix groups span newlines. Without it only
        # the first line of a multi-line string is matched, and everything
        # after the first newline is dropped from the result.
        token = re.match(_VAR_REGEX, val, re.DOTALL)
        if not token or token[2].strip() in unresolved_tags:
            return val

        tag_name = token[2].strip()
        prefix = self.__render(token[1], dag_run_conf, unresolved_tags)
        suffix = self.__render(token[3], dag_run_conf, unresolved_tags)

        if dag_run_conf and tag_name in dag_run_conf:
            resolved = dag_run_conf[tag_name]
        elif tag_name in self.variables:
            resolved = self.variables[tag_name]
        else:
            resolved = standalone_variable_backend.get_variable(tag_name, None)
            if not resolved:
                # No value anywhere: keep the tag verbatim and remember it.
                unresolved_tags.add(tag_name)
                return self.__render(prefix + '{{' + token[2] + '}}' + suffix, dag_run_conf, unresolved_tags)

        # The substituted value may itself contain tags, hence the re-render.
        # str() is applied to every source so non-string values cannot break
        # the concatenation.
        return self.__render(prefix + str(resolved) + suffix, dag_run_conf, unresolved_tags)


def add_variables_to_operator(operator, task) -> BaseOperator:
    """
    Wrap an operator so that liminal variables are resolved when it runs.

    :param operator: Airflow operator
    :type operator: BaseOperator
    :param task: Task instance providing ``dag``, ``task_config`` and ``variables``
    :type task: Task
    :returns: OperatorWithVariableResolving wrapping given operator
    """
    wrapper_kwargs = {
        'dag': task.dag,
        'task_config': task.task_config,
        'variables': task.variables,
        'liminal_task_instance': task,
        'operator': operator,
    }
    return OperatorWithVariableResolving(**wrapper_kwargs)


================================================
FILE: liminal/runners/airflow/tasks/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal/runners/airflow/tasks/airflow.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from abc import ABC, abstractmethod

from liminal.runners.airflow.model import task


class AirflowTask(task.Task, ABC):
    """
    Base class for liminal tasks that add operators to an Airflow DAG.
    """

    @abstractmethod
    def apply_task_to_dag(self):
        """Add this task to the DAG; to be implemented by concrete tasks."""


================================================
FILE: liminal/runners/airflow/tasks/containerable.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import json
import logging
import os
from abc import ABC
from typing import Dict

from liminal.core import environment
from liminal.runners.airflow.config import standalone_variable_backend
from liminal.runners.airflow.config.standalone_variable_backend import get_variable
from liminal.runners.airflow.model import task

_LOG = logging.getLogger(__name__)
ENV = 'env'
DEFAULT = 'default'


class ContainerTask(task.Task, ABC):
    """
    K8S Containerable task

    Collects what is needed to run the task in a container: image, mounts,
    secrets, environment variables and the command with its arguments.
    """

    def __init__(
        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None
    ):
        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)
        env = standalone_variable_backend.get_variable(ENV, DEFAULT)
        self.env_vars = self.__env_vars(env)
        self.image = self.task_config['image']
        self.mounts = self.task_config.get('mounts', [])
        self.secrets = self.task_config.get('secrets', [])
        self.cmds, self.arguments = self._kubernetes_cmds_and_arguments()

    def _kubernetes_cmds_and_arguments(self):
        """Return the container entrypoint and its arguments (the task's ``cmd`` run via ``sh -c``)."""
        cmds = ['/bin/sh', '-c']

        arguments = [self.task_config['cmd']]

        return cmds, arguments

    @staticmethod
    def __get_local_env_params_from_env_file():
        """
        Read ``KEY=VALUE`` pairs from the ``env`` file in the liminal home.

        Blank lines, lines starting with ``#`` and lines without ``=`` are
        ignored. Only the first ``=`` separates key from value, so values may
        themselves contain ``=``.

        :returns: dict of the parsed pairs; empty when the file does not exist.
        """
        env_file = f'{environment.get_liminal_home()}/env'
        if not os.path.isfile(env_file):
            return {}

        _LOG.info(f'found env file at {env_file}')
        result = {}
        with open(env_file) as f:
            for line in f:
                entry = line.strip()
                if not entry or entry.startswith('#'):
                    continue
                # Split on the first '=' only: values such as base64 strings
                # or URLs with query parameters legitimately contain '='.
                key, separator, value = entry.partition('=')
                if separator:
                    result[key] = value
        return result

    def __env_vars(self, env) -> Dict:
        """
        Build the environment variables passed to the container.

        Starts from the task's ``env_vars`` updated with the local env file.
        When the ``<pipeline>_dag_configuration`` variable holds a
        ``<pipeline>_environment_variables`` entry, that entry replaces them
        entirely. ``env`` is added under the ``env`` key unless already set.

        :param env: name of the current environment.
        :returns: dict with every value converted to ``str``.
        """
        env_vars = dict(self.task_config['env_vars']) if 'env_vars' in self.task_config else {}
        env_vars.update(self.__get_local_env_params_from_env_file())
        airflow_configuration_variable = get_variable(
            f'''{self.pipeline_config['pipeline']}_dag_configuration''', default_val=None
        )

        if airflow_configuration_variable:
            airflow_configs = json.loads(airflow_configuration_variable)
            environment_variables_key = f"{self.pipeline_config['pipeline']}_environment_variables"
            if environment_variables_key in airflow_configs:
                env_vars = airflow_configs[environment_variables_key]

        if ENV not in env_vars:
            env_vars[ENV] = env

        return {k: str(v) for k, v in env_vars.items()}


================================================
FILE: liminal/runners/airflow/tasks/create_cloudformation_stack.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from airflow.operators.dummy import DummyOperator
from airflow.operators.python import BranchPythonOperator
from flatdict import FlatDict

from liminal.runners.airflow.operators.cloudformation import (
    CloudFormationCreateStackOperator,
    CloudFormationCreateStackSensor,
    CloudFormationHook,
)
from liminal.runners.airflow.operators.operator_with_variable_resolving import (
    OperatorWithVariableResolving,
)
from liminal.runners.airflow.tasks import airflow


class CreateCloudFormationStackTask(airflow.AirflowTask):
    """
    Creates cloud_formation stack.

    Adds four operators to the DAG: a branch that checks whether the stack is
    already up, the stack creation, a sensor watching the creation, and an
    end marker that both branches lead to.
    """

    def __init__(
        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None
    ):
        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)
        self.stack_name = task_config['stack_name']

    def apply_task_to_dag(self):
        """
        Add the stack creation operators to the DAG.

        :returns: the end marker operator, i.e. the last operator of this task.
        """
        check_cloudformation_stack_exists_task = self._add_variables_to_operator(
            BranchPythonOperator(
                templates_dict={'stack_name': self.stack_name},
                task_id=f'is-cloudformation-{self.task_id}-running',
                python_callable=self.__cloudformation_stack_running_branch,
                provide_context=True,
            )
        )

        create_cloudformation_stack_task = self._add_variables_to_operator(
            CloudFormationCreateStackOperator(
                task_id=f'create-cloudformation-{self.task_id}', params={**self.__reformatted_params()}
            )
        )

        create_stack_sensor_task = self._add_variables_to_operator(
            CloudFormationCreateStackSensor(
                task_id=f'cloudformation-watch-{self.task_id}-create',
                stack_name=self.stack_name,
            )
        )

        # 'all_done' lets the end marker run whichever branch was taken.
        stack_creation_end_task = DummyOperator(
            task_id=f'creation-end-{self.task_id}', trigger_rule='all_done', dag=self.dag
        )

        if self.parent:
            self.parent.set_downstream(check_cloudformation_stack_exists_task)

        create_stack_sensor_task.set_downstream(stack_creation_end_task)
        create_cloudformation_stack_task.set_downstream(create_stack_sensor_task)
        check_cloudformation_stack_exists_task.set_downstream(create_cloudformation_stack_task)
        check_cloudformation_stack_exists_task.set_downstream(stack_creation_end_task)

        return stack_creation_end_task

    def __cloudformation_stack_running_branch(self, stack_name=None, **kwargs):
        """
        Choose the branch to follow depending on the stack status.

        The branch operator hands the stack name over inside ``templates_dict``
        (as part of the task context), not as a ``stack_name`` argument, so it
        is read from there when not given explicitly; ``self.stack_name`` is
        the last resort.

        :param stack_name: stack to check; optional.
        :param kwargs: task context, expected to hold ``templates_dict``.
        :returns: task id of the end marker when the stack status is
            ``CREATE_COMPLETE`` or ``DELETE_FAILED``, otherwise the task id of
            the stack creation operator.
        :raises Exception: any error from describing the stack other than the
            stack not existing.
        """
        if stack_name is None:
            templates_dict = kwargs.get('templates_dict') or {}
            stack_name = templates_dict.get('stack_name', self.stack_name)

        cloudformation = CloudFormationHook().get_conn()
        try:
            stack_status = cloudformation.describe_stacks(StackName=stack_name)['Stacks'][0]['StackStatus']
            if stack_status in ['CREATE_COMPLETE', 'DELETE_FAILED']:
                print(f'Stack {stack_name} is running')
                return f'creation-end-{self.task_id}'
            else:
                print(f'Stack {stack_name} is not running')
        except Exception as e:
            if 'does not exist' in str(e):
                print(f'Stack {stack_name} does not exist')
                return f'create-cloudformation-{self.task_id}'
            # Anything else is unexpected: re-raise with the original traceback.
            raise

        return f'create-cloudformation-{self.task_id}'

    # noinspection PyUnusedLocal
    def __reformatted_params(self, **kwargs):
        """
        Build the stack creation parameters from the task's ``properties``.

        ``properties['Parameters']`` is flattened and turned into a list of
        ``ParameterKey``/``ParameterValue`` dicts with string values.
        """
        return {
            'StackName': self.stack_name,
            **self.task_config['properties'],
            'Parameters': [
                {'ParameterKey': x, 'ParameterValue': str(y)}
                for (x, y) in FlatDict(self.task_config['properties']['Parameters']).items()
            ],
        }


================================================
FILE: liminal/runners/airflow/tasks/delete_cloudformation_stack.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import BranchPythonOperator
from airflow.utils.trigger_rule import TriggerRule

from liminal.runners.airflow.operators.cloudformation import (
    CloudFormationDeleteStackOperator,
    CloudFormationDeleteStackSensor,
)
from liminal.runners.airflow.tasks import airflow


class DeleteCloudFormationStackTask(airflow.AirflowTask):
    """
    Deletes cloud_formation stack.

    The stack is only deleted when no other run of the DAG is active;
    otherwise the deletion is skipped and the end marker is reached directly.
    """

    def __init__(
        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None
    ):
        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)
        self.stack_name = task_config['stack_name']

    # noinspection PyUnusedLocal
    def __queued_dag_runs_exists(self, **kwargs):
        """Return the task id of the branch to follow."""
        other_runs_active = self.dag.get_num_active_runs() > 1
        return f'delete-end-{self.task_id}' if other_runs_active else f'delete-cloudformation-{self.task_id}'

    def apply_task_to_dag(self):
        """
        Add the stack deletion operators to the DAG.

        :returns: the end marker operator, i.e. the last operator of this task.
        """
        branch = BranchPythonOperator(
            task_id=f'{self.task_id}-is-dag-queue-empty',
            python_callable=self.__queued_dag_runs_exists,
            provide_context=True,
            trigger_rule=TriggerRule.ALL_DONE,
            dag=self.dag,
        )

        delete_operator = CloudFormationDeleteStackOperator(
            task_id=f'delete-cloudformation-{self.task_id}',
            params={'StackName': self.stack_name},
        )
        delete_stack = self._add_variables_to_operator(delete_operator)

        delete_sensor = CloudFormationDeleteStackSensor(
            task_id=f'cloudformation-watch-{self.task_id}-delete',
            stack_name=self.stack_name,
        )
        watch_deletion = self._add_variables_to_operator(delete_sensor)

        end_marker = DummyOperator(task_id=f'delete-end-{self.task_id}', dag=self.dag)

        if self.parent:
            self.parent.set_downstream(branch)

        # branch -> end marker, or branch -> delete -> watch -> end marker
        branch.set_downstream(end_marker)
        branch.set_downstream(delete_stack)
        delete_stack.set_downstream(watch_deletion)
        watch_deletion.set_downstream(end_marker)

        return end_marker


================================================
FILE: liminal/runners/airflow/tasks/hadoop.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from abc import ABC, abstractmethod

from liminal.runners.airflow.model import task


class HadoopTask(task.Task, ABC):
    """
    Base class for Hadoop tasks, which expose a runnable command.
    """

    @abstractmethod
    def get_runnable_command(self):
        """Return the command to run; to be implemented by concrete tasks."""


================================================
FILE: liminal/runners/airflow/tasks/job_end.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from liminal.runners.airflow.operators.job_status_operator import JobEndOperator
from liminal.runners.airflow.tasks import airflow


class JobEndTask(airflow.AirflowTask):
    """
    Task that adds the job end operator, which reports job end metrics.

    The metrics namespace and backends come from the ``metrics`` section of
    the liminal configuration (empty namespace and no backends by default).
    """

    def __init__(
        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None
    ):
        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)
        metrics_config = self.liminal_config.get('metrics', {})
        self.metrics_namespace = metrics_config.get('namespace', '')
        self.metrics_backends = metrics_config.get('backends', [])

    def apply_task_to_dag(self):
        """Create the ``end`` operator, hook it after the parent (if any) and return it."""
        end_operator = JobEndOperator(
            task_id='end',
            namespace=self.metrics_namespace,
            application_name=self.pipeline_config['pipeline'],
            backends=self.metrics_backends,
            dag=self.dag,
            trigger_rule=self.trigger_rule,
        )

        upstream = self.parent
        if upstream:
            upstream.set_downstream(end_operator)

        return end_operator


================================================
FILE: liminal/runners/airflow/tasks/job_start.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from liminal.runners.airflow.operators.job_status_operator import JobStartOperator
from liminal.runners.airflow.tasks import airflow


class JobStartTask(airflow.AirflowTask):
    """
    Task that adds the job start operator, which reports job start metrics.

    The metrics namespace and backends come from the ``metrics`` section of
    the liminal configuration (empty namespace and no backends by default).
    """

    def __init__(
        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None
    ):
        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)
        metrics_config = self.liminal_config.get('metrics', {})
        self.metrics_namespace = metrics_config.get('namespace', '')
        self.metrics_backends = metrics_config.get('backends', [])

    def apply_task_to_dag(self):
        """Create the ``start`` operator, hook it after the parent (if any) and return it."""
        start_operator = JobStartOperator(
            task_id='start',
            namespace=self.metrics_namespace,
            application_name=self.pipeline_config['pipeline'],
            backends=self.metrics_backends,
            dag=self.dag,
            trigger_rule=self.trigger_rule,
        )

        upstream = self.parent
        if upstream:
            upstream.set_downstream(start_operator)

        return start_operator


================================================
FILE: liminal/runners/airflow/tasks/python.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from liminal.runners.airflow.tasks import containerable


class PythonTask(containerable.ContainerTask):
    """
    Container task whose configured command is run through bash.
    """

    def __cmd(self):
        return self.task_config['cmd']

    def _kubernetes_cmds_and_arguments(self):
        """Return the bash entrypoint and the task's ``cmd`` as its single argument."""
        return ['/bin/bash', '-c'], [self.__cmd()]


================================================
FILE: liminal/runners/airflow/tasks/spark.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from itertools import chain

from flatdict import FlatDict

from liminal.runners.airflow.tasks import containerable, hadoop


class SparkTask(hadoop.HadoopTask, containerable.ContainerTask):
    """
    Executes a Spark application.

    The task configuration keys read here are ``master``, ``class``, ``conf``,
    ``application_source`` and ``application_arguments``; together they are turned
    into a ``spark-submit`` command line.
    """

    def __init__(
        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None
    ):
        # Normalise the task configuration before delegating to the base initialisers.
        task_config['image'] = task_config.get('image', '')
        task_config['cmd'] = task_config.get('cmd', [])
        # NOTE(review): this replaces any 'env_vars' present in task_config - confirm that is intended.
        task_config['env_vars'] = {'SPARK_LOCAL_HOSTNAME': 'localhost'}
        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)

    def get_runnable_command(self):
        """
        Return spark-submit runnable command
        """
        return self.__generate_spark_submit()

    def __generate_spark_submit(self):
        """
        Assemble the full ``spark-submit`` command.

        :return: list of strings: ``spark-submit``, the spark options, the application
            source and finally the application arguments.
        """
        spark_submit = ['spark-submit']
        spark_submit.extend(self.__spark_args())
        spark_submit.extend(self.__additional_arguments())

        # Every element is stringified so numeric configuration values are accepted.
        return [str(x) for x in spark_submit]

    def __spark_args(self):
        """
        Build the spark options followed by the application source.

        :return: list made of ``--master`` / ``--class`` (when configured), one
            ``--conf key=value`` pair per flattened ``conf`` entry, and the
            ``application_source`` value.
        """
        spark_arguments = {
            'master': self.task_config.get('master', None),
            'class': self.task_config.get('class', None),
        }

        # Nested 'conf' dictionaries are flattened by FlatDict before being rendered.
        flat_conf_args = []
        for key, value in FlatDict(self.task_config.get('conf', {})).items():
            flat_conf_args.extend(['--conf', f'{key}={value}'])

        spark_conf = self.__parse_spark_arguments(spark_arguments)
        spark_conf.extend(flat_conf_args)
        # NOTE(review): a missing 'application_source' ends up as the string 'None' - confirm callers
        # always provide it.
        spark_conf.append(self.task_config.get("application_source"))
        return spark_conf

    def __additional_arguments(self):
        """
        Return the application arguments as a flat list.

        A list (or list subclass) is returned unchanged; a mapping is flattened into
        ``[key1, value1, key2, value2, ...]``; a missing or null value yields ``[]``.
        """
        application_arguments = self.task_config.get('application_arguments', {})
        if not application_arguments:
            # Covers {}, [] and an explicit null in the configuration.
            return []
        if isinstance(application_arguments, list):
            return application_arguments
        return self.__interleaving(application_arguments.keys(), application_arguments.values())

    def __parse_spark_arguments(self, spark_arguments):
        """
        Turn ``{name: value}`` into ``['--name', value, ...]``, skipping falsy values.
        """
        provided = {name: value for name, value in spark_arguments.items() if value}

        return self.__interleaving([f'--{name}' for name in provided], provided.values())

    @staticmethod
    def __interleaving(keys, values):
        """
        Interleave two iterables pairwise: ``k1, v1, k2, v2, ...``.
        """
        return list(chain.from_iterable(zip(keys, values)))

    def _kubernetes_cmds_and_arguments(self):
        """
        Return the spark-submit command as the container command, with no extra arguments.
        """
        return self.__generate_spark_submit(), []


================================================
FILE: liminal/runners/airflow/tasks/sql.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from liminal.runners.airflow.model import task


class SqlTask(task.Task):
    """
    Executes an SQL application.

    This class adds no behaviour of its own here; everything is inherited from ``task.Task``.
    """


================================================
FILE: liminal/settings.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import os
import sys

from liminal.core.environment import get_liminal_home

# Liminal home path, resolved once when this module is imported.
LIMINAL_HOME = get_liminal_home()


def prepare_syspath():
    """
    Append the ``liminal`` folder located under ``LIMINAL_HOME`` to ``sys.path``.
    """
    sys.path.append(os.path.join(LIMINAL_HOME, 'liminal'))


def initialize():
    """
    Initialize liminal settings.

    Currently this only makes sure all extensible modules are in root path.
    """
    prepare_syspath()


================================================
FILE: liminal/sql/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: liminal-arch.md
================================================
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->
# Liminal

Liminal is an end-to-end platform for data engineers & scientists, allowing them to build, train and deploy machine learning models in a robust and agile way. The platform provides the abstractions and declarative capabilities for data extraction & feature engineering followed by model training and serving. Apache Liminal's goal is to operationalise the machine learning process, allowing data scientists to quickly transition from a successful experiment to an automated pipeline of model training, validation, deployment and inference in production, freeing them from engineering and non-functional tasks, and allowing them to focus on machine learning code and artifacts.

## Motivation

The challenges involved in operationalizing machine learning models are one of the main reasons why many machine learning projects never make it to production.
The process involves automating and orchestrating multiple steps which run on heterogeneous infrastructure - different compute environments, data processing platforms, ML frameworks, notebooks, containers and monitoring tools.
There are no mature standards for this workflow, and most organizations do not have the experience to build it in-house. In the best case, dev-ds-devops teams form in order to accomplish this task together; in many cases, it's the data scientists who try to deal with this themselves without the knowledge or the inclination to become infrastructure experts.
As a result, many projects never make it through the cycle. Those who do suffer from a very long lead time from a successful experiment to an operational, refreshable, deployed and monitored model in production.
The goal of Apache Liminal is to simplify the creation and management of machine learning pipelines by data engineers & scientists. The platform provides declarative building blocks which define the workflow, orchestrate the underlying infrastructure and take care of non-functional concerns, enabling a focus on business logic and algorithm code.
Some commercial E2E solutions have started to emerge in the last few years; however, they are limited to specific parts of the workflow (e.g. Databricks MLflow). Other solutions are tied to specific environments (e.g. SageMaker on AWS).

## High Level Architecture
The platform aims to provide data engineers & scientists with a solution for end-to-end flows, from model training to real-time inference in production. Its architecture enables and promotes adoption of specific components in existing (non-Liminal) frameworks, as well as seamless integration with other open source projects. Liminal was created to enable scalability in ML efforts, after a thorough review of available solutions and frameworks showed that they did not meet our main KPIs:

- Provide an opinionated but customizable end-to-end workflow
- Abstract away the complexity of underlying infrastructure
- Support major open source tools and cloud-native infrastructure to carry out many of the steps
- Allow teams to leverage their existing investments or bring in their tools of choice into the workflow

We have found that other tech companies in the Israeli Hi-Tech ecosystem also have an interest in such a platform, hence decided to share our work with the community.
The following diagram depicts these main components and where Apache Liminal comes in:

![raibow-arch1](https://raw.githubusercontent.com/apache/incubator-liminal/master/images/liminal_001.png)

A classical data scientist workflow includes some base phases:
_Train, Deploy and Consume._

**The Train phase includes the following tasks:**

1. Fetch -  get the data needed to build a model - usually using SQL
1. Clean - make sure the data is useful for building the model
1. Prepare - split data and encode features from the data according to model needs
1. Train - Build the model and tune it
1. Evaluate - make sure the model is correct - run it on a test set, etc…
1. Validate - make sure the model is up to the standards you need

**The Deploy phase includes these tasks:**
1. Deploy - make it available for usage in production
1. Inference - Batch or Real-time - use the model to evaluate data, either offline or online from your applications
1. Consume - The actual use of the models by applications and ETLs, usually through APIs to the batch or real-time inference, which usually rely on Model and Feature stores.

Liminal provides its users with declarative composition capabilities to materialize these steps in a robust way, while exploiting existing frameworks and tools: data science frameworks such as scikit-learn, TensorFlow, Keras and the like for running core data science algorithms, as well as numerous core mechanisms such as data stores, processing engines, parallelism, schedulers, code deployment, and batch and real-time inference.
Liminal allows the creation and wiring of these kinds of functional and non-functional tasks while making the underlying infrastructure used by these tasks very easy to use, or even abstracting it away entirely. It also handles the non-functional aspects such as monitoring (in a standard fashion), deployment, scheduling, resource management and execution.

![raibow-arch2](https://raw.githubusercontent.com/apache/incubator-liminal/master/images/liminal_002.png)


================================================
FILE: pyproject.toml
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Settings for the black code formatter.
[tool.black]
line-length = 119  # TODO: Decide if 119 or 110?
target-version = ['py36', 'py37', 'py38', 'py39']
# Leave existing string quote style untouched.
skip-string-normalization = true
# Only .py and .pyi files are formatted.
include = '\.pyi?$'
# Paths matching this regular expression are not formatted.
# NOTE(review): 'src/buildstream/_protos' looks carried over from another project - confirm it is
# needed in this repository.
exclude = '''
(
  /(
      \.eggs
    | \.git
    | \.mypy_cache
    | \.tox
    | _build
    | build
    | dist
  )/
  | src/buildstream/_protos
)
'''


================================================
FILE: requirements.txt
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

docker==4.2.0
apache-airflow==2.1.2
click==7.1.1
Flask==1.1.1
pyyaml==5.4.1
boto3==1.17.112
botocore==1.20.112
wheel==0.36.2
termcolor~=1.1.0
docker-pycreds==0.4.0
typing==3.7.4.1
GitPython==3.1.11
moto==1.3.14
diskcache==3.1.1
croniter==0.3.31
pytz==2020.5
pytzdata==2020.1
freezegun==1.1.0
statsd>=3.3.0, <4.0
sqlalchemy~=1.3.15
flatdict==3.4.0
jinja2>=2.10.1, <2.11.0
python-json-logger==2.0.1
requests==2.26.0
apache-airflow-providers-amazon==1.4.0
apache-airflow-providers-cncf-kubernetes==1.0.2
#apache-airflow-providers-google==4.0.0
#apache-airflow[google,amazon,apache.spark]==2.0.0
cfn-lint==0.53.0
pre-commit==2.16.0
Werkzeug==1.0.1
itsdangerous==1.1.0
MarkupSafe==1.1.1


================================================
FILE: run_tests.sh
================================================
#!/bin/sh
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Run the unit tests; with no arguments unittest performs test discovery from the current directory.
python -m unittest


================================================
FILE: scripts/Dockerfile-airflow
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Base image: Airflow 2.1.2 on Python 3.8.
FROM apache/airflow:2.1.2-python3.8

# What pip should install as liminal; defaults to the 'apache-liminal' package name.
ARG LIMINAL_VERSION='apache-liminal'
ARG DAG_FOLDER='/opt/airflow/dags/'
# Copy the contents of scripts/ (this includes requirements-airflow.txt, installed below)
# and any wheel files from the build context into the DAG folder.
ADD scripts/* ${DAG_FOLDER}
ADD *.whl ${DAG_FOLDER}

# List the DAG folder contents in the build output.
RUN ls -ls ${DAG_FOLDER}
ADD liminal/runners/airflow/dag/liminal_dags.py ${DAG_FOLDER}
ADD scripts/webserver_config.py /opt/airflow/

USER airflow

# Install the extra Python requirements, then liminal itself, into the airflow user's site-packages.
RUN pip --disable-pip-version-check install -r /opt/airflow/dags/requirements-airflow.txt --user
RUN pip --disable-pip-version-check install --user ${LIMINAL_VERSION}


================================================
FILE: scripts/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: scripts/docker-compose.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
version: '3'
# Settings shared by the Airflow services below, merged in through the &airflow-common anchor.
x-airflow-common: &airflow-common
  image: liminal-airflow
  build:
    context: .
    dockerfile: scripts/Dockerfile-airflow
    args:
      # Forwarded to the LIMINAL_VERSION build argument of scripts/Dockerfile-airflow.
      LIMINAL_VERSION: ${LIMINAL_VERSION}
  # Environment shared by the Airflow services, merged in through the &airflow-common-env anchor.
  environment: &airflow-common-env
    LOAD_EX: n
    AIRFLOW__CORE__EXECUTOR: LocalExecutor
    # Points at the host's ~/.kube directory mounted in the volumes section.
    KUBECONFIG: /home/airflow/kube/config
    AWS_DEFAULT_REGION: "${AWS_DEFAULT_REGION:-us-east-1}"
    AIRFLOW__CORE__LOAD_EXAMPLES: 'False'
    AIRFLOW__WEBSERVER__WORKERS: '1'
    AIRFLOW__WEBSERVER__EXPOSE_CONFIG: 'True'
    _AIRFLOW_DB_UPGRADE: 'true'
    # Inside the containers liminal home is the DAG folder.
    LIMINAL_HOME: /opt/airflow/dags
    AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS: 'False'
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
    AIRFLOW_CONN_METADATA_DB: postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
    AIRFLOW_VAR__METADATA_DB_SCHEMA: airflow
  volumes:
    # Host ${LIMINAL_HOME} is mounted as the DAG folder; logs and kube config are mounted alongside.
    - ${LIMINAL_HOME}/liminal:/opt/airflow/dags/liminal/
    - ${LIMINAL_HOME}:/opt/airflow/dags
    - ${LIMINAL_HOME}/logs:/opt/airflow/logs
    - ${HOME}/.kube:/home/airflow/kube
  depends_on:
    postgres:
      condition: service_healthy
  healthcheck:
    # NOTE(review): every other path in this file is under /opt/airflow; confirm the pid file
    # really lives under /usr/local/airflow.
    test: [CMD-SHELL, '[ -f /usr/local/airflow/airflow-webserver.pid ]']
    interval: 30s
    timeout: 30s
    retries: 3
  logging:
    options:
      max-size: 10m
      max-file: '3'
  restart: always

services:
  # Database backing Airflow (see the SQL_ALCHEMY_CONN setting in x-airflow-common).
  postgres:
    image: postgres:13
    # NOTE(review): 'postgress' looks like a typo of 'postgres'; renaming changes the container
    # name, so confirm nothing depends on it first.
    container_name: liminal-postgress
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      # NOTE(review): left empty while the connection strings use database 'airflow'; presumably the
      # image falls back to a database named after POSTGRES_USER - confirm.
      POSTGRES_DB: ''
    ports:
      - 5432:5432
    volumes:
      # Database files are kept under liminal home on the host.
      - ${LIMINAL_HOME}/db:/var/lib/postgresql/data
    logging:
      options:
        max-size: 10m
        max-file: '3'
    healthcheck:
      test: [CMD, pg_isready, -U, airflow]
      interval: 30s
      timeout: 30s
      retries: 3
    restart: always

  # Airflow web UI, published on port 8080.
  webserver:
    <<: *airflow-common
    container_name: liminal-webserver
    environment:
      <<: *airflow-common-env
    ports:
      - 8080:8080
    command: webserver

  # Airflow scheduler, using the same image and environment as the webserver.
  scheduler:
    <<: *airflow-common
    container_name: liminal-scheduler
    environment:
      <<: *airflow-common-env
    ports:
      - 8793:8793
    command: scheduler


================================================
FILE: scripts/liminal
================================================
#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os
import pathlib
import shutil
import subprocess
import sys
import yaml

import click

import scripts
from liminal import settings
from liminal.build import liminal_apps_builder
from liminal.core import environment
from liminal.core.config.config import ConfigUtil
from liminal.core.util import files_util
from liminal.kubernetes import volume_util, secret_util
from liminal.logging.logging_setup import logging_initialization
from liminal.runners.airflow import dag

logging_initialization()
settings.initialize()

try:
    import importlib.resources as pkg_resources
except ImportError:
    # Try backported to PY<37 `importlib_resources`.
    import importlib_resources as pkg_resources


# Root click command group; the commands below attach themselves with @cli.command.
# Documented with a comment on purpose: click would show a docstring as the CLI help text.
@click.group()
def cli():
    pass


def docker_is_running():
    """
    Check that docker is usable by running ``docker info``.

    :return: ``True`` when ``docker info`` exits successfully.
    :raises RuntimeError: when the docker client cannot be started or ``docker info``
        exits with an error; the same message is also written to stderr.
    """
    try:
        # An argument list with DEVNULL replaces the shell string '>/dev/null 2>&1',
        # so the check no longer depends on a POSIX shell being available.
        subprocess.run(
            ['docker', 'info'],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError) as error:
        # OSError covers a missing or non-executable docker client.
        msg = "Docker is not running. Please start docker service on your machine\n"
        sys.stderr.write(f"ERROR: {msg}")
        raise RuntimeError(msg) from error
    return True


@cli.command("build", short_help="builds dockers from your business logic")
@click.option('--path', default=os.getcwd(), help='Build within this path.')
def build(path):
    # Builds the liminal apps found under `path`, then creates the local volumes
    # requested by each loaded configuration.
    # (Comment instead of docstring: click would show a docstring as help text.)
    click.echo(f'Building liminal apps in {path}')
    if docker_is_running():
        liminal_apps_builder.build_liminal_apps(path)

    loaded_configs = ConfigUtil(path).safe_load(is_render_variables=True, soft_merge=True)
    for liminal_config in loaded_configs:
        if not liminal_config.get('volumes'):
            continue
        logging.info('local volume is being created')
        source_file = files_util.resolve_pipeline_source_file(liminal_config['name'])
        volume_util.create_local_volumes(liminal_config, os.path.dirname(source_file))


def deploy_liminal_core_internal(liminal_home, clean):
    """
    Copy the liminal DAG file into liminal home and prepare the docker-compose environment.

    :param liminal_home: liminal home directory; a locally installed liminal package
        file is looked up here and copied next to the docker-compose project.
    :param clean: when true, tear down and rebuild the docker-compose images, then run
        ``db reset``, ``db init`` and ``users create`` through the webserver service.
    """
    os.makedirs(environment.get_dags_dir(), exist_ok=True)
    dag_target_file = os.path.join(environment.get_liminal_home(), 'liminal_dags.py')
    # Copy while the resource context is still open: the path handed out by
    # importlib.resources.path() is only guaranteed to exist inside the `with` block.
    # noinspection PyTypeChecker
    with pkg_resources.path(dag, 'liminal_dags.py') as dags_path:
        shutil.copyfile(dags_path, dag_target_file)
    # initialize the env. variable which indicates to the docker compose which
    # liminal to install in airflow docker
    liminal_version = environment.get_liminal_version()
    logging.info(f'Deploying liminal version: {liminal_version}')
    # if liminal is installed from local file - the developer needs to put it in the /scripts folder
    # in which case it will end up inside the container during build
    if "file://" in liminal_version:
        local_file_name = os.path.basename(liminal_version)
        full_path = os.path.join('/opt/airflow/dags', local_file_name)
        logging.info(f'Liminal was installed locally, setting LIMINAL_VERSION to {full_path}')
        os.environ[environment.LIMINAL_VERSION_PARAM_NAME] = full_path
        _, project_dir = get_docker_compose_paths()
        shutil.copyfile(os.path.join(liminal_home, local_file_name),
                        os.path.join(project_dir, local_file_name))
    if clean:
        docker_compose_command('down', ['--remove-orphans', '--rmi', 'local'])
        docker_compose_command('build', [])
        docker_compose_command('run', ['webserver', 'db reset', '-y'])
        docker_compose_command('run', ['webserver', 'db init'])
        docker_compose_command('run', ['webserver', 'users create '
                                                    '--role Admin '
                                                    '--username admin '
                                                    '--email admin '
                                                    '--firstname admin '
                                                    '--lastname admin '
                                                    '--password admin'])
        docker_compose_command('down', ['--remove-orphans'])


def docker_compose_command(command_name, args):
    """
    Run a docker-compose command against the ``liminal`` compose project.

    :param command_name: docker-compose sub-command, e.g. ``up`` or ``down``.
    :param args: list of argument strings appended after the sub-command.
    :return: ``(stdout, stderr)`` as text when the output is captured (``ps``, ``down``, or
        arguments containing ``tail`` / ``-d``); otherwise ``('', '')`` and the command
        writes straight to the console.
    """
    docker_compose_path, project_dir = get_docker_compose_paths()
    concated_args = ' '.join(args)
    # Both paths are quoted so that install locations containing spaces survive the shell.
    run_command = [
        'docker-compose '
        f'-f "{docker_compose_path}" '
        '-p liminal --project-directory '
        f'"{project_dir}" {command_name} {concated_args}'
    ]
    logging.info(run_command[0])
    # Substring match on the stringified argument list decides whether output is captured.
    detached_mode = 'tail' in str(args) or '-d' in str(args) or command_name in ['ps', 'down']

    if detached_mode:
        output = subprocess.run(run_command, env=os.environ, shell=True, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, universal_newlines=True)
        return output.stdout, output.stderr

    subprocess.call(run_command, env=os.environ, shell=True)
    return '', ''


@cli.command("deploy", short_help="deploys your liminal.yaml files to $LIMINAL_HOME folder")
@click.option('--path', default=os.getcwd(), help="folder containing liminal.yaml files")
@click.option('--clean', is_flag=True, default=False,
              help="re-deploy liminal airflow components from scratch")
def deploy_liminal_apps(path, clean):
    # Copies every non-empty liminal config file found under `path` into the DAGs
    # directory and creates the local secrets each configuration asks for.
    # With --clean, liminal home is wiped first and the airflow components are redeployed.
    # (Comment instead of docstring: click would show a docstring as help text.)
    click.echo("deploying liminal yaml files")
    liminal_home = environment.get_liminal_home()
    if clean:
        click.echo("cleaning liminal home directory")
        # On a first deployment there is nothing to remove yet, and rmtree raises
        # on a missing directory.
        if os.path.isdir(liminal_home):
            shutil.rmtree(liminal_home)
    os.makedirs(liminal_home, exist_ok=True)
    os.makedirs(environment.get_dags_dir(), exist_ok=True)

    deploy_liminal_core_internal(liminal_home, clean)
    for config_file in files_util.find_config_files(path):
        if is_file_empty(config_file):
            continue
        click.echo(f"deploying liminal file: {config_file}")
        # Keep the file's location relative to the parent of `path` inside the DAGs directory.
        relative_path = os.path.relpath(config_file, os.path.dirname(path))
        target_yml_name = os.path.join(environment.get_dags_dir(), relative_path)
        pathlib.Path(target_yml_name).parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(config_file, target_yml_name)
        configs_path = ConfigUtil(os.path.dirname(config_file)).safe_load(is_render_variables=True,
                                                                          soft_merge=True)
        for config in configs_path:
            if config.get('secrets'):
                logging.info('Secret volume is being created')
                secret_util.create_local_secrets(config)


def is_file_empty(file):
    """
    Tell whether a YAML file has no usable content.

    :param file: path of the YAML file to inspect.
    :return: ``True`` when the file parses to ``None`` (for example an empty file) or
        cannot be parsed as YAML, in which case the parser error is printed;
        ``False`` otherwise.
    """
    try:
        with open(file, "r") as stream:
            return yaml.safe_load(stream) is None
    except yaml.YAMLError as exc:
        # An unparsable file has no usable content: report the error and let the
        # caller skip it instead of failing on an unbound local.
        print(exc)
        return True


def liminal_is_running():
    """
    Report whether the captured output of ``docker-compose ps`` mentions "liminal".
    """
    ps_output, _ = docker_compose_command('ps', [])
    return "liminal" in ps_output


@cli.command("stop", short_help="stops the docker compose")
def stop():
    # Runs `docker-compose down`, but only when docker is up and liminal is running.
    # (Comment instead of docstring: click would show a docstring as help text.)
    if not (docker_is_running() and liminal_is_running()):
        return
    # initialize liminal home by default
    environment.get_liminal_home()
    docker_compose_command('down', [])


@cli.command("logs", short_help="display the docker compose logs")
@click.option('--follow', '-f', is_flag=True, default=False, help="Follow log output.")
@click.option('--tail', '-t', default=0, type=int,
              help="Number of lines to show from the end of the log")
def logs(follow, tail):
    # Shows docker-compose logs: --follow streams them, --tail N logs the last N lines.
    # Does nothing unless docker is up and liminal is running.
    # (Comment instead of docstring: click would show a docstring as help text.)
    if not (docker_is_running() and liminal_is_running()):
        return
    # initialize liminal home by default
    environment.get_liminal_home()
    if follow:
        docker_compose_command('logs', ['--follow'])
    if tail > 0:
        captured_stdout, _ = docker_compose_command('logs', [f'--tail={tail}'])
        logging.info(captured_stdout)


@cli.command("start",
             short_help="starts a local airflow in docker compose. should be run after deploy. "
                        "Make sure docker is running on your machine")
@click.option('--detached-mode', '-d', is_flag=True, default=False,
              help="Start liminal in detached mode.")
def start(detached_mode):
    # Runs `docker-compose up`, adding -d when detached mode is requested.
    # (Comment instead of docstring: click would show a docstring as help text.)
    logging.info(f'starting liminal version: {environment.get_liminal_version()}')
    if not docker_is_running():
        return
    # initialize liminal home by default
    environment.get_liminal_home()
    compose_args = ['-d'] if detached_mode else []
    docker_compose_command('up', compose_args)


@cli.command("delete", short_help="delete liminal resource registration")
@click.option('--path', default=os.getcwd(), help='Delete within this path.')
@click.option('--clean', is_flag=True, default=False,
              help="Delete all resource: DAGs, Volumes")
def delete(path, clean):
    # Deletes the local volumes declared by the configurations under `path`.
    # With --clean, liminal home is removed as well and the docker-compose project is torn down.
    # (Comment instead of docstring: click would show a docstring as help text.)
    configs = ConfigUtil(path).safe_load(is_render_variables=True,
                                         soft_merge=True)
    for config in configs:
        if config.get('volumes'):
            logging.info('local volume is being deleted')
            volume_util.delete_local_volumes(config, os.path.dirname(
                files_util.resolve_pipeline_source_file(config['name'])))

    if clean:
        dir_path = os.path.abspath(environment.get_liminal_home())
        # A missing liminal home must not stop the docker-compose teardown below.
        if os.path.isdir(dir_path):
            shutil.rmtree(dir_path)
        docker_compose_command('down', ['--remove-orphans', '--rmi', 'local'])


def get_docker_compose_paths():
    """Locate the bundled ``docker-compose.yml`` and its project directory.

    Returns:
        A ``(docker_compose_path, project_dir)`` tuple: the path of the
        ``docker-compose.yml`` resource of the ``scripts`` package, and the
        absolute path two directory levels above that file.
    """
    # noinspection PyTypeChecker
    with pkg_resources.path(scripts, 'docker-compose.yml') as resource_path:
        compose_file = resource_path
    # The resolved path is deliberately kept after the context manager exits.
    return compose_file, pathlib.Path(compose_file).parent.parent.absolute()


# Run the `cli` entry point only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    cli()


================================================
FILE: scripts/requirements-airflow.txt
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

click==7.1.1
pyyaml
boto3==1.12.10
botocore==1.15.21
kubernetes
diskcache==3.1.1
flatdict==4.0.1
kafka-python==2.0.2
influxdb-client


================================================
FILE: scripts/webserver_config.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""Default configuration for the Airflow webserver"""
import os

from flask_appbuilder.security.manager import AUTH_DB

# from flask_appbuilder.security.manager import AUTH_LDAP
# from flask_appbuilder.security.manager import AUTH_OAUTH
# from flask_appbuilder.security.manager import AUTH_OID
# from flask_appbuilder.security.manager import AUTH_REMOTE_USER


# Absolute path of the directory that contains this configuration file.
basedir = os.path.abspath(os.path.dirname(__file__))

# Flask-WTF flag for CSRF
WTF_CSRF_ENABLED = True

# ----------------------------------------------------
# AUTHENTICATION CONFIG
# ----------------------------------------------------
# For details on how to set up each of the following authentication methods, see
# http://flask-appbuilder.readthedocs.io/en/latest/security.html#authentication-methods

# The authentication type
# AUTH_OID : Is for OpenID
# AUTH_DB : Is for database
# AUTH_LDAP : Is for LDAP
# AUTH_REMOTE_USER : Is for using REMOTE_USER from web server
# AUTH_OAUTH : Is for OAuth
AUTH_TYPE = AUTH_DB

# Uncomment to setup Full admin role name
# AUTH_ROLE_ADMIN = 'Admin'

# Public role name: the role applied to visitors without authentication.
# It is set to 'Admin' here, so unauthenticated visitors get the Admin role.
# NOTE(review): presumably intended for the local docker-compose Airflow only;
# confirm this file is never used for a publicly reachable deployment.
AUTH_ROLE_PUBLIC = 'Admin'

# Will allow user self registration
# AUTH_USER_REGISTRATION = True

# reCAPTCHA is automatically enabled when user self registration is active, in which case these keys are required
# RECAPTCHA_PRIVATE_KEY = PRIVATE_KEY
# RECAPTCHA_PUBLIC_KEY = PUBLIC_KEY

# Config for Flask-Mail necessary for user self registration
# MAIL_SERVER = 'smtp.gmail.com'
# MAIL_USE_TLS = True
# MAIL_USERNAME = 'yourappemail@gmail.com'
# MAIL_PASSWORD = 'passwordformail'
# MAIL_DEFAULT_SENDER = 'sender@gmail.com'

# The default user self registration role
# AUTH_USER_REGISTRATION_ROLE = "Public"

# When using OAuth Auth, uncomment to setup provider(s) info
# Google OAuth example:
# OAUTH_PROVIDERS = [{
#   'name':'google',
#     'token_key':'access_token',
#     'icon':'fa-google',
#         'remote_app': {
#             'api_base_url':'https://www.googleapis.com/oauth2/v2/',
#             'client_kwargs':{
#                 'scope': 'email profile'
#             },
#             'access_token_url':'https://accounts.google.com/o/oauth2/token',
#             'authorize_url':'https://accounts.google.com/o/oauth2/auth',
#             'request_token_url': None,
#             'client_id': GOOGLE_KEY,
#             'client_secret': GOOGLE_SECRET_KEY,
#         }
# }]

# When using LDAP Auth, setup the ldap server
# AUTH_LDAP_SERVER = "ldap://ldapserver.new"

# When using OpenID Auth, uncomment to setup OpenID providers.
# example for OpenID authentication
# OPENID_PROVIDERS = [
#    { 'name': 'Yahoo', 'url': 'https://me.yahoo.com' },
#    { 'name': 'AOL', 'url': 'http://openid.aol.com/<username>' },
#    { 'name': 'Flickr', 'url': 'http://www.flickr.com/<username>' },
#    { 'name': 'MyOpenID', 'url': 'https://www.myopenid.com' }]

# ----------------------------------------------------
# Theme CONFIG
# ----------------------------------------------------
# Flask App Builder comes up with a number of predefined themes
# that you can use for Apache Airflow.
# http://flask-appbuilder.readthedocs.io/en/latest/customizing.html#changing-themes
# Please make sure to remove "navbar_color" configuration from airflow.cfg
# in order to fully utilize the theme. (or use that property in conjunction with theme)
# APP_THEME = "bootstrap-theme.css"  # default bootstrap
# APP_THEME = "amelia.css"
# APP_THEME = "cerulean.css"
# APP_THEME = "cosmo.css"
# APP_THEME = "cyborg.css"
# APP_THEME = "darkly.css"
# APP_THEME = "flatly.css"
# APP_THEME = "journal.css"
# APP_THEME = "lumen.css"
# APP_THEME = "paper.css"
# APP_THEME = "readable.css"
# APP_THEME = "sandstone.css"
# APP_THEME = "simplex.css"
# APP_THEME = "slate.css"
# APP_THEME = "solar.css"
# APP_THEME = "spacelab.css"
# APP_THEME = "superhero.css"
# APP_THEME = "united.css"
# APP_THEME = "yeti.css"


================================================
FILE: setup.py
================================================
#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os

import setuptools

# Read the packaging inputs with an explicit UTF-8 encoding. Without it,
# open() falls back to the locale's preferred encoding, so building on a
# non-UTF-8 locale can fail or garble text if README.md contains non-ASCII
# characters.
with open("README.md", encoding="utf-8") as fh:
    # Used verbatim as the package's long description (markdown).
    long_description = fh.read()

with open('requirements.txt', encoding="utf-8") as f:
    # One requirement specifier per line; passed as install_requires.
    requirements = f.read().splitlines()
    logging.info(requirements)

# Package definition for the `apache-liminal` distribution.
setuptools.setup(
    name="apache-liminal",
    # The version comes from the environment: LIMINAL_BUILD_VERSION takes
    # precedence, then LIMINAL_VERSION; it is None when neither is set.
    version=os.environ.get("LIMINAL_BUILD_VERSION", os.environ.get('LIMINAL_VERSION', None)),
    author="dev@liminal.apache.org",
    author_email='dev@liminal.apache.org',
    description="A package for authoring and deploying machine learning workflows",
    # Contents of README.md, read above.
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/apache/incubator-liminal",
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
    ],
    license='Apache License, Version 2.0',
    python_requires='>=3.6',
    # Lines of requirements.txt, read above.
    install_requires=requirements,
    scripts=['scripts/liminal'],
    include_package_data=True,
    # Paths are given relative to the `liminal` package directory, hence the
    # '../' prefix for files that live at the repository root.
    # NOTE(review): the repository root lists DISCLAIMER-WIP rather than
    # DISCLAIMER - confirm '../DISCLAIMER' still matches an existing file.
    package_data={
        'liminal': [
            '../DISCLAIMER',
            '../LICENSE',
            '../NOTICE',
            '../README.md',
            '../liminal/core/config/defaults/base/liminal.yml',
        ],
    },
)


================================================
FILE: tests/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/liminal/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/liminal/core/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/liminal/core/config/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/liminal/core/config/defaults/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/liminal/core/config/defaults/test_apply_variables_substitution.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from unittest import TestCase

from liminal.core.config.defaults import default_configs


class TestApplyVariablesSubstitution(TestCase):
    """Exercises ``default_configs.apply_variable_substitution`` on ``variables`` sections."""

    def test_apply(self):
        """Both sides define variables; 'two' is defined on both with different values."""
        sub_config = {'variables': dict(one='one_value', two='two_value')}
        super_config = {'variables': dict(three='three_value', two='two_super_value')}

        merged = default_configs.apply_variable_substitution(sub_config, super_config)

        self.assertEqual(
            {'variables': {'one': 'one_value', 'two': 'two_value', 'three': 'three_value'}},
            merged,
        )

    def test_apply_superliminal_is_missing_variables(self):
        """Only the subliminal side defines variables."""
        sub_config = {'variables': dict(one='one_value', two='two_value')}
        super_config = {}

        merged = default_configs.apply_variable_substitution(sub_config, super_config)

        self.assertEqual({'variables': {'one': 'one_value', 'two': 'two_value'}}, merged)

    def test_apply_subliminal_is_missing_variables(self):
        """Only the superliminal side defines variables."""
        sub_config = {}
        super_config = {'variables': dict(one='one_value', two='two_value')}

        merged = default_configs.apply_variable_substitution(sub_config, super_config)

        self.assertEqual({'variables': {'one': 'one_value', 'two': 'two_value'}}, merged)


================================================
FILE: tests/liminal/core/config/defaults/test_defaults_pipeline_config.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from unittest import TestCase

from liminal.core.config.defaults import default_configs


class TestDefaultsPipelineConfig(TestCase):
    """Exercises ``default_configs.apply_pipeline_defaults``."""

    def test_apply(self):
        """Pipeline defaults, before/after tasks and python task defaults are all applied."""

        def expected_python_task(task_name):
            # Every expected task carries the superliminal python task defaults.
            return {
                "type": "python",
                "env_vars": {"env1": "env1super", "env2": "env2super"},
                "task": task_name,
                "default_task_super": "default_task_super_value",
            }

        pipeline = {
            "name": "mypipe",
            "param": "constant",
            "tasks": [{"task": "middle_task", "type": "python"}],
        }

        sub_config = {
            "name": "my_subliminal_test",
            "pipelines": [
                {
                    "name": "mypipe",
                    "param": "constant",
                    "tasks": [
                        {"task": "middle_task", "type": "python", "env_vars": {"env1": "env1"}},
                    ],
                },
            ],
            "pipeline_defaults": {"param1": "param1_value"},
        }

        super_pipeline_defaults = {
            "param2": "param2super_value",
            "param3": "param3super_value",
            "before_tasks": [{"task": "first_task", "type": "python"}],
            "after_tasks": [{"task": "end_task", "type": "python"}],
        }
        super_python_defaults = {
            "default_task_super": "default_task_super_value",
            "env_vars": {"env1": "env1super", "env2": "env2super"},
        }
        super_config = {
            "pipeline_defaults": super_pipeline_defaults,
            "task_defaults": {"python": super_python_defaults},
        }

        expected = {
            "param": "constant",
            "param3": "param3super_value",
            "param2": "param2super_value",
            "tasks": [
                expected_python_task(task_name)
                for task_name in ("first_task", "middle_task", "end_task")
            ],
            "name": "mypipe",
            "param1": "param1_value",
        }

        actual = default_configs.apply_pipeline_defaults(sub_config, super_config, pipeline)

        self.assertEqual(expected, actual)


================================================
FILE: tests/liminal/core/config/defaults/test_defaults_service_config.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from unittest import TestCase

from liminal.core.config.defaults import default_configs


class TestDefaultsServiceConfig(TestCase):
    """Exercises ``default_configs.apply_service_defaults``."""

    def test_apply(self):
        """Each subliminal service is expected to gain the superliminal service defaults."""
        # (service name, image) pairs shared by the input and the expectation.
        name_image_pairs = (
            ("my_python_server", "default_image"),
            ("my_python_server_for_stg", "test_image"),
        )

        sub_config = {
            "services": [
                {"name": service_name, "type": "python_server", "image": image}
                for service_name, image in name_image_pairs
            ]
        }
        super_config = {"service_defaults": {"param": "param1", "param2": "param2"}}

        expected = [
            {
                'image': image,
                'name': service_name,
                'param': 'param1',
                'param2': 'param2',
                'type': 'python_server',
            }
            for service_name, image in name_image_pairs
        ]

        actual = default_configs.apply_service_defaults(sub_config, super_config)

        self.assertEqual(expected, actual)


================================================
FILE: tests/liminal/core/config/defaults/test_defaults_tasks_config.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from unittest import TestCase

from liminal.core.config.defaults import default_configs


class TestDefaultsTaskConfig(TestCase):
    """Exercises ``default_configs.apply_task_defaults``."""

    @staticmethod
    def _spark_then_python_pipeline():
        # Fresh pipeline dict with a spark 'middle' task followed by a python 'end' task.
        return {
            'pipeline': 'mypipe',
            "tasks": [
                {"task": "middle", "type": "spark", "task_param": "task_middle_param"},
                {"task": "end", "type": "python", "task_param": "task_end_param"},
            ],
        }

    def test_apply(self):
        """A superliminal before-task is prepended and per-type task defaults are applied."""
        pipeline = self._spark_then_python_pipeline()
        # The very same pipeline object is referenced from the subliminal config.
        sub_config = {'pipelines': [pipeline]}
        super_config = {
            "task_defaults": {
                "python": {"env_vars": {"env2": "env2value"}},
                "spark": {"task_param": "task_spark_param", "executor": "emr"},
            }
        }
        before_tasks = [{"task": "start", "type": "python", "env_vars": {"env1": "env1value"}}]

        expected_tasks = [
            {'env_vars': {'env1': 'env1value', 'env2': 'env2value'}, 'task': 'start', 'type': 'python'},
            {'executor': 'emr', 'task': 'middle', 'task_param': 'task_middle_param', 'type': 'spark'},
            {'env_vars': {'env2': 'env2value'}, 'task': 'end', 'task_param': 'task_end_param', 'type': 'python'},
        ]
        expected = {'pipeline': 'mypipe', 'tasks': expected_tasks}

        actual = default_configs.apply_task_defaults(
            sub_config,
            super_config,
            pipeline=pipeline,
            superliminal_before_tasks=before_tasks,
            superliminal_after_tasks=[],
        )

        self.assertEqual(expected, actual)

    def test_missing_tasks_from_supr(self):
        """No before/after tasks are supplied by the superliminal side."""
        pipeline = self._spark_then_python_pipeline()
        # The very same pipeline object is referenced from the subliminal config.
        sub_config = {'pipelines': [pipeline]}
        super_config = {
            "task_defaults": {
                "python": {"env_vars": {"env2": "env2value"}},
                "spark": {"task_param": "task_spark_param"},
            }
        }

        expected_tasks = [
            {'task': 'middle', 'task_param': 'task_middle_param', 'type': 'spark'},
            {'env_vars': {'env2': 'env2value'}, 'task': 'end', 'task_param': 'task_end_param', 'type': 'python'},
        ]
        expected = {'pipeline': 'mypipe', 'tasks': expected_tasks}

        actual = default_configs.apply_task_defaults(
            sub_config,
            super_config,
            pipeline=pipeline,
            superliminal_before_tasks=[],
            superliminal_after_tasks=[],
        )

        self.assertEqual(expected, actual)

    def test_missing_tasks_from_sub(self):
        """The pipeline declares no tasks; only superliminal before/after tasks are supplied."""
        pipeline = {'pipeline': 'mypipe'}
        # The very same pipeline object is referenced from the subliminal config.
        sub_config = {'pipelines': [pipeline]}
        super_config = {
            "task_defaults": {
                "python": {"env_vars": {"env2": "env2value"}},
                "spark": {
                    "task_param": "task_start_param",
                },
            }
        }
        before_tasks = [{"task": "start", "task_param": "task_middle_param", "type": "spark"}]
        after_tasks = [
            {"env_vars": {"env2": "env2value"}, "task": "end", "task_param": "task_end_param", "type": "end"}
        ]

        # Built independently of the input task dicts so the comparison is not
        # affected by any in-place changes made to the inputs.
        expected_tasks = [
            {'task': 'start', 'task_param': 'task_middle_param', 'type': 'spark'},
            {'env_vars': {'env2': 'env2value'}, 'task': 'end', 'task_param': 'task_end_param', 'type': 'end'},
        ]
        expected = {'pipeline': 'mypipe', 'tasks': expected_tasks}

        actual = default_configs.apply_task_defaults(
            sub_config,
            super_config,
            pipeline=pipeline,
            superliminal_before_tasks=before_tasks,
            superliminal_after_tasks=after_tasks,
        )

        self.assertEqual(expected, actual)


================================================
FILE: tests/liminal/core/config/test_config.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
from unittest import TestCase, mock

from liminal.core.config.config import ConfigUtil


# noinspection PyUnresolvedReferences,DuplicatedCode
class TestConfigUtil(TestCase):
    @mock.patch('liminal.core.util.files_util.load')
    def test_safe_load(self, find_config_files_mock):
        """Check ``safe_load`` on a subliminal with a two-level superliminal chain.

        ``files_util.load`` is mocked to return three configs: a subliminal,
        its superliminal and that superliminal's own super. The test expects
        a single merged subliminal in which:

        * every pipeline carries the pipeline defaults of all three levels
          (``param1`` from the sub, ``param2``/``param3`` from the super,
          ``param4`` from the super-super);
        * the ``job_start`` task carries the task defaults of both the sub
          and the super;
        * the image entry is completed with the ``source`` of the super.

        The executors and the start/end tasks in the expectation are not
        present in any of the mocked configs; presumably they come from
        liminal's built-in base config - confirm against ``ConfigUtil``.

        The same result is also expected to be cached on
        ``config_util.loaded_subliminals``.
        """
        subliminal = {
            'name': 'my_subliminal_test',
            'type': 'sub',
            'super': 'my_superliminal_test',
            'images': [{'image': 'my_image'}],
            'pipelines': [{'name': 'mypipe1', 'param': 'constant'}, {'name': 'mypipe2', 'param': 'constant'}],
            'pipeline_defaults': {'param1': 'param1_value'},
            'task_defaults': {'job_start': {'task_sub_def': 'task_sub_def_value'}},
        }
        superliminal = {
            'name': 'my_superliminal_test',
            'type': 'super',
            'super': 'super_superliminal',
            'images': [{'image': 'my_image', 'source': '.'}],
            'pipeline_defaults': {'param2': 'param2super_value', 'param3': 'param3super_value'},
            'task_defaults': {
                'job_start': {
                    'task_def1': 'task_def1_value',
                    'task_def2': {
                        'task_def2_1': 'task_def2_1_value',
                    },
                }
            },
        }
        super_superliminal = {
            'name': 'super_superliminal',
            'type': 'super',
            'pipeline_defaults': {
                'param2': 'param2super_value',
                'param3': 'param3hyper_value',
                'param4': 'param4hyper_value',
            },
        }

        # NOTE: an earlier, stale ``expected`` literal (without the airflow
        # executor) used to be assigned here and was immediately overwritten;
        # it was dead code and has been removed.
        expected = [
            {
                'executors': [
                    {'executor': 'default_k8s', 'type': 'kubernetes'},
                    {'executor': 'airflow_executor', 'type': 'airflow'},
                ],
                'images': [{'image': 'my_image', 'source': '.'}],
                'name': 'my_subliminal_test',
                'pipeline_defaults': {'param1': 'param1_value'},
                'pipelines': [
                    {
                        'description': 'add defaults parameters for all pipelines',
                        'name': 'mypipe1',
                        'param': 'constant',
                        'param1': 'param1_value',
                        'param2': 'param2super_value',
                        'param3': 'param3super_value',
                        'param4': 'param4hyper_value',
                        'tasks': [
                            {
                                'executor': 'airflow_executor',
                                'task': 'start',
                                'task_def1': 'task_def1_value',
                                'task_def2': {'task_def2_1': 'task_def2_1_value'},
                                'task_sub_def': 'task_sub_def_value',
                                'type': 'job_start',
                            },
                            {'executor': 'airflow_executor', 'task': 'end', 'type': 'job_end'},
                        ],
                    },
                    {
                        'description': 'add defaults parameters for all pipelines',
                        'name': 'mypipe2',
                        'param': 'constant',
                        'param1': 'param1_value',
                        'param2': 'param2super_value',
                        'param3': 'param3super_value',
                        'param4': 'param4hyper_value',
                        'tasks': [
                            {
                                'executor': 'airflow_executor',
                                'task': 'start',
                                'task_def1': 'task_def1_value',
                                'task_def2': {'task_def2_1': 'task_def2_1_value'},
                                'task_sub_def': 'task_sub_def_value',
                                'type': 'job_start',
                            },
                            {'executor': 'airflow_executor', 'task': 'end', 'type': 'job_end'},
                        ],
                    },
                ],
                'service_defaults': {'description': 'add defaults parameters for all services'},
                'services': [],
                'super': 'my_superliminal_test',
                'task_defaults': {'job_start': {'task_sub_def': 'task_sub_def_value'}},
                'type': 'sub',
            }
        ]

        find_config_files_mock.return_value = {
            'my_subliminal_test': subliminal,
            'my_superliminal_test': superliminal,
            'super_superliminal': super_superliminal,
        }

        config_util = ConfigUtil('')

        self.assertEqual(expected, config_util.safe_load(is_render_variables=True))

        # validate cache
        self.assertEqual(expected, config_util.loaded_subliminals)

    @mock.patch('liminal.core.util.files_util.load')
    def test_get_config(self, find_config_files_mock):
        """Fetch each mocked config by name through the private ``__get_config``.

        ``files_util.load`` is mocked to return two minimal configs keyed by
        name; each lookup is expected to hand back the matching config.
        """
        find_config_files_mock.return_value = {
            'my_subliminal_test': {'type': 'sub'},
            'my_superliminal_test': {'type': 'super'},
        }
        util = ConfigUtil('')

        # (config name, config expected for that name)
        lookups = (
            ('my_subliminal_test', {'type': 'sub'}),
            ('my_superliminal_test', {'type': 'super'}),
        )
        for config_name, expected_config in lookups:
            # The name-mangled attribute reaches ConfigUtil's private method.
            self.assertEqual(expected_config, util._ConfigUtil__get_config(config_name))

    @mock.patch('liminal.core.util.files_util.load')
    def test_get_superliminal(self, find_config_files_mock):
        """Exercise the three outcomes of the private ``__get_superliminal``.

        * A subliminal without a ``super`` key is expected to resolve to the
          ``base`` superliminal spelled out below.
        * ``base`` itself is expected to resolve to an empty dict.
        * A subliminal naming a super that is not among the loaded files is
          expected to raise ``FileNotFoundError``.

        The second positional argument is passed as ``False`` in every call;
        its meaning is not visible here (``test_non_soft_merge_load`` suggests
        it is the soft-merge flag - confirm against ``ConfigUtil``).
        """
        base = {
            'executors': [
                {'executor': 'default_k8s', 'type': 'kubernetes'},
                {'executor': 'airflow_executor', 'type': 'airflow'},
            ],
            'name': 'base',
            'pipeline_defaults': {
                'after_tasks': [{'task': 'end', 'type': 'job_end'}],
                'before_tasks': [{'task': 'start', 'type': 'job_start'}],
                'description': 'add defaults parameters for all pipelines',
            },
            'service_defaults': {'description': 'add defaults parameters for all services'},
            'task_defaults': {
                'description': 'add defaults parameters for all tasks separate by task type',
                'job_end': {'executor': 'airflow_executor'},
                'job_start': {'executor': 'airflow_executor'},
                'python': {'executor': 'default_k8s'},
                'spark': {'executor': 'default_k8s'},
            },
            'type': 'super',
        }
        without_super = {'name': 'subliminal_test', 'type': 'sub'}
        find_config_files_mock.return_value = {'subliminal_test': without_super}

        config_util = ConfigUtil('')
        get_superliminal = config_util._ConfigUtil__get_superliminal

        self.assertEqual(base, get_superliminal(without_super, False))
        self.assertEqual({}, get_superliminal(base, False))

        # 'my_superliminal' was never returned by the mocked loader.
        with_unknown_super = {'name': 'subliminal_test', 'type': 'sub', 'super': 'my_superliminal'}
        self.assertRaises(FileNotFoundError, get_superliminal, with_unknown_super, False)

    @mock.patch('liminal.core.util.files_util.load')
    def test_merge_superliminals(self, find_config_files_mock):
        """Merge a superliminal with its own super and check the task ordering.

        The expectation is that ``before_tasks`` lists the parent's tasks
        first and then the child's, while ``after_tasks`` lists the child's
        tasks first and then the parent's. All other keys are expected to be
        those of the child.
        """

        def spark_task(task_name):
            # Every task in this test is a spark task that differs only by name.
            return {'task': task_name, 'type': 'spark'}

        child = {
            'name': 'my_superliminal_test',
            'type': 'super',
            'super': 'super_superliminal',
            'pipeline_defaults': {
                'before_tasks': [spark_task('start-2')],
                'after_tasks': [spark_task('end-1')],
            },
            'task_defaults': {'task_def1': 'task_def1_value'},
        }
        parent = {
            'name': 'super_superliminal',
            'type': 'super',
            'pipeline_defaults': {
                'before_tasks': [spark_task('start-1')],
                'after_tasks': [spark_task('end-2')],
            },
        }

        config_util = ConfigUtil('')

        # NOTE(review): the mock is configured only after ConfigUtil was
        # built, and both configs are handed to the merge call directly, so
        # this return value may never be read - confirm and drop if so.
        find_config_files_mock.return_value = {'super_superliminal': parent, 'superliminal': child}

        merged = dict(config_util._ConfigUtil__merge_superliminals(child, parent))

        self.assertEqual(
            {
                'name': 'my_superliminal_test',
                'type': 'super',
                'super': 'super_superliminal',
                'task_defaults': {'task_def1': 'task_def1_value'},
                'pipeline_defaults': {
                    'before_tasks': [spark_task('start-1'), spark_task('start-2')],
                    'after_tasks': [spark_task('end-1'), spark_task('end-2')],
                },
            },
            merged,
        )

    @mock.patch('liminal.core.util.files_util.load')
    @mock.patch.dict(os.environ, {'env': 'myenv', 'LIMINAL_STAND_ALONE_MODE': 'True'})
    def test_safe_load_with_variables(self, find_config_files_mock):
        """Check ``safe_load`` with ``{{placeholder}}`` rendering across three levels.

        What the expectation below pins down:

        * a variable defined in both the sub and the super (``var-2``,
          ``image``) renders with the sub's value;
        * a variable defined only higher up (``var3``, ``default_image``)
          still renders, and for ``default_image`` the direct super's value
          is expected rather than the super-super's;
        * variables may reference each other and the ``env`` environment
          variable (``a``, ``b``, ``c``);
        * whitespace inside the braces (``{{var-2   }}``) is tolerated;
        * an undefined placeholder (``{{pipe-var}}``) is left as is;
        * the resulting ``variables`` section holds only the sub's variables;
        * ``before_tasks`` / ``after_tasks`` of the supers are placed around
          the sub's own tasks.

        The same result is also expected on ``config_util.loaded_subliminals``.
        """
        subliminal = {
            'name': 'my_subliminal_test',
            'type': 'sub',
            'super': 'my_superliminal_test',
            'variables': {
                'var': 'simple case',
                'var-2': '-case',
                'var_2': '_case',
                'image': 'prod image',
                'a': '{{env}}1',
                'b': '{{a}}2',
                'c': '{{a}}{{b}}2',
            },
            'pipelines': [
                {
                    'name': 'mypipe1',
                    'param': '{{var}}',
                    'tasks': [
                        {'task': 'sub_tasks', 'type': 'dummy'},
                    ],
                },
                {
                    'name': 'mypipe2',
                    'param': '{{var-2   }}',
                    'tasks': [
                        {'task': 'sub_tasks', 'type': 'dummy'},
                    ],
                },
            ],
            'pipeline_defaults': {'param1': '{{var-2}}'},
            'task_defaults': {'job_start': {'task_def1:': 'task_sub_def_value'}},
            'services': [
                {'name': 'my_python_server', 'type': 'python_server', 'image': '{{image}}'},
                {'name': 'my_python_server_for_stg', 'type': 'python_server', 'image': '{{default_image}}'},
            ],
        }
        superliminal = {
            'name': 'my_superliminal_test',
            'type': 'super',
            'variables': {
                'var-2': 'override',
                'var3': 'super_var',
                'default_image': 'default_image_value',
                'image': 'default_image_value',
            },
            'super': 'super_superliminal',
            'pipeline_defaults': {
                'param2': '{{pipe-var}}',
                'param3': 'param3super_value',
                'before_tasks': [
                    {'task': 'second_task', 'type': 'dummy'},
                ],
            },
            'task_defaults': {
                'pipeline': {
                    'path': '{{var-2}}',
                    'task_def1': 'task_def1_value',
                    'task_def2': {
                        'task_def2_1': 'task_def2_1_value',
                    },
                }
            },
        }
        super_superliminal = {
            'name': 'super_superliminal',
            'type': 'super',
            'variables': {'default_image': 'def_default_image_value'},
            'pipeline_defaults': {
                'global_conf': '{{var3}}',
                'param2': 'param2super_value',
                'param3': 'param3hyper_value',
                'param4': 'param4hyper_value',
                'after_tasks': [
                    {'task': 'before_last_task', 'type': 'dummy'},
                ],
            },
        }
        find_config_files_mock.return_value = {
            'my_subliminal_test': subliminal,
            'my_superliminal_test': superliminal,
            'super_superliminal': super_superliminal,
        }

        # Both pipelines are expected to end up with the very same task list.
        expected_tasks = [
            {
                'executor': 'airflow_executor',
                'task': 'start',
                'task_def1:': 'task_sub_def_value',
                'type': 'job_start',
            },
            {'task': 'second_task', 'type': 'dummy'},
            {'task': 'sub_tasks', 'type': 'dummy'},
            {'task': 'before_last_task', 'type': 'dummy'},
            {'executor': 'airflow_executor', 'task': 'end', 'type': 'job_end'},
        ]
        # The pipelines differ only in their name and their rendered ``param``.
        expected_pipelines = [
            {
                'description': 'add defaults parameters for all pipelines',
                'global_conf': 'super_var',
                'name': pipeline_name,
                'param': rendered_param,
                'param1': '-case',
                'param2': '{{pipe-var}}',
                'param3': 'param3super_value',
                'param4': 'param4hyper_value',
                'tasks': expected_tasks,
            }
            for pipeline_name, rendered_param in (('mypipe1', 'simple case'), ('mypipe2', '-case'))
        ]
        # Likewise the services differ only in name and rendered image.
        expected_services = [
            {
                'description': 'add defaults parameters for all services',
                'image': rendered_image,
                'name': service_name,
                'type': 'python_server',
            }
            for service_name, rendered_image in (
                ('my_python_server', 'prod image'),
                ('my_python_server_for_stg', 'default_image_value'),
            )
        ]
        expected = [
            {
                'name': 'my_subliminal_test',
                'type': 'sub',
                'super': 'my_superliminal_test',
                'executors': [
                    {'executor': 'default_k8s', 'type': 'kubernetes'},
                    {'executor': 'airflow_executor', 'type': 'airflow'},
                ],
                'variables': {
                    'a': 'myenv1',
                    'b': 'myenv12',
                    'c': 'myenv1myenv122',
                    'image': 'prod image',
                    'var': 'simple case',
                    'var-2': '-case',
                    'var_2': '_case',
                },
                'pipeline_defaults': {'param1': '-case'},
                'task_defaults': {'job_start': {'task_def1:': 'task_sub_def_value'}},
                'service_defaults': {'description': 'add defaults parameters for all services'},
                'pipelines': expected_pipelines,
                'images': [],
                'services': expected_services,
            }
        ]

        config_util = ConfigUtil('')
        loaded = config_util.safe_load(is_render_variables=True)

        self.assertEqual(expected, loaded)

        # validate cache
        self.assertEqual(expected, config_util.loaded_subliminals)

    @mock.patch('os.path.exists')
    @mock.patch('liminal.core.environment.get_airflow_home_dir')
    @mock.patch('liminal.core.util.files_util.load')
    @mock.patch.dict(os.environ, {'LIMINAL_STAND_ALONE_MODE': 'True', 'POD_NAMESPACE': 'my_pod_ns'})
    def test_liminal_config_snapshot(self, find_config_files_mock, get_airflow_dir_mock, path_exists_mock):
        """Check that the final config of a subliminal is snapshotted as YAML.

        With the airflow home mocked to ``/tmp`` and ``os.path.exists`` mocked
        to ``True``, ``snapshot_final_liminal_configs`` is expected to open
        ``/tmp/../liminal_config_files/my_subliminal_test.yml`` for writing
        exactly once and to ``yaml.dump`` the fully merged config into it with
        ``default_flow_style=False``.

        The variables are an int and a bool here; the rendered pipeline
        ``param`` values are expected to be their string forms (``'1'`` and
        ``'True'``).
        """
        get_airflow_dir_mock.return_value = '/tmp'
        path_exists_mock.return_value = True
        find_config_files_mock.return_value = {
            'my_subliminal_test': {
                'name': 'my_subliminal_test',
                'type': 'sub',
                'variables': {'var': 1, 'var-2': True},
                'pipelines': [
                    {'name': 'mypipe1', 'param': '{{var}}'},
                    {'name': 'mypipe2', 'param': '{{var-2   }}'},
                ],
            }
        }

        # The subliminal defines no tasks, so each pipeline is expected to
        # hold only the default start and end tasks.
        default_tasks = [
            {'task': 'start', 'type': 'job_start', 'executor': 'airflow_executor'},
            {'task': 'end', 'type': 'job_end', 'executor': 'airflow_executor'},
        ]
        expected = {
            'name': 'my_subliminal_test',
            'type': 'sub',
            'executors': [
                {'executor': 'default_k8s', 'type': 'kubernetes'},
                {'executor': 'airflow_executor', 'type': 'airflow'},
            ],
            'service_defaults': {'description': 'add defaults parameters for all services'},
            'task_defaults': {
                'description': 'add defaults parameters for all tasks separate by task type',
                'python': {'executor': 'default_k8s'},
                'spark': {'executor': 'default_k8s'},
                'job_end': {'executor': 'airflow_executor'},
                'job_start': {'executor': 'airflow_executor'},
            },
            'pipeline_defaults': {
                'description': 'add defaults parameters for all pipelines',
                'before_tasks': [{'task': 'start', 'type': 'job_start'}],
                'after_tasks': [{'task': 'end', 'type': 'job_end'}],
            },
            'variables': {'var': 1, 'var-2': True},
            'pipelines': [
                {
                    'name': pipeline_name,
                    'param': rendered_param,
                    'description': 'add defaults parameters for all pipelines',
                    'tasks': default_tasks,
                }
                for pipeline_name, rendered_param in (('mypipe1', '1'), ('mypipe2', 'True'))
            ],
            'images': [],
            'services': [],
        }
        snapshot_path = os.path.join('/tmp', '../liminal_config_files/my_subliminal_test.yml')

        # Intercept both the file handle and the YAML serialisation so that
        # nothing is written to disk.
        with mock.patch('builtins.open', mock.mock_open()) as m, mock.patch('yaml.dump') as ydm:
            config_util = ConfigUtil('')
            config_util.safe_load(is_render_variables=True)
            config_util.snapshot_final_liminal_configs()

            m.assert_called_once_with(snapshot_path, 'w')
            ydm.assert_called_once_with(expected, m.return_value, default_flow_style=False)

    @mock.patch('liminal.core.util.files_util.load')
    def test_soft_merge_load(self, find_config_files_mock):
        """Load a subliminal whose super is missing, with ``soft_merge=True``.

        ``my_super`` is not among the mocked config files. With soft merge
        enabled, ``safe_load`` is expected to return the subliminal as it was
        loaded rather than fail.
        """
        orphan = {
            'name': 'my_name',
            'type': 'sub',
            'super': 'my_super',
        }
        find_config_files_mock.return_value = {'my_subliminal_test': orphan}

        loaded = ConfigUtil('').safe_load(is_render_variables=True, soft_merge=True)

        self.assertEqual([orphan], loaded)

    def test_non_soft_merge_load(self):
        """Resolving an unknown super with the flag set to ``False`` must fail.

        Counterpart of ``test_soft_merge_load``: nothing is mocked here, the
        subliminal names the super ``my_super``, and the private
        ``__get_superliminal`` is expected to raise ``FileNotFoundError``.
        """
        orphan = {
            'name': 'my_name',
            'type': 'sub',
            'super': 'my_super',
        }
        config_util = ConfigUtil('')

        with self.assertRaises(FileNotFoundError):
            config_util._ConfigUtil__get_superliminal(orphan, False)


================================================
FILE: tests/liminal/core/util/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/liminal/core/util/test_dict_utils.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
from unittest import TestCase, mock

from liminal.core.util import dict_util


class TestDictUtils(TestCase):
    def setUp(self) -> None:
        self.dict1 = {"env": "env1", "env_dict": {"env3": "env3"}, "env4": "env4"}

        self.dict2 = {"env": "env1", "env_dict": {"env2": "env2"}}

    def test_merge_dicts(self):
        """Merge without the third argument: nested dicts are not combined.

        The expected ``env_dict`` is exactly ``dict2``'s, i.e. ``dict1``'s
        nested ``env3`` entry is not carried over.
        """
        merged = dict_util.merge_dicts(self.dict1, self.dict2)

        self.assertEqual(
            {
                "env": "env1",
                "env4": "env4",
                "env_dict": {"env2": "env2"},
            },
            merged,
        )

    def test_recursive_merge_dicts(self):
        """Merge with ``True`` as third argument: nested dicts are combined.

        The expected ``env_dict`` holds the entries of both inputs.
        """
        merged = dict_util.merge_dicts(self.dict1, self.dict2, True)

        self.assertEqual(
            {
                "env": "env1",
                "env4": "env4",
                "env_dict": {"env2": "env2", "env3": "env3"},
            },
            merged,
        )

    def test_merge_with_empty(self):
        """Merging with an empty dict, on either side, must yield the other dict.

        Each side is checked both with and without ``True`` as third argument.
        """
        merge = dict_util.merge_dicts
        unchanged = self.dict2

        # Empty dict as the first argument (a fresh one for every call).
        self.assertEqual(unchanged, merge({}, self.dict2, True))
        self.assertEqual(unchanged, merge({}, self.dict2))

        # Empty dict as the second argument.
        self.assertEqual(unchanged, merge(self.dict2, {}, True))
        self.assertEqual(unchanged, merge(self.dict2, {}))

    def test_replace_variables_simple_case(self):
        """Render ``{{name}}`` placeholders from an explicit variables dict.

        Covered by the expectation: plain placeholders, placeholders with
        inner whitespace, a placeholder inside a nested dict, a placeholder
        preceded by a stray ``{`` (the extra brace is kept), and an unknown
        placeholder (left as is).
        """
        variables = {"env_var": "env value", "var1": "value1", "var2": "value2"}
        template = {
            "env": "{{env_var}}",
            "env_dict": {"env3": "{{ var1 }}"},
            "env4": "{{var2 }}",
            "env5": "{{{var2}}",
            "env6": "{{var3}}",
        }

        rendered = dict_util.replace_placeholders(template, variables)

        self.assertEqual(
            {
                "env": "env value",
                "env_dict": {"env3": "value1"},
                "env4": "value2",
                "env5": "{value2",
                "env6": "{{var3}}",
            },
            rendered,
        )

    def test_replace_variables_empty_var(self):
        """With no variables at all, the result must equal the input dict."""
        template = {
            "env": "{{env_var}}",
            "env_dict": {"env3": "{{ var1 }}"},
            "env4": "{{var2 }}",
            "env5": "{{{var2}}",
            "env6": "{{var3}}",
        }

        rendered = dict_util.replace_placeholders(template, {})

        self.assertEqual(template, rendered)

    @mock.patch.dict(os.environ, {"LIMINAL_STAND_ALONE_MODE": "False"})
    @mock.patch('airflow.models.Variable.get')
    def test_replace_variables_flat_replace(self, airflow_variable_mock):
        """Render several placeholders per string, with airflow as a fallback.

        ``LIMINAL_STAND_ALONE_MODE`` is ``"False"`` and airflow's
        ``Variable.get`` is mocked to know only ``playground``. The
        expectation: names found in ``variables`` render from there,
        ``{{playground}}`` renders with the mocked airflow value, and
        ``{{optionals}}``, known to neither, is left as is.
        """
        # Stand-in for airflow's Variable.get: knows 'playground' only and
        # hands back the supplied default for everything else.
        airflow_variable_mock.side_effect = lambda key, default_var: (
            'liminal playground' if key == 'playground' else default_var
        )

        variables = {
            "region": "us_east_1",
            "event_type": "subscription",
            "prod": "liminal production",
            "stg": "liminal staging",
        }
        template = {
            "query": "select * from my_table where event_type = {{event_type}} and region = {{region}}",
            "env": "{{prod}}, {{stg}}, {{playground}}",
            "optional": "{{optionals}}",
        }

        rendered = dict_util.replace_placeholders(template, variables)

        self.assertEqual(
            {
                "query": "select * from my_table where event_type = subscription and region = us_east_1",
                "env": "liminal production, liminal staging, liminal playground",
                "optional": "{{optionals}}",
            },
            rendered,
        )

    @mock.patch.dict(os.environ, {"LIMINAL_STAND_ALONE_MODE": "False"})
    @mock.patch('airflow.models.Variable.get')
    def test_replace_variables_with_nested_list(self, airflow_variable_mock):
        """Render placeholders that sit inside lists and inside dicts in lists.

        Same setup as the flat case (airflow's ``Variable.get`` mocked to
        know only ``playground``), but ``env`` is a list of strings and
        ``tasks`` is a list of dicts; every element is expected to be
        rendered.
        """
        # Stand-in for airflow's Variable.get: knows 'playground' only and
        # hands back the supplied default for everything else.
        airflow_variable_mock.side_effect = lambda key, default_var: (
            'liminal playground' if key == 'playground' else default_var
        )

        variables = {
            "region": "us_east_1",
            "event_type": "subscription",
            "prod": "liminal production",
            "stg": "liminal staging",
            "image": "my_image_name",
        }
        template = {
            "query": "select * from my_table where event_type = {{event_type}} and region = {{region}}",
            "env": ['{{prod}}', '{{stg}}', '{{playground}}'],
            "tasks": [{'id': 'id1', 'image': '{{image}}'}],
            "optional": "{{optionals}}",
        }

        rendered = dict_util.replace_placeholders(template, variables)

        self.assertEqual(
            {
                'query': 'select * from my_table where event_type = subscription and region = us_east_1',
                'env': ['liminal production', 'liminal staging', 'liminal playground'],
                'tasks': [{'id': 'id1', 'image': 'my_image_name'}],
                'optional': '{{optionals}}',
            },
            rendered,
        )

    @mock.patch.dict(os.environ, {"table": "my_table", "LIMINAL_STAND_ALONE_MODE": "True"})
    def test_replace_variables_from_env(self):
        """Render a placeholder from the process environment.

        No variables are passed and ``LIMINAL_STAND_ALONE_MODE`` is
        ``"True"``; ``{{table}}`` is expected to render with the ``table``
        environment variable while the unknown placeholders stay as they are.
        """
        template = {
            "query": "select * from my_table where event_type = {{event_type}} and region = {{region}} from {{table}}"
        }

        rendered = dict_util.replace_placeholders(template, {})

        self.assertEqual(
            {'query': 'select * from my_table where event_type = {{event_type}} and region = {{region}} from my_table'},
            rendered,
        )

    @mock.patch.dict(os.environ, {"table": "my_table", "LIMINAL_STAND_ALONE_MODE": "True"})
    def test_replace_variables_from_variable_and_not_env(self):
        """An explicit variable must win over an environment variable.

        ``table`` exists both in ``variables`` and in the environment; the
        expected rendering uses the value from ``variables``.
        """
        template = {
            "query": "select * from my_table where event_type = {{event_type}} and region = {{region}} from {{table}}"
        }
        explicit_variables = {"table": "my_variable_table"}

        rendered = dict_util.replace_placeholders(template, explicit_variables)

        self.assertEqual(
            {
                'query': 'select * from my_table where event_type = {{event_type}} '
                'and region = {{region}} from my_variable_table'
            },
            rendered,
        )

    @mock.patch.dict(os.environ, {"table": "my_table", "LIMINAL_STAND_ALONE_MODE": "False"})
    @mock.patch('airflow.models.Variable.get')
    def test_replace_variables_from_airflow_and_not_enc(self, airflow_variable_mock):
        """An airflow variable must win over an environment variable.

        ``table`` exists in the environment and in the mocked airflow
        ``Variable.get`` while ``LIMINAL_STAND_ALONE_MODE`` is ``"False"``;
        the expected rendering uses the airflow value.
        """
        # Stand-in for airflow's Variable.get: knows 'table' only and hands
        # back the supplied default for everything else.
        airflow_variable_mock.side_effect = lambda key, default_var: (
            'my_airflow_table' if key == 'table' else default_var
        )

        template = {
            "query": "select * from my_table where event_type = {{event_type}} and region = {{region}} from {{table}}"
        }

        rendered = dict_util.replace_placeholders(template, {})

        self.assertEqual(
            {
                'query': 'select * from my_table where event_type = {{event_type}} '
                'and region = {{region}} from my_airflow_table'
            },
            rendered,
        )


================================================
FILE: tests/liminal/kubernetes/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/liminal/kubernetes/test_volume_util.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import sys
import unittest

from kubernetes import config

from liminal.kubernetes import volume_util

# Try to load the local kubeconfig once, at import time. Any failure is only
# reported on stdout so that importing this module never raises here.
# NOTE(review): the tests below presumably still need a reachable cluster — confirm.
try:
    config.load_kube_config()
except Exception:
    msg = "Kubernetes is not running\n"
    sys.stdout.write(f"INFO: {msg}")


class TestKubernetesVolume(unittest.TestCase):
    """Tests for ``volume_util``: volume config lookup, local volume creation and deletion."""

    def setUp(self) -> None:
        # Single local volume definition shared by all tests in this class.
        self.config = {
            'volumes': [
                {
                    'volume': 'gettingstartedvol-test',
                    'claim_name': 'gettingstartedvol-test-pvc',
                    'local': {'path': '.'},
                }
            ]
        }

    def test_volume_config(self):
        """``get_volume_configs`` returns the configured volume as its first entry."""
        volumes_config = volume_util.get_volume_configs(self.config, ".")
        self.assertEqual(str(self.config['volumes'][0]), str(volumes_config[0]))

    def test_create_volume(self):
        """After (re)creating the volumes, the listed volume carries the expected name and claim."""
        self._delete_volumes()
        self._create_volumes()
        volume_config = self.config['volumes'][0]
        matching_volumes = volume_util._list_persistent_volumes(volume_config['volume'])
        # Two separate assertions on purpose: the second positional argument of
        # ``assertTrue`` is the failure message, so passing both checks to a single
        # ``assertTrue`` call would leave the claim name unverified.
        self.assertIn(volume_config['volume'], matching_volumes[0]['metadata']['name'])
        self.assertIn(volume_config['claim_name'], matching_volumes[0]['spec']['claim_ref']['name'])

    def test_delete_volume(self):
        """After creating and then deleting the volumes, no matching volume is listed."""
        self._create_volumes()
        self._delete_volumes()
        matching_volumes = volume_util._list_persistent_volumes(self.config['volumes'][0]['volume'])
        self.assertEqual([], matching_volumes)

    def _create_volumes(self):
        """Create the local volumes described by ``self.config`` relative to the current directory."""
        volume_util.create_local_volumes(self.config, ".")

    def _delete_volumes(self):
        """Delete the local volumes described by ``self.config`` relative to the current directory."""
        volume_util.delete_local_volumes(self.config, ".")


================================================
FILE: tests/runners/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/build/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/build/http/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/build/http/python/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/build/http/python/test_python_server_image_builder.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import json
import logging
import os
import threading
import time
import unittest
import urllib.request
from unittest import TestCase

import docker

from liminal.build.image.python_server.python_server import PythonServerImageBuilder
from liminal.build.python import PythonImageVersions

# Tag given to the image built by these tests; the same value is used as the
# `ancestor` filter when looking up containers to stop and remove.
IMAGE_NAME = 'liminal_server_image'


class TestPythonServer(TestCase):
    """Build the python server image, run it in a container and call its HTTP endpoints."""

    def setUp(self) -> None:
        super().setUp()
        self.docker_client = docker.from_env()
        self.config = self.__create_conf()
        # Remove any container of this image that is still running before the test starts.
        self.__remove_containers()

    def tearDown(self) -> None:
        self.__remove_containers()
        self.docker_client.close()

    def test_build_python_server(self):
        """Build and exercise the server once for every supported python version."""
        versions = list(PythonImageVersions().supported_versions)
        for version in versions:
            build_out = self.__test_build_python_server(python_version=version)
            self.assertIn('RUN pip install -r requirements.txt', build_out, 'Incorrect pip command')

    def test_build_python_server_with_pip_conf(self):
        """With ``pip_conf`` configured, the build output contains the secret-mount pip command."""
        build_out = self.__test_build_python_server(use_pip_conf=True)

        self.assertIn(
            'RUN --mount=type=secret,id=pip_config,dst=/etc/pip.conf  pip install',
            build_out,
            'Incorrect pip command',
        )

    def __test_build_python_server(self, use_pip_conf=False, python_version=None):
        """Build the image, start a container from it and check both HTTP endpoints.

        :param use_pip_conf: when true, ``pip_conf`` is added to the builder config.
        :param python_version: when truthy, set as ``python_version`` in the builder config.
        :return: the builder output converted to ``str``.
        """
        base_path = os.path.join(os.path.dirname(__file__), '../../../liminal')

        config = self.__create_conf()

        if use_pip_conf:
            config['pip_conf'] = os.path.join(base_path, 'pip.conf')

        if python_version:
            config['python_version'] = python_version

        builder = PythonServerImageBuilder(
            config=config, base_path=base_path, relative_source_path='myserver', tag=IMAGE_NAME
        )

        build_out = str(builder.build())

        thread = threading.Thread(target=self.__run_container)
        thread.start()
        # Wait for the start attempt to finish so the helper thread never outlives the test.
        thread.join()

        # Fixed wait before the first request.
        # NOTE(review): presumably gives the server inside the container time to come up — confirm.
        time.sleep(20)

        logging.info('Sending request to server')

        json_string = '{"key1": "val1", "key2": "val2"}'

        encoding = 'ascii'

        # Context managers close the HTTP responses even when an assertion fails.
        with urllib.request.urlopen(
            'http://localhost:9294/myendpoint1', data=json_string.encode(encoding)
        ) as response:
            server_response = str(response.read().decode(encoding))

        logging.info(f'Response from server: {server_response}')

        self.assertEqual(f'Input was: {json.loads(json_string)}', server_response)

        with urllib.request.urlopen('http://localhost:9294/favicon.ico') as server_favicon_response:
            self.assertEqual(204, server_favicon_response.status)

        self.__remove_containers()

        return build_out

    def __remove_containers(self):
        """Stop and remove every listed container of ``IMAGE_NAME``, then wait until none is listed."""
        logging.info(f'Stopping containers with image: {IMAGE_NAME}')

        matching_containers = self.__get_docker_containers()

        for container in matching_containers:
            container_id = container.id
            logging.info(f'Stopping container {container_id}')
            self.docker_client.api.stop(container_id)
            logging.info(f'Removing container {container_id}')
            self.docker_client.api.remove_container(container_id)

        while matching_containers:
            matching_containers = self.__get_docker_containers()
            if matching_containers:
                # Pause between polls instead of querying the docker daemon in a tight loop.
                time.sleep(1)

    def __get_docker_containers(self):
        """Return the containers listed by docker whose ancestor image is ``IMAGE_NAME``."""
        return self.docker_client.containers.list(filters={'ancestor': IMAGE_NAME})

    def __run_container(self):
        """Start a detached container of ``IMAGE_NAME`` with container port 80 published on 9294."""
        try:
            logging.info(f'Running container for image: {IMAGE_NAME}')
            self.docker_client.containers.run(IMAGE_NAME, ports={'80/tcp': 9294}, detach=True)
        except Exception as err:
            # Best effort: a start failure is logged here and then surfaces through
            # the failing HTTP requests in the calling test.
            logging.exception(err)

    @staticmethod
    def __create_conf():
        """Return a fresh builder configuration dict for the server image."""
        return {
            'image': IMAGE_NAME,
            'cmd': 'foo bar',
            'source': 'baz',
            'input_type': 'my_input_type',
            'input_path': 'my_input',
            'output_path': '/my_output.json',
            'no_cache': True,
            'endpoints': [{'endpoint': '/myendpoint1', 'module': 'my_server', 'function': 'myendpoint1func'}],
        }


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: tests/runners/airflow/build/python/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/build/python/test_python_image_builder.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os
import shutil
import tempfile
from unittest import TestCase

import docker

from liminal.build.image.python.python import PythonImageBuilder
from liminal.build.python import PythonImageVersions


class TestPythonImageBuilder(TestCase):
    """Build the python task image and run it to verify the files it writes."""

    __IMAGE_NAME = 'my_python_task_img'

    def setUp(self) -> None:
        super().setUp()
        # NOTE(review): TMPDIR is forced to /tmp, presumably so the temp dirs can be
        # bind-mounted into containers — confirm. The variable is not restored afterwards.
        os.environ['TMPDIR'] = '/tmp'
        self.temp_dir = self.__temp_dir()
        self.temp_airflow_dir = self.__temp_dir()

    def tearDown(self) -> None:
        super().tearDown()
        self.__remove_dir(self.temp_dir)
        self.__remove_dir(self.temp_airflow_dir)

    def test_build(self):
        """Build with the default and with the first supported python version, then run the image."""
        for python_version in [None, PythonImageVersions().supported_versions[0]]:
            build_out = self.__test_build(python_version=python_version)
            # Asserted inside the loop so that every build output is checked, not only the last one.
            self.assertIn('RUN pip install -r requirements.txt', build_out, 'Incorrect pip command')

        self.__test_image()

    def test_build_with_pip_conf(self):
        """With ``pip_conf`` configured, the build output contains the secret-mount pip command."""
        build_out = self.__test_build(use_pip_conf=True)

        self.assertIn(
            'RUN --mount=type=secret,id=pip_config,dst=/etc/pip.conf  pip install',
            build_out,
            'Incorrect pip command',
        )

        self.__test_image()

    def test_with_unsupported_python_version(self):
        """Building with python version 3.5.2 raises ``ValueError``."""
        with self.assertRaises(ValueError):
            self.__test_build(python_version='3.5.2')

    def __test_build(self, use_pip_conf=False, python_version=None):
        """Build the task image and return the builder output as ``str``.

        :param use_pip_conf: when true, ``pip_conf`` is added to the builder config.
        :param python_version: when truthy, set as ``python_version`` in the builder config.
        """
        config = self.__create_conf('my_task')

        base_path = os.path.join(os.path.dirname(__file__), '../../liminal')

        if use_pip_conf:
            config['pip_conf'] = os.path.join(base_path, 'pip.conf')

        if python_version:
            config['python_version'] = python_version

        builder = PythonImageBuilder(
            config=config, base_path=base_path, relative_source_path='write_inputs', tag=self.__IMAGE_NAME
        )

        return str(builder.build())

    def __test_image(self):
        """Run ``write_inputs.py`` in the built image and compare the container log with the expected lines."""
        docker_client = docker.from_env()
        try:
            # Raises if the image was not built.
            docker_client.images.get(self.__IMAGE_NAME)

            cmds = ['/bin/bash', '-c', 'python write_inputs.py']

            container_log = docker_client.containers.run(
                self.__IMAGE_NAME,
                cmds,
                volumes={self.temp_dir: {'bind': '/mnt/vol1', 'mode': 'rw'}},
                environment={'NUM_FILES': 10, 'NUM_SPLITS': 3},
            )
        finally:
            # Close the client even when the image lookup or the container run fails.
            docker_client.close()

        logging.info(container_log)

        self.assertEqual(
            "b'"
            "Writing input file /mnt/vol1/inputs/0/input0.json\\n"
            "Writing input file /mnt/vol1/inputs/1/input1.json\\n"
            "Writing input file /mnt/vol1/inputs/2/input2.json\\n"
            "Writing input file /mnt/vol1/inputs/0/input3.json\\n"
            "Writing input file /mnt/vol1/inputs/1/input4.json\\n"
            "Writing input file /mnt/vol1/inputs/2/input5.json\\n"
            "Writing input file /mnt/vol1/inputs/0/input6.json\\n"
            "Writing input file /mnt/vol1/inputs/1/input7.json\\n"
            "Writing input file /mnt/vol1/inputs/2/input8.json\\n"
            "Writing input file /mnt/vol1/inputs/0/input9.json\\n"
            "'",
            str(container_log),
        )

    def __create_conf(self, task_id):
        """Return a builder configuration dict for the task named ``task_id``."""
        return {
            'task': task_id,
            'cmd': 'foo bar',
            'image': self.__IMAGE_NAME,
            'source': 'baz',
            'no_cache': True,
        }

    @staticmethod
    def __temp_dir():
        """Create a new temporary directory and return its path."""
        temp_dir = tempfile.mkdtemp()
        return temp_dir

    @staticmethod
    def __remove_dir(temp_dir):
        """Remove ``temp_dir`` recursively, ignoring errors."""
        shutil.rmtree(temp_dir, ignore_errors=True)


================================================
FILE: tests/runners/airflow/build/spark/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/build/spark/test_spark_image_builder.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
import os
import shutil
import tempfile
from unittest import TestCase

import docker

from liminal.build.image.spark.spark import SparkImageBuilder


class TestSparkImageBuilder(TestCase):
    """Build the spark image and run the word-count app in it to verify its output."""

    __IMAGE_NAME = 'my_spark_image'

    def setUp(self) -> None:
        super().setUp()
        # NOTE(review): TMPDIR is forced to /tmp, presumably so the temp dirs can be
        # bind-mounted into containers — confirm. The variable is not restored afterwards.
        os.environ['TMPDIR'] = '/tmp'
        self.temp_dir = self.__temp_dir()
        self.temp_airflow_dir = self.__temp_dir()

    def tearDown(self) -> None:
        super().tearDown()
        self.__remove_dir(self.temp_dir)
        self.__remove_dir(self.temp_airflow_dir)

    def test_build(self):
        """The build output contains the requirements install step and the image runs as expected."""
        build_out = self.__test_build()
        self.assertIn('RUN pip install -r requirements.txt', build_out, 'Incorrect pip command')

        self.__test_image()

    def __test_build(self):
        """Build the spark image from the test spark app and return the builder output as ``str``."""
        config = self.__create_conf('my_task')

        base_path = os.path.join(os.path.dirname(__file__), '../../../apps/test_spark_app')

        builder = SparkImageBuilder(
            config=config, base_path=base_path, relative_source_path='wordcount', tag=self.__IMAGE_NAME
        )

        return str(builder.build())

    def __test_image(self):
        """Run ``spark-submit wordcount.py`` in the built image and compare the container log."""
        docker_client = docker.from_env()
        try:
            # Raises if the image was not built.
            docker_client.images.get(self.__IMAGE_NAME)

            cmds = ['spark-submit', 'wordcount.py', 'words.txt', '/mnt/vol1/outputs']

            container_log = docker_client.containers.run(
                self.__IMAGE_NAME, cmds, volumes={self.temp_dir: {'bind': '/mnt/vol1', 'mode': 'rw'}}
            )
        finally:
            # Close the client even when the image lookup or the container run fails.
            docker_client.close()

        logging.info(container_log)

        self.assertEqual(
            "b'\\n"
            "my: 1\\n"
            "first: 1\\n"
            "liminal: 1\\n"
            "spark: 1\\n"
            "task: 1\\n"
            "writing the results to /mnt/vol1/outputs\\n'",
            str(container_log),
        )

    def __create_conf(self, task_id):
        """Return a builder configuration dict for the task named ``task_id``."""
        return {
            'task': task_id,
            'cmd': 'foo bar',
            'image': self.__IMAGE_NAME,
            'source': 'baz',
            'no_cache': True,
        }

    @staticmethod
    def __temp_dir():
        """Create a new temporary directory and return its path."""
        temp_dir = tempfile.mkdtemp()
        return temp_dir

    @staticmethod
    def __remove_dir(temp_dir):
        """Remove ``temp_dir`` recursively, ignoring errors."""
        shutil.rmtree(temp_dir, ignore_errors=True)


================================================
FILE: tests/runners/airflow/build/test_liminal_apps_builder.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import unittest
from unittest import TestCase

import docker

from liminal.build import liminal_apps_builder


class TestLiminalAppsBuilder(TestCase):
    """Checks that building the liminal apps produces the expected docker images."""

    __image_names = ['my_python_task_img', 'my_parallelized_python_task_img']

    def setUp(self) -> None:
        self.docker_client = docker.client.from_env()
        # Drop images left over from earlier runs so the build result is what gets checked.
        self.__remove_images()

    def tearDown(self) -> None:
        self.__remove_images()
        self.docker_client.close()

    def __remove_images(self):
        """Force-remove each expected image that docker currently lists."""
        for name in self.__image_names:
            if not self.docker_client.images.list(name):
                continue
            self.docker_client.images.remove(image=name, force=True)

    def test_build_liminal(self):
        """After building the apps under ``../liminal``, every expected image can be fetched."""
        apps_path = os.path.join(os.path.dirname(__file__), '../liminal')
        liminal_apps_builder.build_liminal_apps(apps_path)

        for name in self.__image_names:
            # ``images.get`` is the check itself: it is expected to raise for a missing image.
            self.docker_client.images.get(name)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: tests/runners/airflow/compiler/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/dag/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/dag/test_liminal_dags.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import unittest
from unittest import TestCase, mock

from liminal.core.config.config import ConfigUtil
from liminal.runners.airflow.dag import liminal_register_dags
from liminal.runners.airflow.operators.job_status_operator import (
    JobEndOperator,
    JobStartOperator,
)


class Test(TestCase):
    """Checks the DAGs registered from the ``test_app`` liminal configuration."""

    def test_register_dags(self):
        """Exactly one entry is registered and its DAG id is ``my_pipeline``."""
        registered = self.get_register_dags()
        self.assertEqual(len(registered), 1)

        pipeline_dag = registered[0][1]

        # TODO: elaborate tests to assert all dags have correct tasks
        self.assertEqual(pipeline_dag.dag_id, 'my_pipeline')

    def test_default_start_task(self):
        """The task registered as ``start`` is a ``JobStartOperator``."""
        first_entry = self.get_register_dags()[0]
        tasks_by_id = first_entry[1].task_dict
        self.assertIsInstance(tasks_by_id['start'], JobStartOperator)

    def test_default_end_task(self):
        """The task registered as ``end`` is a ``JobEndOperator``."""
        first_entry = self.get_register_dags()[0]
        tasks_by_id = first_entry[1].task_dict
        self.assertIsInstance(tasks_by_id['end'], JobEndOperator)

    def test_default_args(self):
        """The DAG default args contain the three expected ``default_*_loaded`` keys."""
        first_entry = self.get_register_dags()[0]
        arg_names = first_entry[1].default_args.keys()

        for expected_key in ('default_arg_loaded', 'default_array_loaded', 'default_object_loaded'):
            self.assertIn(expected_key, arg_names)

    @staticmethod
    @mock.patch.object(ConfigUtil, "snapshot_final_liminal_configs")
    def get_register_dags(mock_snapshot_final_liminal_configs):
        """Register the DAGs of the test app while ``snapshot_final_liminal_configs`` is patched.

        :param mock_snapshot_final_liminal_configs: mock injected by ``mock.patch.object``.
        :return: whatever ``liminal_register_dags.register_dags`` returns for the test app path.
        """
        mock_snapshot_final_liminal_configs.side_effect = None
        app_dir = os.path.join(os.path.dirname(__file__), '../../apps/test_app')
        return liminal_register_dags.register_dags(app_dir)


# Run the tests in this module when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: tests/runners/airflow/executors/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/executors/test_airflow_executor.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from unittest import TestCase
from unittest.mock import MagicMock

from liminal.runners.airflow.executors import airflow
from liminal.runners.airflow.tasks.airflow import AirflowTask


class TestAirflowExecutorTask(TestCase):
    """
    Test AirflowExecutor task
    """

    def test_apply_task_to_dag(self):
        """Applying a task through the executor calls the task's ``apply_task_to_dag`` exactly once."""
        mocked_task = MagicMock(spec=AirflowTask)
        mocked_task.apply_task_to_dag = MagicMock()

        executor = airflow.AirflowExecutor("executor-task", {}, {})
        executor.apply_task_to_dag(task=mocked_task)

        mocked_task.apply_task_to_dag.assert_called_once()


================================================
FILE: tests/runners/airflow/executors/test_emr.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from copy import deepcopy
from unittest import TestCase, mock
from unittest.mock import MagicMock

import boto3
from airflow.contrib.hooks.emr_hook import EmrHook
from airflow.contrib.operators.emr_add_steps_operator import EmrAddStepsOperator
from airflow.contrib.sensors.emr_step_sensor import EmrStepSensor
from moto import mock_emr

from liminal.runners.airflow import DummyDag
from liminal.runners.airflow.executors.emr import EMRExecutor
from liminal.runners.airflow.operators.operator_with_variable_resolving import (
    OperatorWithVariableResolving,
)
from liminal.runners.airflow.tasks import hadoop
from tests.util import dag_test_utils


@mock_emr
class TestEMRExecutorTask(TestCase):
    """
    Tests for EMRExecutor, run under the moto ``mock_emr`` decorator.
    """

    def setUp(self) -> None:
        # Arguments of the job flow started through the boto3 EMR client below.
        self.run_job_flow_args = {
            "Instances": {
                "InstanceCount": 1,
                "KeepJobFlowAliveWhenNoSteps": True,
                "MasterInstanceType": "c3.medium",
                "Placement": {"AvailabilityZone": "us-east-1"},
                "SlaveInstanceType": "c3.xlarge",
            },
            "JobFlowRole": "EMR_EC2_DefaultRole",
            "LogUri": "s3://liminal/log",
            "Name": "test-emr-cluster",
            "ServiceRole": "EMR_DefaultRole",
            "VisibleToAllUsers": True,
        }

        self.client = boto3.client("emr", region_name="us-east-1")

        # The client receives a deep copy, so self.run_job_flow_args itself is
        # never handed to run_job_flow.
        job_flow = self.client.run_job_flow(**deepcopy(self.run_job_flow_args))
        self.cluster_id = job_flow["JobFlowId"]

        self.dag = dag_test_utils.create_dag()
        self.dag.context = DummyDag(dag_id=self.dag.dag_id, task_id="").context

        self.executor_name = 'test-emr-cluster'

        # HadoopTask stand-in exposing the attributes assigned here, plus a
        # canned runnable command.
        task_stub = MagicMock(spec=hadoop.HadoopTask)
        task_stub.get_runnable_command.return_value = ['spark-submit', 'test', 'params', '--param']
        task_stub.task_id = 'spark-task'
        task_stub.dag = self.dag
        task_stub.trigger_rule = 'all_done'
        task_stub.parent = None
        task_stub.task_config = {}
        task_stub.variables = {}
        self.hadoop_task = task_stub

        self.emr = EMRExecutor(
            self.executor_name,
            liminal_config={},
            executor_config={
                'executor': self.executor_name,
                'cluster_name': self.executor_name,
                'aws_conn_id': 'us-east-1',
                'type': 'emr',
                'properties': {'ActionOnFailure': 'CONTINUE'},
            },
        )

    def test_apply_task_to_dag(self):
        self.emr.apply_task_to_dag(task=self.hadoop_task)

        dag_tasks = self.dag.tasks
        self.assertEqual(len(dag_tasks), 2)

        # Position 0 wraps the add-steps operator, position 1 wraps the step sensor.
        for position, delegate_type in enumerate((EmrAddStepsOperator, EmrStepSensor)):
            self.assertIsInstance(dag_tasks[position], OperatorWithVariableResolving)
            self.assertIsInstance(dag_tasks[position].operator_delegate, delegate_type)

    @mock.patch.object(EmrHook, 'get_conn')
    def test_add_step(self, mock_emr_hook_get_conn):
        # EmrHook.get_conn is patched to return a boto3 EMR client for us-east-1.
        mock_emr_hook_get_conn.return_value = boto3.client('emr', region_name='us-east-1')

        self.emr.apply_task_to_dag(task=self.hadoop_task)

        add_step_operator = self.dag.tasks[0]

        self.assertIsInstance(add_step_operator, OperatorWithVariableResolving)
        self.assertIsInstance(add_step_operator.operator_delegate, EmrAddStepsOperator)

        add_step_operator.render_template_fields({})

        # execute() returns a sequence; its first element is used as the step id.
        created_step_ids = add_step_operator.execute(self.dag.context)
        step_id = created_step_ids[0]

        described = self.client.describe_step(ClusterId=self.cluster_id, StepId=step_id)
        step = described['Step']

        expected_config = {
            'Jar': 'command-runner.jar',
            'Properties': {},
            'Args': ['spark-submit', 'test', 'params', '--param'],
        }
        self.assertEqual(step['Config'], expected_config)

        self.assertEqual(step['Name'], 'spark-task')

        self.assertEqual(step['ActionOnFailure'], 'CONTINUE')

    def test_watch_step(self):
        self.emr.apply_task_to_dag(task=self.hadoop_task, parents=[])

        watch_step_operator = self.dag.tasks[1]

        self.assertIsInstance(watch_step_operator, OperatorWithVariableResolving)
        self.assertIsInstance(watch_step_operator.operator_delegate, EmrStepSensor)

        # todo - elaborate tests


================================================
FILE: tests/runners/airflow/liminal/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/liminal/liminal.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
# Liminal definition kept in the test tree (tests/runners/airflow/liminal).
# It declares one volume, one secret, three images, one pipeline with two
# tasks, and one service.
name: MyPipeline
# Local volume; both pipeline tasks below mount it at /mnt/vol1.
volumes:
  - volume: myvol1
    local:
      path: /tmp/liminal_tests
# Secret taken from a local file; my_python_task references it with
# remote_path /mnt.
secrets:
  - secret: aws
    local_path_file: "~/.aws/credentials"
# The `source` values match the sibling directories write_inputs,
# write_outputs and myserver.
images:
  - image: my_python_task_img
    type: python
    source: write_inputs
    no_cache: true
  - image: my_parallelized_python_task_img
    type: python
    source: write_outputs
    no_cache: true
  - image: my_server_image
    type: python_server
    source: myserver
    no_cache: true
    endpoints:
      # module/function name the handler defined in myserver/my_server.py
      - endpoint: /myendpoint1
        module: my_server
        function: myendpoint1func
pipelines:
  - pipeline: my_pipeline
    owner: Bosco Albert Baracus
    start_date: 1970-01-01
    timeout_minutes: 45
    schedule: 0 * 1 * *
    # Extra keys of three different YAML shapes (scalar, list, mapping);
    # presumably consumed by tests that check how defaults are loaded - verify.
    default_arg_loaded: check
    default_array_loaded: [2, 3, 4]
    default_object_loaded:
      key1: val1
      key2: val2
    metrics:
      namespace: TestNamespace
      backends: []
    tasks:
      - task: my_python_task
        type: python
        description: static input task
        image: my_python_task_img
        # NUM_FILES and NUM_SPLITS are the variables write_inputs.py reads
        # from os.environ.
        env_vars:
          NUM_FILES: 10
          NUM_SPLITS: 3
          AWS_CONFIG_FILE: "/mnt/credentials"
          AWS_PROFILE: "dev"
        secrets:
          - secret: aws
            remote_path: "/mnt"
        mounts:
          - mount: mymount
            volume: myvol1
            path: /mnt/vol1
        cmd: python -u write_inputs.py
      - task: my_parallelized_python_task
        type: python
        description: parallelized python task
        image: my_parallelized_python_task_img
        env_vars:
          FOO: BAR
        executors: 3
        mounts:
          - mount: mymount
            volume: myvol1
            path: /mnt/vol1
        # NOTE(review): the image used by this task is declared with
        # `source: write_outputs`, yet the command runs write_inputs.py -
        # confirm this is intended.
        cmd: python -u write_inputs.py
services:
  - service: my_python_server
    description: my python server
    image: my_server_image


================================================
FILE: tests/runners/airflow/liminal/myserver/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/liminal/myserver/my_server.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import json


def myendpoint1func(input_json):
    """
    Decode a JSON string and echo the decoded value back in a message.

    :param input_json: JSON text to decode; any falsy value (e.g. ``None`` or
        ``''``) is treated as an empty dict instead of being parsed.
    :return: the text ``Input was: `` followed by the string form of the
        decoded value.
    """
    if input_json:
        payload = json.loads(input_json)
    else:
        payload = {}
    return f'Input was: {payload}'


================================================
FILE: tests/runners/airflow/liminal/pip.conf
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/liminal/requirements.txt
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

pillow


================================================
FILE: tests/runners/airflow/liminal/write_inputs/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/liminal/write_inputs/write_inputs.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import json
import os

# Root directory under which one sub-directory per split id is created.
inputs_dir = '/mnt/vol1/inputs/'

# Both values are required environment variables; a missing one raises KeyError
# and a non-integer value raises ValueError.
num_files = int(os.environ['NUM_FILES'])
num_splits = int(os.environ['NUM_SPLITS'])

# create input files - split by round robin
for i in range(num_files):
    split_id = i % num_splits
    split_dir = os.path.join(inputs_dir, str(split_id))

    # exist_ok=True replaces the previous exists()-then-makedirs() pair, which
    # could fail if another process created the directory between the two calls.
    os.makedirs(split_dir, exist_ok=True)

    filename = os.path.join(split_dir, f'input{i}.json')
    with open(filename, 'w') as f:
        print(f'Writing input file {filename}')
        f.write(json.dumps({'mykey': f'myval{i}'}))


================================================
FILE: tests/runners/airflow/liminal/write_outputs/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/liminal/write_outputs/write_outputs.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import json
import logging
import os

# Needs to be fixed in the following Jira:
# https://issues.apache.org/jira/browse/LIMINAL-76
inputs_dir = '/mnt/vol1/inputs/'
outputs_dir = '/mnt/vol1/outputs/'

# exist_ok=True replaces the previous exists()-then-makedirs() pair, which could
# fail if another process created the directory between the two calls.
os.makedirs(outputs_dir, exist_ok=True)

# Every entry of inputs_dir is treated as a split directory; each file inside it
# yields one output file written directly into outputs_dir.
for directory in os.listdir(inputs_dir):
    logging.info(f'Running write_outputs for split id {directory}')
    inputs_dir_file = os.path.join(inputs_dir, directory)
    for filename in os.listdir(inputs_dir_file):
        input_path = os.path.join(inputs_dir_file, filename)
        # input<N>.json -> output<N>.txt
        output_name = filename.replace('input', 'output').replace('.json', '.txt')
        output_path = os.path.join(outputs_dir, output_name)
        with open(input_path) as infile, open(output_path, 'w') as outfile:
            logging.info(f'Writing output file: {outfile.name}')
            data = json.loads(infile.read())
            # Only the value stored under 'mykey' is written out.
            outfile.write(data['mykey'])


================================================
FILE: tests/runners/airflow/operators/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/operators/test_operator_with_variable_resolving.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
from unittest import TestCase
from unittest.mock import MagicMock, patch

from airflow.models import BaseOperator

from liminal.runners.airflow.operators.operator_with_variable_resolving import (
    OperatorWithVariableResolving,
)


class TestKubernetesPodOperatorWithAutoImage(TestCase):
    # NOTE(review): despite the class name, every test here exercises
    # OperatorWithVariableResolving wrapped around a TestOperator.

    @staticmethod
    def _run_wrapped(operator, task_config, variables, context):
        # Wrap `operator` in OperatorWithVariableResolving (always dag=None and
        # task_id='my_task') and return whatever execute(context) returns.
        resolving_operator = OperatorWithVariableResolving(
            dag=None, operator=operator, task_id='my_task', task_config=task_config, variables=variables
        )
        return resolving_operator.execute(context)

    @patch.dict(os.environ, {'env': 'myenv', 'LIMINAL_STAND_ALONE_MODE': 'True', 'nested': 'value1'})
    def test_value_from_environment_and_liminal_config(self):
        # {{env}} is only defined in os.environ, {{my-image}} only in `variables`.
        wrapped = TestOperator(task_id='abc', field='{{env}}', expected='{{my-image}}')

        result = self._run_wrapped(
            wrapped,
            task_config={},
            variables={'my-image': 'my-image', 'something-value1': 'stg'},
            context={},
        )

        self.assertEqual(wrapped.field, 'myenv')
        self.assertEqual(result, 'my-image')

    @patch.dict(os.environ, {'env': 'myenv', 'LIMINAL_STAND_ALONE_MODE': 'True', 'nested': 'value1'})
    def test_value_from_task_variables(self):
        # task_config['variables'] names an entry of `variables`; its 'env'
        # value is expected instead of the one from os.environ.
        wrapped = TestOperator(task_id='abc', field='{{env}}', expected='{{my-image}}')

        result = self._run_wrapped(
            wrapped,
            task_config={'variables': 'my_dict_var'},
            variables={'my-image': 'my-image', 'my_dict_var': {'env': 'myenv2'}},
            context={},
        )

        self.assertEqual(wrapped.field, 'myenv2')
        self.assertEqual(result, 'my-image')

    @patch.dict(os.environ, {'env': 'myenv', 'LIMINAL_STAND_ALONE_MODE': 'True', 'nested': 'value1'})
    def test_value_from_inline_task_variables(self):
        # Same expectation as above, but the task variables are given inline as a dict.
        wrapped = TestOperator(task_id='abc', field='{{env}}', expected='{{my-image}}')

        result = self._run_wrapped(
            wrapped,
            task_config={'variables': {'env': 'myenv2'}},
            variables={'my-image': 'my-image'},
            context={},
        )

        self.assertEqual(wrapped.field, 'myenv2')
        self.assertEqual(result, 'my-image')

    @patch.dict(os.environ, {'env': 'staging', 'LIMINAL_STAND_ALONE_MODE': 'True'})
    def test_nested_variables(self):
        # Expected chain: env -> staging, env_val-staging -> the-s3-location,
        # nested-the-s3-location -> good.
        lookup = {
            'nested-the-s3-location': 'good',
            'my-image': 'my-image',
            'something-value1': 'stg',
            'env_val-staging': 'the-s3-location',
            'nested': 'value1',
        }
        wrapped = TestOperator(
            task_id='abc', field='something-{{nested-{{env_val-{{env}}}}}}', expected='{{my-image}}'
        )

        self._run_wrapped(wrapped, task_config={}, variables=lookup, context={})

        self.assertEqual(wrapped.field, 'something-good')

    @patch.dict(os.environ, {'env': 'staging', 'LIMINAL_STAND_ALONE_MODE': 'True'})
    def test_dag_run_conf_parameters(self):
        # Same nested lookup, with the values supplied through dag_run.conf
        # in the execution context rather than through `variables`.
        dag_run = MagicMock()
        dag_run.conf = {
            'nested-the-s3-location': 'good',
            'my-image': 'my-image',
            'something-value1': 'stg',
            'env_val-staging': 'the-s3-location',
            'nested': 'value1',
        }
        wrapped = TestOperator(
            task_id='abc', field='something-{{nested-{{env_val-{{env}}}}}}', expected='{{my-image}}'
        )

        self._run_wrapped(wrapped, task_config={}, variables={}, context={'dag_run': dag_run})

        self.assertEqual(wrapped.field, 'something-good')

    @patch.dict(os.environ, {'env': 'staging', 'LIMINAL_STAND_ALONE_MODE': 'True'})
    def test_duplicated_params(self):
        dag_run = MagicMock()
        dag_run.conf = {
            'nested-the-s3-location': 'good',
            'my-image': 'my-image',
            'something-value1': 'stg',
            'env_val-staging': 'the-s3-location',
            'nested': 'value1',
        }
        execution_context = {'dag_run': dag_run}

        # 1) the same nested placeholder appears twice in one field
        wrapped = TestOperator(
            task_id='abc',
            field='something-{{nested-{{env_val-{{env}}}}}}-{{nested-{{env_val-{{env}}}}}}',
            expected='{{my-image}}',
        )
        self._run_wrapped(wrapped, task_config={}, variables={}, context=execution_context)

        self.assertEqual(wrapped.field, 'something-good-good')

        # 2) the same flat placeholder appears twice in one field
        wrapped = TestOperator(task_id='abc', field='something-{{env}} {{env}}', expected='{{my-image}}')
        self._run_wrapped(wrapped, task_config={}, variables={}, context=execution_context)

        self.assertEqual(wrapped.field, 'something-staging staging')

        # 3) a value that is itself a placeholder, referenced with inner whitespace
        dag_run.conf = {'var1': '{{var2}}', 'var2': 'bla', 'my-image': 'my-image'}
        wrapped = TestOperator(task_id='abc', field='{{ var1 }}', expected='{{my-image}}')
        self._run_wrapped(wrapped, task_config={}, variables={}, context=execution_context)

        self.assertEqual(wrapped.field, 'bla')

    @patch.dict(os.environ, {'env': 'staging', 'LIMINAL_STAND_ALONE_MODE': 'True'})
    def test_non_existing_param(self):
        # A placeholder with no matching value is expected to become an empty string.
        dag_run = MagicMock()
        dag_run.conf = {'var1': '{{var2}}', 'var2': 'bla'}
        wrapped = TestOperator(task_id='abc', field='{{myimage}}', expected='{{myimage}}')

        self._run_wrapped(wrapped, task_config={}, variables={}, context={'dag_run': dag_run})

        self.assertEqual(wrapped.field, '')


class BaseTestOperator(BaseOperator):
    """
    BaseOperator subclass that stores two extra attributes, ``field`` and
    ``expected``; ``execute`` is left for subclasses to provide.
    """

    def __init__(self, field, expected, *args, **kwargs):
        # Both attributes are set before delegating the remaining arguments
        # to BaseOperator.
        self.field, self.expected = field, expected
        super().__init__(*args, **kwargs)

    def execute(self, context):
        raise NotImplementedError()


class TestOperator(BaseTestOperator):
    """
    Operator stub whose ``execute`` returns its ``expected`` attribute.
    """

    # The "Test" name prefix matches pytest's default class-collection pattern.
    # Opt out explicitly so this helper is not picked up (or warned about,
    # because it defines __init__) as a test class.
    __test__ = False

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def execute(self, context):
        return self.expected


================================================
FILE: tests/runners/airflow/tasks/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/airflow/tasks/test_create_cloudformation_stack.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import datetime
from unittest import TestCase, mock
from unittest.mock import MagicMock

from airflow.operators.dummy import DummyOperator
from airflow.operators.python import BranchPythonOperator
from moto import mock_cloudformation

from liminal.runners.airflow.executors import airflow
from liminal.runners.airflow.operators.cloudformation import (
    CloudFormationCreateStackOperator,
    CloudFormationCreateStackSensor,
    CloudFormationHook,
)
from liminal.runners.airflow.operators.operator_with_variable_resolving import (
    OperatorWithVariableResolving,
)
from liminal.runners.airflow.tasks.create_cloudformation_stack import (
    CreateCloudFormationStackTask,
)
from tests.util import dag_test_utils


# noinspection DuplicatedCode
@mock_cloudformation
class TestCreateCloudFormationStackTask(TestCase):
    """
    Test CreateCloudFormationStackTask

    setUp applies the task to a fresh DAG through AirflowExecutor; the tests
    then inspect the resulting DAG tasks and call the python_callable of the
    first one with mocked CloudFormation connections.
    """

    def setUp(self) -> None:
        self.dag = dag_test_utils.create_dag()
        self.dag.context = {
            'ti': self.dag,
            'ts': datetime.now().timestamp(),
            'execution_date': datetime.now().timestamp(),
        }
        self.dag.get_dagrun = MagicMock()

        self.cluster_name = "liminal-cluster-for-tests"
        self.config = {
            'task': 'create_emr',
            'type': 'create_cloudformation_stack',
            'description': 'create emr',
            'stack_name': self.cluster_name,
            'properties': {
                'OnFailure': 'DO_NOTHING',
                'TimeoutInMinutes': 25,
                'Capabilities': ['CAPABILITY_NAMED_IAM'],
                'TemplateURL': 'https://s3.amazonaws.com/liminal-tests/emr_cluster_creation.yml',
                'Parameters': {
                    'Environment': 'Staging',
                    'OwnerTeam': 'liminal-team',
                    'Tenancy': 'Ephemeral',
                    'MasterServerCount': '1',
                    'CoreServerCount': '1',
                    'EmrApplicationRelease': '5.28',
                    'InstanceTypeMaster': 'm5.xlarge',
                    'InstanceTypeCore': 'm5.xlarge',
                },
            },
        }

        self.create_cloudformation_task = CreateCloudFormationStackTask(
            self.config['task'],
            self.dag,
            [],
            trigger_rule='all_success',
            liminal_config={},
            pipeline_config={},
            task_config=self.config,
        )

        airflow.AirflowExecutor("airflow-executor", {}, {}).apply_task_to_dag(task=self.create_cloudformation_task)

    def test_apply_task_to_dag(self):
        self.assertEqual(len(self.dag.tasks), 4)

        self.assertIsInstance(self.dag.tasks[0], OperatorWithVariableResolving)
        self.assertIsInstance(self.dag.tasks[0].operator_delegate, BranchPythonOperator)
        self.assertIsInstance(self.dag.tasks[1], OperatorWithVariableResolving)
        self.assertIsInstance(self.dag.tasks[1].operator_delegate, CloudFormationCreateStackOperator)
        self.assertIsInstance(self.dag.tasks[2], OperatorWithVariableResolving)
        self.assertIsInstance(self.dag.tasks[2].operator_delegate, CloudFormationCreateStackSensor)
        self.assertIsInstance(self.dag.tasks[3], DummyOperator)

    def test_cloudformation_does_not_exist(self):
        with mock.patch.object(CloudFormationHook, 'get_conn') as mock_conn:
            mock_cf_conn = MagicMock()
            # NOTE(review): this sets a side effect on an attribute of the value
            # returned by describe_stacks(), so describe_stacks() itself does
            # not raise here - it returns a MagicMock. Confirm whether
            # `describe_stacks.side_effect` was the intended setup.
            mock_cf_conn.describe_stacks.return_value.raiseError.side_effect = Exception()

            mock_conn.return_value = mock_cf_conn
            is_cloudformation_exists = self.dag.tasks[0].operator_delegate

            self.assertEqual(
                is_cloudformation_exists.python_callable(stack_name=self.cluster_name),
                'create-cloudformation-create_emr',
            )

    def test_cloudformation_exist_and_running(self):
        is_cloudformation_exists = self.dag.tasks[0].operator_delegate

        for status in ['CREATE_COMPLETE', 'DELETE_FAILED']:
            # subTest reports which status failed and lets the remaining ones still run.
            with self.subTest(status=status):
                with mock.patch.object(CloudFormationHook, 'get_conn') as mock_conn:
                    mock_cloudformation_conn = MagicMock()
                    mock_cloudformation_conn.describe_stacks.return_value = {'Stacks': [{'StackStatus': status}]}

                    mock_conn.return_value = mock_cloudformation_conn

                    self.assertEqual(
                        is_cloudformation_exists.python_callable(stack_name=self.cluster_name),
                        'creation-end-create_emr',
                    )

    def test_cloudformation_exists_and_not_running(self):
        is_cloudformation_exists = self.dag.tasks[0].operator_delegate

        for status in ['DELETED']:
            # subTest reports which status failed and lets the remaining ones still run.
            with self.subTest(status=status):
                with mock.patch.object(CloudFormationHook, 'get_conn') as mock_conn:
                    mock_cloudformation_conn = MagicMock()
                    mock_cloudformation_conn.describe_stacks.return_value = {'Stacks': [{'StackStatus': status}]}

                    mock_conn.return_value = mock_cloudformation_conn

                    self.assertEqual(
                        is_cloudformation_exists.python_callable(stack_name=self.cluster_name),
                        'create-cloudformation-create_emr',
                    )

    def test_cloudformation_create_stack_operator_task(self):
        create_cloudformation_stack = self.dag.tasks[1]
        self.assertEqual(create_cloudformation_stack.task_id, 'create-cloudformation-create_emr')


================================================
FILE: tests/runners/airflow/tasks/test_delete_cloudformation_stack.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import datetime
from unittest import TestCase
from unittest.mock import MagicMock

from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import BranchPythonOperator
from airflow.utils.trigger_rule import TriggerRule
from moto import mock_cloudformation

from liminal.runners.airflow.operators.cloudformation import (
    CloudFormationDeleteStackOperator,
    CloudFormationDeleteStackSensor,
)
from liminal.runners.airflow.operators.operator_with_variable_resolving import (
    OperatorWithVariableResolving,
)
from liminal.runners.airflow.tasks.delete_cloudformation_stack import (
    DeleteCloudFormationStackTask,
)
from tests.util import dag_test_utils


# noinspection DuplicatedCode
@mock_cloudformation
class TestDeleteCloudFormationStackTask(TestCase):
    """
    Tests for the operators that DeleteCloudFormationStackTask adds to a DAG.
    """

    def setUp(self) -> None:
        """Build a test DAG and apply a 'delete-emr' delete-stack task to it."""
        self.cluster_name = "liminal-cluster-for-tests"

        test_dag = dag_test_utils.create_dag()
        test_dag.context = {
            'ti': test_dag,
            'ts': datetime.now().timestamp(),
            'execution_date': datetime.now().timestamp(),
        }
        test_dag.get_dagrun = MagicMock()
        self.dag = test_dag

        self.delete_cloudformation_task = DeleteCloudFormationStackTask(
            'delete-emr',
            self.dag,
            [],
            trigger_rule='all_success',
            liminal_config={},
            pipeline_config={},
            task_config={'stack_name': self.cluster_name},
        )
        self.delete_cloudformation_task.apply_task_to_dag()

    def test_apply_task_to_dag(self):
        """
        Applying the task must yield exactly four operators: a branch, a
        wrapped delete-stack operator, a wrapped delete-stack sensor and a
        dummy operator, in that order.
        """
        dag_tasks = self.dag.tasks
        self.assertEqual(len(dag_tasks), 4)

        branch_op, delete_op, sensor_op, end_op = dag_tasks

        self.assertIsInstance(branch_op, BranchPythonOperator)

        wrapped_expectations = (
            (delete_op, CloudFormationDeleteStackOperator),
            (sensor_op, CloudFormationDeleteStackSensor),
        )
        for wrapper, delegate_type in wrapped_expectations:
            self.assertIsInstance(wrapper, OperatorWithVariableResolving)
            self.assertIsInstance(wrapper.operator_delegate, delegate_type)

        self.assertIsInstance(end_op, DummyOperator)

    def test_check_dags_queued_task(self):
        """
        The first task's callable must return the end task id while two runs
        are active and the delete-stack task id once only one run is active.
        """
        active_runs_mock = MagicMock()
        self.dag.get_num_active_runs = active_runs_mock

        active_runs_mock.return_value = 2
        branch_op = self.dag.tasks[0]
        self.assertEqual(branch_op.trigger_rule, TriggerRule.ALL_DONE)

        self.assertEqual(branch_op.python_callable(), f'delete-end-delete-emr')

        active_runs_mock.return_value = 1

        self.assertEqual(branch_op.python_callable(), f'delete-cloudformation-delete-emr')


================================================
FILE: tests/runners/airflow/tasks/test_job_end.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import unittest
from unittest import TestCase

from liminal.runners.airflow.executors import airflow
from liminal.runners.airflow.tasks import job_end
from tests.util import dag_test_utils


# noinspection DuplicatedCode
class TestJobEndTask(TestCase):
    """
    Tests for the operator that JobEndTask adds to a DAG.
    """

    @staticmethod
    def _new_job_end_task(dag, task_id, liminal_config):
        """Build a JobEndTask on ``dag`` with the settings every test here shares."""
        return job_end.JobEndTask(
            task_id=task_id,
            dag=dag,
            liminal_config=liminal_config,
            pipeline_config={'pipeline': 'my_end_pipeline'},
            task_config={},
            parent=None,
            trigger_rule='all_done',
        )

    def test_apply_task_to_dag(self):
        """Namespace and backends from the metrics config reach the 'end' operator."""
        dag = dag_test_utils.create_dag()
        metrics_config = {'namespace': 'EndJobNameSpace', 'backends': ['cloudwatch']}
        end_task = self._new_job_end_task(dag, 'job_end', {'metrics': metrics_config})

        executor = airflow.AirflowExecutor("airflow-executor", {}, {})
        executor.apply_task_to_dag(task=end_task)

        self.assertEqual(len(dag.tasks), 1)
        end_operator = dag.tasks[0]

        self.assertEqual(end_operator.namespace, 'EndJobNameSpace')
        self.assertEqual(end_operator.backends, ['cloudwatch'])

        self.assertEqual(end_operator.task_id, 'end')

    def test_apply_task_to_dag_missing_metrics(self):
        """Without a metrics section the operator gets an empty namespace and no backends."""
        config_without_metrics = {'pipeline': 'my_pipeline'}
        dag = dag_test_utils.create_dag()
        end_task = self._new_job_end_task(dag, "job_end", config_without_metrics)

        executor = airflow.AirflowExecutor("airflow-executor", {}, {})
        executor.apply_task_to_dag(task=end_task)

        self.assertEqual(len(dag.tasks), 1)
        end_operator = dag.tasks[0]

        self.assertEqual(end_operator.namespace, '')
        self.assertEqual(end_operator.backends, [])
        self.assertEqual(end_operator.trigger_rule, 'all_done')

    def test_apply_task_to_dag_with_partial_configuration(self):
        """A metrics section with only a namespace yields that namespace and no backends."""
        dag = dag_test_utils.create_dag()
        namespace_only_config = {'metrics': {'namespace': 'EndJobNameSpace'}}
        end_task = self._new_job_end_task(dag, "job_enc", namespace_only_config)

        # This case applies the task directly rather than through an executor.
        end_task.apply_task_to_dag()

        self.assertEqual(len(dag.tasks), 1)
        end_operator = dag.tasks[0]

        self.assertEqual(end_operator.namespace, 'EndJobNameSpace')
        self.assertEqual(end_operator.backends, [])
        self.assertEqual(end_operator.trigger_rule, 'all_done')


# Allow this test module to be executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: tests/runners/airflow/tasks/test_job_start.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import unittest
from unittest import TestCase

from liminal.runners.airflow.executors import airflow
from liminal.runners.airflow.tasks import job_start
from tests.util import dag_test_utils


# noinspection DuplicatedCode
class TestJobStartTask(TestCase):
    """
    Tests for the operator that JobStartTask adds to a DAG.
    """

    @staticmethod
    def _new_job_start_task(dag, liminal_config, pipeline_name):
        """Build a JobStartTask on ``dag`` with the settings every test here shares."""
        return job_start.JobStartTask(
            task_id="start_task",
            dag=dag,
            liminal_config=liminal_config,
            pipeline_config={'pipeline': pipeline_name},
            task_config={},
            parent=None,
            trigger_rule='all_success',
        )

    def test_apply_task_to_dag(self):
        """Namespace and backends from the metrics config reach the 'start' operator."""
        dag = dag_test_utils.create_dag()
        metrics_config = {'namespace': 'StartJobNameSpace', 'backends': ['cloudwatch']}
        start_task = self._new_job_start_task(dag, {'metrics': metrics_config}, 'my_start_pipeline')

        # This case applies the task directly rather than through an executor.
        start_task.apply_task_to_dag()

        self.assertEqual(len(dag.tasks), 1)
        start_operator = dag.tasks[0]

        self.assertEqual(start_operator.namespace, 'StartJobNameSpace')
        self.assertEqual(start_operator.backends, ['cloudwatch'])

        self.assertEqual(start_operator.task_id, 'start')

    def test_apply_task_to_dag_missing_metrics(self):
        """Without a metrics section the operator gets an empty namespace and no backends."""
        config_without_metrics = {'pipeline': 'my_pipeline'}

        dag = dag_test_utils.create_dag()
        start_task = self._new_job_start_task(dag, config_without_metrics, 'my_end_pipeline')

        executor = airflow.AirflowExecutor("airflow-executor", {}, {})
        executor.apply_task_to_dag(task=start_task)

        self.assertEqual(len(dag.tasks), 1)
        start_operator = dag.tasks[0]

        self.assertEqual(start_operator.namespace, '')
        self.assertEqual(start_operator.backends, [])
        self.assertEqual(start_operator.trigger_rule, 'all_success')

    def test_apply_task_to_dag_with_partial_configuration(self):
        """A metrics section with only a namespace yields that namespace and no backends."""
        dag = dag_test_utils.create_dag()
        namespace_only_config = {'metrics': {'namespace': 'StartJobNameSpace'}}
        start_task = self._new_job_start_task(dag, namespace_only_config, 'my_start_pipeline')

        executor = airflow.AirflowExecutor("airflow-executor", {}, {})
        executor.apply_task_to_dag(task=start_task)

        self.assertEqual(len(dag.tasks), 1)
        start_operator = dag.tasks[0]

        self.assertEqual(start_operator.namespace, 'StartJobNameSpace')
        self.assertEqual(start_operator.backends, [])


# Allow this test module to be executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: tests/runners/airflow/tasks/test_python.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import tempfile
import unittest
from unittest import TestCase

from liminal.build import liminal_apps_builder
from liminal.kubernetes import secret_util, volume_util
from liminal.runners.airflow import DummyDag
from liminal.runners.airflow.executors.kubernetes import KubernetesPodExecutor
from liminal.runners.airflow.tasks import python
from tests.util import dag_test_utils


class TestPythonTask(TestCase):
    """
    End-to-end test for PythonTask applied through KubernetesPodExecutor.

    The test builds the liminal apps found next to this module, runs an input
    writing task followed by an output writing task, and checks the files
    they leave in a temporary directory that is exposed to the tasks as a
    local volume.
    """

    _VOLUME_NAME = 'myvol1'
    _SECRET_NAME = 'aws'

    def setUp(self) -> None:
        """Recreate the local volume and secret and build the test images."""
        # Local import: only needed for the temp-dir cleanup registered below.
        import shutil

        volume_util.delete_local_volume(self._VOLUME_NAME)
        secret_util.delete_local_secret(self._SECRET_NAME)
        os.environ['TMPDIR'] = '/tmp'
        self.temp_dir = tempfile.mkdtemp()
        # mkdtemp() never removes what it creates; without this every run
        # leaks a directory. ignore_errors keeps cleanup best-effort in case
        # the tasks left files this process is not allowed to delete.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)

        self.liminal_config = {
            'volumes': self._volume_config(),
            'secrets': [{'secret': self._SECRET_NAME, 'remote_path': "/mnt"}],
        }

        volume_util.create_local_volumes(self.liminal_config, None)
        secret_util.create_local_secrets(self.liminal_config)

        liminal_apps_builder.build_liminal_apps(os.path.join(os.path.dirname(__file__), '../liminal'))

    def _volume_config(self):
        """Return a fresh 'volumes' section pointing the test volume at the temp dir."""
        # NOTE(review): the /var/folders -> /private/var/folders rewrite presumably
        # resolves the macOS temp-dir symlink - confirm.
        local_path = self.temp_dir.replace("/var/folders", "/private/var/folders")
        return [
            {
                'volume': self._VOLUME_NAME,
                'local': {'path': local_path},
            }
        ]

    def test_apply_task_to_dag(self):
        """Run both tasks and verify the input splits and output files they write."""
        dag = dag_test_utils.create_dag()

        task0 = self.__create_python_task(
            dag,
            'my_input_task',
            None,
            'my_python_task_img',
            'python -u write_inputs.py',
            env_vars={'NUM_FILES': 10, 'NUM_SPLITS': 3, 'AWS_CONFIG_FILE': '/mnt/credentials', 'AWS_PROFILE': 'dev'},
        )
        executor = KubernetesPodExecutor(
            task_id='k8s', liminal_config=self.liminal_config, executor_config={'executor': 'k8s', 'name': 'mypod'}
        )

        executor.apply_task_to_dag(task=task0)

        # The output task is chained after the operator created for the input task.
        task1 = self.__create_python_task(
            dag,
            'my_output_task',
            dag.tasks[0],
            'my_parallelized_python_task_img',
            'python -u write_outputs.py',
            executors=3,
        )
        executor.apply_task_to_dag(task=task1)

        for task in dag.tasks:
            print(f'Executing task {task.task_id}')
            task.execute(DummyDag('my_dag', task.task_id).context)

        inputs_dir = os.path.join(self.temp_dir, 'inputs')
        outputs_dir = os.path.join(self.temp_dir, 'outputs')

        self.assertListEqual(sorted(os.listdir(self.temp_dir)), sorted(['outputs', 'inputs']))

        self.assertListEqual(sorted(os.listdir(inputs_dir)), ['0', '1', '2'])

        # 10 files over 3 splits, assigned round robin by file index.
        expected_inputs_by_split = {
            '0': ['input0.json', 'input3.json', 'input6.json', 'input9.json'],
            '1': ['input1.json', 'input4.json', 'input7.json'],
            '2': ['input2.json', 'input5.json', 'input8.json'],
        }
        for split, expected_files in expected_inputs_by_split.items():
            self.assertListEqual(sorted(os.listdir(os.path.join(inputs_dir, split))), expected_files)

        self.assertListEqual(
            sorted(os.listdir(outputs_dir)),
            [f'output{i}.txt' for i in range(10)],
        )

        # Each outputN.txt is expected to contain exactly "myvalN".
        for filename in os.listdir(outputs_dir):
            with open(os.path.join(outputs_dir, filename)) as f:
                expected_file_content = filename.replace('output', 'myval').replace('.txt', '')
                self.assertEqual(f.read(), expected_file_content)

    def __create_python_task(self, dag, task_id, parent, image, cmd, env_vars=None, executors=None):
        """
        Build a PythonTask on ``dag`` that mounts the test volume and secret.

        :param dag: DAG the task belongs to
        :param task_id: id used both for the task and its 'task' config entry
        :param parent: parent passed through to PythonTask (None for a root task)
        :param image: image name placed in the task config
        :param cmd: command placed in the task config
        :param env_vars: optional environment variables; defaults to an empty dict
        :param executors: optional 'executors' value; only set when truthy
        :return: the constructed PythonTask
        """
        # Note: this also refreshes the shared liminal_config in place.
        self.liminal_config['volumes'] = self._volume_config()

        self.liminal_config['executors'] = [
            {
                'executor': 'k8s',
                'type': 'kubernetes',
            }
        ]
        task_config = {
            'task': task_id,
            'cmd': cmd,
            'image': image,
            'env_vars': env_vars if env_vars is not None else {},
            'mounts': [{'mount': 'mymount', 'volume': self._VOLUME_NAME, 'path': '/mnt/vol1'}],
            'secrets': [{'secret': self._SECRET_NAME, 'remote_path': '/mnt'}],
        }

        if executors:
            task_config['executors'] = executors

        return python.PythonTask(
            task_id=task_id,
            dag=dag,
            liminal_config=self.liminal_config,
            pipeline_config={'pipeline': 'my_pipeline'},
            task_config=task_config,
            parent=parent,
            trigger_rule='all_success',
        )


# Allow this test module to be executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: tests/runners/airflow/tasks/test_spark_task.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import tempfile
from unittest import TestCase

from liminal.build import liminal_apps_builder
from liminal.kubernetes import volume_util
from liminal.runners.airflow import DummyDag
from liminal.runners.airflow.executors.kubernetes import KubernetesPodExecutor
from liminal.runners.airflow.tasks.spark import SparkTask
from tests.util import dag_test_utils


class TestSparkTask(TestCase):
    """
    Test Spark Task
    """

    _VOLUME_NAME = 'myvol1'

    # Query argument shared by the spark-submit command tests. This is a plain
    # (non f-) string, so the double braces are literal.
    _QUERY = "select * from my_table where date_prt >= '{{yesterday_ds}}'"

    @staticmethod
    def _runnable_command(task_config):
        """Return get_runnable_command() of a SparkTask built from ``task_config``."""
        return SparkTask(
            'my_spark_task',
            DummyDag('dag-id', 'my_spark_task'),
            [],
            trigger_rule='all_success',
            liminal_config={},
            pipeline_config={'pipeline': 'pipeline'},
            task_config=task_config,
        ).get_runnable_command()

    def _assert_same_arguments(self, actual, expected):
        """
        Assert that both commands hold the same arguments, ignoring order.

        ``list.sort()`` sorts in place and returns None, so comparing
        ``actual.sort()`` with ``expected.sort()`` compares None with None and
        can never fail. Sorted copies are compared instead.
        """
        self.assertEqual(sorted(actual), sorted(expected))

    def test_spark_on_k8s(self):
        """Run the word-count Spark app through the k8s executor and check its JSON output."""
        volume_util.delete_local_volume(self._VOLUME_NAME)
        os.environ['TMPDIR'] = '/tmp'
        self.temp_dir = tempfile.mkdtemp()
        self.liminal_config = {
            'volumes': [
                {
                    'volume': self._VOLUME_NAME,
                    # NOTE(review): presumably resolves the macOS temp-dir symlink - confirm.
                    'local': {'path': self.temp_dir.replace("/var/folders", "/private/var/folders")},
                }
            ]
        }
        volume_util.create_local_volumes(self.liminal_config, None)

        # build spark image
        liminal_apps_builder.build_liminal_apps(os.path.join(os.path.dirname(__file__), '../../apps/test_spark_app'))

        outputs_dir = os.path.join(self.temp_dir, 'outputs')

        task_config = {
            'task': "my_spark_task",
            'image': "my_spark_image",
            'application_source': 'wordcount.py',
            'application_arguments': ['words.txt', '/mnt/vol1/outputs/'],
            'env_vars': {},
            'mounts': [{'mount': 'mymount', 'volume': self._VOLUME_NAME, 'path': '/mnt/vol1'}],
        }

        dag = dag_test_utils.create_dag()

        task1 = SparkTask(
            task_id="my_spark_task",
            dag=dag,
            liminal_config=self.liminal_config,
            pipeline_config={'pipeline': 'my_pipeline'},
            task_config=task_config,
            parent=None,
            trigger_rule='all_success',
        )

        executor = KubernetesPodExecutor(
            task_id='k8s', liminal_config=self.liminal_config, executor_config={'executor': 'k8s', 'name': 'mypod'}
        )
        executor.apply_task_to_dag(task=task1)

        for task in dag.tasks:
            print(f'Executing task {task.task_id}')
            task.execute(DummyDag('my_dag', task.task_id).context)

        # One JSON document per line; the trailing '' accounts for the final newline.
        expected_output = [
            '{"word":"my","count":1}',
            '{"word":"first","count":1}',
            '{"word":"liminal","count":1}',
            '{"word":"spark","count":1}',
            '{"word":"task","count":1}',
            '',
        ]

        # If several .json files exist, the content of the last one listed wins.
        actual = ''
        for filename in os.listdir(outputs_dir):
            if filename.endswith(".json"):
                with open(os.path.join(outputs_dir, filename)) as f:
                    actual = f.read()

        self.assertEqual(actual.split("\n"), expected_output)

    def test_get_runnable_command(self):
        """A fully specified config yields master, class, conf and application arguments."""
        task_config = {
            'application_source': 'my_app.py',
            'master': 'yarn',
            'class': 'org.apache.liminal.MySparkApp',
            'conf': {
                'spark.driver.memory': '1g',
                'spark.driver.maxResultSize': '1g',
                'spark.yarn.executor.memoryOverhead': '500M',
            },
            'application_arguments': {
                '--query': self._QUERY,
                '--output': 'mytable',
            },
        }

        expected = [
            'spark-submit',
            '--master',
            'yarn',
            '--class',
            'org.apache.liminal.MySparkApp',
            '--conf',
            'spark.driver.maxResultSize=1g',
            '--conf',
            'spark.driver.memory=1g',
            '--conf',
            'spark.yarn.executor.memoryOverhead=500M',
            'my_app.py',
            '--query',
            self._QUERY,
            '--output',
            'mytable',
        ]

        self._assert_same_arguments(self._runnable_command(task_config), expected)

    def test_missing_spark_arguments(self):
        """With no spark options only the source and application arguments are expected."""
        task_config = {
            'application_source': 'my_app.py',
            'application_arguments': {
                '--query': self._QUERY,
                '--output': 'myoutputtable',
            },
        }

        expected = [
            'spark-submit',
            'my_app.py',
            '--query',
            self._QUERY,
            '--output',
            'myoutputtable',
        ]

        self._assert_same_arguments(self._runnable_command(task_config), expected)

    def test_partially_missing_spark_arguments(self):
        """With class and conf but no master, no '--master' argument is expected."""
        task_config = {
            'application_source': 'my_app.py',
            'class': 'org.apache.liminal.MySparkApp',
            'conf': {
                'spark.driver.memory': '1g',
                'spark.driver.maxResultSize': '1g',
                'spark.yarn.executor.memoryOverhead': '500M',
            },
            'application_arguments': {
                '--query': self._QUERY,
                '--output': 'myoutputtable',
            },
        }

        expected = [
            'spark-submit',
            '--class',
            'org.apache.liminal.MySparkApp',
            '--conf',
            'spark.driver.maxResultSize=1g',
            '--conf',
            'spark.driver.memory=1g',
            '--conf',
            'spark.yarn.executor.memoryOverhead=500M',
            'my_app.py',
            '--query',
            self._QUERY,
            '--output',
            'myoutputtable',
        ]

        self._assert_same_arguments(self._runnable_command(task_config), expected)


================================================
FILE: tests/runners/apps/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/apps/test/liminal.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/apps/test_app/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/apps/test_app/defaults/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/apps/test_app/defaults/liminal.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
name: default_liminal
type: super
service_defaults:
  one: two
task_defaults:
  env_vars:
    env1: a
pipeline_defaults:
  one: one
  two: twoww


================================================
FILE: tests/runners/apps/test_app/extra/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/apps/test_app/extra/liminal.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
name: extra_liminal
type: super
super: default_liminal
service_defaults:
  endpoints:
    - endpoint: /myendpoint1
      module: myserver.my_server
      function: myendpoint1func
images:
  - image: my_static_input_task_image
    type: python
    source: helloworld
    no_cache: true
  - image: my_task_output_input_task_image
    type: python
    source: helloworld
    no_cache: true
pipeline_defaults:
  tasks:
    - task: my_static_input_task
      type: python
      description: static input task
      image: my_static_input_task_image
      env_vars:
        env2: b
      cmd: python -u hello_world.py
    - task: extenable
      type: pipeline
      description: optional sub tasks
    - task: my_task_output_input_task
      type: python
      no_cache: true
      description: task with input from other task's output
      image: my_task_output_input_task_image
      env_vars:
        env1: c
        env2: b
      cmd: python -u hello_world.py


================================================
FILE: tests/runners/apps/test_app/helloworld/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Runs at import time of this package; the explicit '\n' plus print's own
# newline leaves a blank line after the greeting.
print('Hello world!\n')


================================================
FILE: tests/runners/apps/test_app/helloworld/hellp_world.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import json
import os

# Root directory under which one sub-directory per split id is created.
inputs_dir = '/mnt/vol1/inputs/'

# Both variables are required: a missing one raises KeyError and a
# non-integer value raises ValueError before any file is written.
num_files = int(os.environ['NUM_FILES'])
num_splits = int(os.environ['NUM_SPLITS'])

# create input files - split by round robin
for i in range(num_files):
    split_id = i % num_splits
    split_dir = os.path.join(inputs_dir, str(split_id))

    # exist_ok=True replaces the former "exists? then create" pair, which
    # could fail with FileExistsError if another process created the same
    # directory between the check and the makedirs call.
    os.makedirs(split_dir, exist_ok=True)

    filename = os.path.join(split_dir, f'input{i}.json')
    with open(filename, 'w') as f:
        print(f'Writing input file {filename}')
        f.write(json.dumps({'mykey': f'myval{i}'}))


================================================
FILE: tests/runners/apps/test_app/liminal.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
name: MyPipeline
super: extra_liminal
images:
  - image: my_static_input_task_image
    type: python
    source: helloworld
    no_cache: true
  - image: my_server_image
    type: python_server
    # NOTE(review): the sibling directory in this test app is named `my_server`,
    # not `myserver` - confirm which spelling is intended.
    source: myserver
    no_cache: true
pipelines:
  - pipeline: my_pipeline
    owner: Bosco Albert Baracus
    start_date: 1970-01-01
    timeout_minutes: 45
    schedule: 0 * 1 * *
    default_arg_loaded: check
    default_array_loaded: [2, 3, 4]
    default_object_loaded:
      key1: val1
      key2: val2
    tasks:
      - task: my_parallelized_static_input_task
        type: python
        description: parallelized static input task
        image: my_static_input_task_image
        executors: 2
        # NOTE(review): the script under `helloworld/` is named `hellp_world.py`;
        # confirm whether this command is ever expected to run it.
        cmd: python -u hello_world.py
services:
  - service: my_python_server
    description: my python server
    image: my_server_image


================================================
FILE: tests/runners/apps/test_app/my_server/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/apps/test_app/my_server/my_server.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


import json


def myendpoint1func(input_json):
    """Echo the decoded request body.

    :param input_json: JSON text to decode; any falsy value (``None``, ``''``)
        is treated as an empty object.
    :return: the string ``'Input was: '`` followed by the decoded value.
    :raises json.JSONDecodeError: if ``input_json`` is non-empty but not valid JSON.
    """
    if input_json:
        payload = json.loads(input_json)
    else:
        payload = {}
    return 'Input was: {}'.format(payload)


================================================
FILE: tests/runners/apps/test_spark_app/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/apps/test_spark_app/liminal.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
name: MyPipeline
images:
  - image: my_spark_image
    type: spark
    source: wordcount
    no_cache: true
pipelines:
  - pipeline: my_pipeline
    owner: Bosco Albert Baracus
    start_date: 1970-01-01
    timeout_minutes: 45
    schedule: 0 * 1 * *
    tasks:
      - task: my_test_spark_task
        type: spark
        description: spark task on k8s
        image: my_spark_image
        executors: 2
        application_source: wordcount.py
        # NOTE(review): wordcount.py exits unless it receives exactly two arguments
        # (input file and output location); only one is listed here - confirm
        # whether the second one is supplied elsewhere.
        application_arguments:
          - words.txt


================================================
FILE: tests/runners/apps/test_spark_app/wordcount/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/runners/apps/test_spark_app/wordcount/requirements.txt
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

pyyaml==5.4


================================================
FILE: tests/runners/apps/test_spark_app/wordcount/wordcount.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import sys
from operator import add

import yaml
from pyspark.sql import SparkSession

if __name__ == "__main__":
    # Exactly two positional arguments are accepted: the text file to read and
    # the location the JSON result is written to.
    if len(sys.argv) != 3:
        print("Usage: wordcount <file> <outputlocation>", file=sys.stderr)
        sys.exit(-1)

    input_path, output_path = sys.argv[1], sys.argv[2]

    spark = SparkSession.builder.appName("PythonWordCount").getOrCreate()
    try:
        # Skip rows whose text starts with '#' (words.txt carries its license
        # header that way) and unwrap each remaining row to its text value.
        lines = spark.read.text(input_path).rdd.filter(lambda x: not x[0].startswith('#')).map(lambda r: r[0])
        # Split on single spaces, then sum a 1 per occurrence of each word.
        counts = lines.flatMap(lambda x: x.split(' ')).map(lambda x: (x, 1)).reduceByKey(add)
        for word, count in counts.collect():
            print("%s: %i" % (word, count))

        print(f"writing the results to {output_path}")
        counts.toDF(["word", "count"]).coalesce(1).write.mode("overwrite").json(output_path)
    finally:
        # Always release the session, including when reading or writing fails.
        spark.stop()


================================================
FILE: tests/runners/apps/test_spark_app/wordcount/words.txt
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
my first liminal spark task


================================================
FILE: tests/test_licenses.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import pathlib
from unittest import TestCase

from termcolor import colored

def _header_lines(header_text):
    """Strip whitespace around the whole header text, then split it on newlines."""
    return header_text.strip().split("\n")


# File-name endings that are exempt from the license check.
EXCLUDED_EXTENSIONS = [
    '.gif',
    '.png',
    '.pyc',
    'LICENSE',
    'DISCLAIMER',
    'DISCLAIMER-WIP',
    'NOTICE',
    '.whl',
]

# Directories (relative to the repository root) that are exempt from the check.
EXCLUDED_DIRS = [
    'docs/build',
    'build',
    'dist',
    '.git',
    '.idea',
    'venv',
    '.venv',
    'apache_liminal.egg-info',
    '.pytest_cache',
]

# Exact file names that are exempt from the check.
EXCLUDED_FILES = [
    'DISCLAIMER-WIP',
    'LICENSE.txt',
    '.autovenv',
    'credentials-aws.txt',
]

# Header expected in '#'-commented files; also the fallback for every
# extension that has no dedicated header below.
PYTHON_LICENSE_HEADER = _header_lines(
    """
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
)

# Header expected in Markdown files (wrapped in an HTML comment).
MD_LICENSE_HEADER = _header_lines(
    """
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->
"""
)

# Header expected in reStructuredText files.
RST_LICENSE_HEADER = _header_lines(
    """
..
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
   regarding copyright ownership.  The ASF licenses this file
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at
..

..  http://www.apache.org/licenses/LICENSE-2.0

..
   Unless required by applicable law or agreed to in writing,
   software distributed under the License is distributed on an
   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   KIND, either express or implied.  See the License for the
   specific language governing permissions and limitations
   under the License.
..
"""
)

# Header expected in Windows batch files.
BAT_LICENSE_HEADER = _header_lines(
    """
REM
REM Licensed to the Apache Software Foundation (ASF) under one
REM or more contributor license agreements.  See the NOTICE file
REM distributed with this work for additional information
REM regarding copyright ownership.  The ASF licenses this file
REM to you under the Apache License, Version 2.0 (the
REM "License"); you may not use this file except in compliance
REM with the License.  You may obtain a copy of the License at
REM
REM   http://www.apache.org/licenses/LICENSE-2.0
REM
REM Unless required by applicable law or agreed to in writing,
REM software distributed under the License is distributed on an
REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
REM KIND, either express or implied.  See the License for the
REM specific language governing permissions and limitations
REM under the License.
"""
)

# Header expected in JSON files, stored under an empty-string key.
# The strip() in _header_lines removes the two-space indent of the first line
# only; the remaining lines keep their indentation.
# NOTE(review): '\"AS IS\"BASIS' has no space before BASIS, unlike the other
# headers - presumably it mirrors the JSON files being checked; confirm.
JSON_LICENSE_HEADER = _header_lines(
    """
  "": [
    "Licensed to the Apache Software Foundation (ASF) under one",
    "or more contributor license agreements.  See the NOTICE file",
    "distributed with this work for additional information",
    "regarding copyright ownership.  The ASF licenses this file",
    "to you under the Apache License, Version 2.0 (the",
    "\\"License\\"); you may not use this file except in compliance",
    "with the License.  You may obtain a copy of the License at",
    "",
    "  http://www.apache.org/licenses/LICENSE-2.0",
    "",
    "Unless required by applicable law or agreed to in writing,",
    "software distributed under the License is distributed on an",
    "\\"AS IS\\"BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY",
    "KIND, either express or implied.  See the License for the",
    "specific language governing permissions and limitations",
    "under the License."
  ],
"""
)


class TestLicenses(TestCase):
    """Checks that every non-excluded file in the repository carries the ASF license header."""

    def test_licenses(self):
        """Walk the repository root and fail with the list of files that lack a license header.

        The root is the parent of the directory containing this test file.
        Directories in EXCLUDED_DIRS, names ending in one of EXCLUDED_EXTENSIONS
        and names listed in EXCLUDED_FILES are not checked.
        """
        base_dir = os.path.join(pathlib.Path(__file__).parent.parent.absolute())
        print(f'Checking licenses for files in {base_dir}')

        excluded_dirs = tuple(EXCLUDED_DIRS)
        excluded_suffixes = tuple(EXCLUDED_EXTENSIONS)

        files = []
        for root, dirs, filenames in os.walk(base_dir):
            # Prune excluded directories in place so os.walk never descends into
            # them (e.g. .git, venv). The selected files are the same as when
            # filtering afterwards, because every descendant of an excluded
            # directory shares its excluded prefix.
            # NOTE(review): this is a plain string-prefix match on the relative
            # path, so '.github' is skipped too because it starts with '.git' -
            # confirm whether that is intended.
            dirs[:] = [
                name
                for name in dirs
                if not os.path.relpath(os.path.join(root, name), base_dir).startswith(excluded_dirs)
            ]
            for file in filenames:
                if file.endswith(excluded_suffixes) or file in EXCLUDED_FILES:
                    continue
                files.append(os.path.join(root, file))

        files_missing_license = []
        for file in files:
            print(f'Checking license for file {file}')
            if not self.check_license(file):
                files_missing_license.append(file)

        # One red line per offending file; prints an empty line when all files pass.
        print(''.join(colored(f'Missing License: {file}\n', 'red') for file in files_missing_license))

        self.assertListEqual([], files_missing_license)

    @staticmethod
    def check_license(file):
        """Return True when every expected header line occurs in ``file``.

        The expected header is chosen by extension (.md, .rst, .bat, .json);
        any other file is checked against the '#'-commented header. Each header
        line is looked up individually, followed by a newline; neither order nor
        contiguity of the lines is verified.

        :param file: path of the file to inspect.
        :return: True if all header lines are present, False otherwise.
        """
        if file.endswith('.md'):
            header_lines = MD_LICENSE_HEADER
        elif file.endswith('.rst'):
            header_lines = RST_LICENSE_HEADER
        elif file.endswith('.bat'):
            header_lines = BAT_LICENSE_HEADER
        elif file.endswith('.json'):
            # The first JSON header line lost its two-space indent when the
            # constant was stripped. Restore it on a copy: writing it back into
            # JSON_LICENSE_HEADER would add two more spaces on every JSON file
            # checked, so only the first JSON file could ever match.
            header_lines = [f'  {JSON_LICENSE_HEADER[0]}', *JSON_LICENSE_HEADER[1:]]
        else:
            header_lines = PYTHON_LICENSE_HEADER
        with open(file) as f:
            file_lines = set(f.readlines())
        return all(f'{line}\n' in file_lines for line in header_lines)


================================================
FILE: tests/util/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/util/dag_test_utils.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


from datetime import datetime

from airflow import DAG


def create_dag():
    """
    Build the minimal DAG used by tests: id 'test_dag', with a default
    start date of 1970-01-01.
    """
    default_args = {'start_date': datetime(1970, 1, 1)}
    return DAG(dag_id='test_dag', default_args=default_args)


================================================
FILE: tests/util/test_class_utils.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from unittest import TestCase

from liminal.core.util import class_util
from tests.util.test_pkg_1.test_clazz_base import A
from tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_1 import B
from tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_2 import C
from tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_1.test_clazz_leaf_1 import D
from tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_2.test_clazz_leaf_2 import E


class Test(TestCase):
    """Tests for class_util.find_subclasses_in_packages using the test_pkg_1 fixture package."""

    def test_find_full_hierarchy_from_root(self):
        """Searching from base class A must yield B, C, D and E, keyed by their defining module's name."""
        subclasses_by_module = {
            'test_clazz_child_1': B,
            'test_clazz_child_2': C,
            'test_clazz_leaf_1': D,
            'test_clazz_leaf_2': E,
        }
        self.hierarchy_check(A, subclasses_by_module)

    def hierarchy_check(self, clazz, expected_dict):
        """Assert that the subclasses of ``clazz`` found under tests.util.test_pkg_1 equal ``expected_dict``."""
        found = class_util.find_subclasses_in_packages(['tests.util.test_pkg_1'], clazz)
        self.assertDictEqual(found, expected_dict)


================================================
FILE: tests/util/test_pkg_1/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/util/test_pkg_1/test_clazz_base.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


class A:
    """Root of the fixture class hierarchy; B and C derive from it directly."""


================================================
FILE: tests/util/test_pkg_1/test_pkg_1_1/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_1.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


from tests.util.test_pkg_1.test_clazz_base import A


class B(A):
    """Direct subclass of A; base class of the leaf fixture D."""


class M:
    """Fixture class that does not derive from A."""


================================================
FILE: tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_2.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


from tests.util.test_pkg_1.test_clazz_base import A


class C(A):
    """Second direct subclass of A in the fixture hierarchy."""


================================================
FILE: tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/test_clazz_leaf_1.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


from tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_1 import B


class D(B):
    """Fixture subclass of B, i.e. an indirect subclass of A."""


================================================
FILE: tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/__init__.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


================================================
FILE: tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/test_clazz_leaf_2.py
================================================
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_1.test_clazz_leaf_1 import D


class E(D):
    """Fixture subclass of D, the deepest level of the hierarchy rooted at A."""


================================================
FILE: yamllint-config.yml
================================================
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

---
# https://yamllint.readthedocs.io/en/stable/
extends: default

locale: en_US.UTF-8

yaml-files:
  - '*.yaml'
  - '*.yml'
  - .yamllint

rules:
  line-length:
    max: 140
    # level: warning
  brackets: enable
  colons: enable
  commas: enable
  empty-lines: enable
  empty-values: disable
  key-duplicates: disable
  hyphens: enable
  key-ordering: disable
  new-line-at-end-of-file: enable
  new-lines: enable
  # quoted-strings: disable
  truthy:
    level: warning
  indentation:
    spaces: 2
    indent-sequences: true
ignore: |
  docs/
  .asf.yaml