[
  {
    "path": ".asf.yaml",
    "content": "---\n#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\ngithub:\n  description: Apache Liminal's goal is to operationalise the machine learning process, allowing data scientists to quickly transition from a successful\n    experiment to an automated pipeline of model training, validation, deployment and inference in production. Liminal provides a Domain Specific Language\n    to build ML workflows on top of Apache Airflow.\n  homepage: https://liminal.apache.org\n  labels:\n    - data-science\n    - big-data\n    - machine-learning\n    - airflow\n    - ai\n    - ml\n    - workflows\n\nnotifications:\n  commits: commits@liminal.apache.org\n  issues: issues@liminal.apache.org\n  pullrequests: dev@liminal.apache.org\n  jira_options: link label worklog\n"
  },
  {
    "path": ".bumpversion.cfg",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n[bumpversion]\ncurrent_version = 0.0.1rc6\nparse = (?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\.?\\w*)(\\-(?P<release>[a-z]+)(?P<build>\\d+))?\nserialize =\n\t{major}.{minor}.{patch}-dev{build}\n\t{major}.{minor}.{patch}\n\n[bumpversion:part:release]\noptional_value = prod\nfirst_value = dev\nvalues =\n dev\n prod\n"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE.md",
    "content": "## Changes Title (replace this with a logical title for your changes)\n\n*Issue #, if available:*\n\n### *Description of changes:*\n\nReplace this with the PR description (mention the changes you have made, why you have made them, provide some background and any references to the provider documentation if needed, etc.).\n\nFor more information on contributing, please see the [Contributing](../CONTRIBUTING.md)\nsection of our documentation.\n\n### Status\n\nReplace this: describe the PR status. Examples:\n\n- WIP - work in progress\n- DONE - ready for review\n\n### Checklist (tick everything that applies)\n\n- [ ] [PreCommitChecks - Code linting](../CONTRIBUTING.md#InstallRunPreCommit) (required)\n- [ ] [Tests](../CONTRIBUTING.md#RunningTests)\n"
  },
  {
    "path": ".github/workflows/pre_commits.yml",
    "content": "---\nname: PreCommitChecks\n\non:\n  push:\n    branches:\n      - master\n      # - '!release*'\n  pull_request:\n    branches:\n      - master\n      # - '!release*'\njobs:\n  linting:\n    name: Run pre-commit hooks on py3.6\n    runs-on: ubuntu-20.04\n    steps:\n      #----------------------------------------------\n      # Checkout, SetUp Python, Load Cache and Run PreCommitChecks\n      #----------------------------------------------\n      - uses: actions/checkout@v2\n        with:\n          fetch-depth: 0\n\n      # Install Python\n      - uses: actions/setup-python@v2\n        with:\n          python-version: 3.6\n\n      - name: Check Python ${{ matrix.python-version }} version\n        run: python -V\n\n      - uses: actions/cache@v2\n        with:\n          path: ~/.cache/pip\n          key: ${{ runner.os }}-pip\n          restore-keys: ${{ runner.os }}-pip\n\n      - name: Install python requirements\n        run: |\n          python -m pip install --upgrade pip\n          pip install -r requirements.txt\n          pip install pre-commit\n\n      # load cached venv if cache exists\n      - name: Load cached venv\n        id: cached-poetry-dependencies\n        uses: actions/cache@v2\n        with:\n          path: .venv\n          key: venv-${{ runner.os }}-${{ hashFiles('**/requirements.txt') }}\n\n      - run: pre-commit install\n      - name: Run pre-commit hooks on all the files\n        run: pre-commit run --all-files --show-diff-on-failure --color always --verbose\n      # - uses: pre-commit/action@v2.0.3\n      #   with:\n      #     token: ${{ secrets.GITHUB_TOKEN }}\n"
  },
  {
    "path": ".github/workflows/unittests.yml",
    "content": "---\nname: Running unittest\n\non:\n  # Trigger the workflow on push or pull request, but only for the master branch\n  push:\n    branches:\n      - master\n  pull_request:\n    branches:\n      - master\n  # Trigger the workflow on cron schedule\n  schedule:\n    - cron: '7 0 * * *'\n\njobs:\n  unittest:\n    runs-on: ubuntu-20.04\n    timeout-minutes: 20\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v1\n      - name: Setup Minikube\n        uses: manusa/actions-setup-minikube@v2.7.1\n        with:\n          minikube version: 'v1.26.1'\n          kubernetes version: 'v1.25.0'\n          github token: ${{ secrets.GITHUB_TOKEN }}\n      - name: Set up Python\n        uses: actions/setup-python@v1\n        with:\n          python-version: '3.6'\n      - name: Install python requirements\n        run: |\n          python -m pip install --upgrade pip\n          pip install -r requirements.txt\n      - name: Run unittest - runners\n        run: ./run_tests.sh\n  approve:\n    needs: unittest\n    runs-on: ubuntu-20.04\n\n    steps:\n      - run: | # approve the pull request\n          curl --request POST \\\n          --url https://api.github.com/repos/${{github.repository}}/pulls/${{github.event.number}}/reviews \\\n          --header 'authorization: Bearer ${{ secrets.GITHUB_TOKEN }}' \\\n          --header 'content-type: application/json' \\\n          -d '{\"event\":\"APPROVE\"}'\n"
  },
  {
    "path": ".github/workflows/versioning.yml",
    "content": "---\nname: Versioning RC\n\non:\n  workflow_run:\n    workflows: [Running unittest]\n    branches: [master]\n    types:\n      - completed\n\njobs:\n  versioning-auto-commit:\n    if: ${{ github.event.workflow_run.conclusion == 'success' }}\n    name: Publish package\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v1\n      - name: Set up Python\n        uses: actions/setup-python@v1\n        with:\n          python-version: '3.6'\n      - name: Install dependencies for setup\n        run: |\n          python -m pip install --upgrade pip\n          pip install bump2version\n      - name: Project versioning\n        run: bumpversion build\n      - name: Commit report\n        run: |\n          git config --global user.name 'Liminal Bot'\n          git commit -am \"Increment version\"\n          git push\n"
  },
  {
    "path": ".gitignore",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nenv\n.idea\nbin\ninclude\nlib\nvenv\n/build\n.Python\n*.pyc\npip-selfcheck.json\n.DS_Store\n/build\napache_liminal.egg-info\nscripts/*.tar.gz\nscripts/*.whl\ndist\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nminimum_pre_commit_version: 2.16.0\nexclude: >\n  (?x)^(\n      .+/.venv/.+|.+/dist/.+|.+/.autovenv|.+/docs/|.github\n  )$\nfail_fast: true\ndefault_language_version:\n  python: python3\ndefault_stages:\n  - prepare-commit-msg\n  - commit\n  # - push\n\nrepos:\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v4.0.1\n    hooks:\n      - id: check-case-conflict\n      - id: check-merge-conflict\n        stages:\n          - commit\n      - id: check-added-large-files\n        args: [--maxkb=1000]\n        stages:\n          - commit\n      - id: detect-aws-credentials\n        args:\n          - --allow-missing-credentials\n      - id: fix-encoding-pragma\n        args:\n          - --remove\n      - id: detect-private-key\n      - id: destroyed-symlinks\n      - id: mixed-line-ending\n      - id: trailing-whitespace\n      - id: check-toml\n      # To match test*.py instead\n      # - id: name-tests-test\n      #   args: [--django]\n      #   exclude: |\n      #     - tests/test_licenses\n      - id: end-of-file-fixer\n        description: Ensures that a file is either empty, or ends with one newline.\n        exclude_types: [sql]\n    
    # types: [text]\n      - id: pretty-format-json\n        args:\n          - --autofix\n          - --no-sort-keys\n          - --indent\n          - '2'\n        # files:\n        pass_filenames: true\n\n  - repo: https://github.com/Lucas-C/pre-commit-hooks\n    rev: v1.1.10\n    hooks:\n      - id: insert-license\n        name: Add license for all md files\n        files: \\.md$\n        exclude: ^\\.github/.*$\n        args:\n          - --comment-style\n          - <!--|| -->\n          - --license-filepath\n          - dev/LICENSE.txt\n          - --fuzzy-match-generates-todo\n      - id: insert-license\n        name: Add license for all SQL files\n        files: \\.sql$\n        exclude: ^\\.github/.*$|^airflow/_vendor/\n        args:\n          - --comment-style\n          - /*||*/\n          - --license-filepath\n          - dev/LICENSE.txt\n          - --fuzzy-match-generates-todo\n      - id: insert-license\n        name: Add license for all other files\n        exclude: ^\\.github/.*$|^airflow/_vendor/\n        args:\n          - --comment-style\n          - '|#|'\n          - --license-filepath\n          - dev/LICENSE.txt\n          - --fuzzy-match-generates-todo\n        files: >\n          \\.properties$|\\.cfg$|\\.conf$|\\.ini$|\\.ldif$|\\.readthedocs$|\\.service$|\\.tf$|Dockerfile.*$\n      - id: insert-license\n        name: Add license for all JINJA template files\n        files: ^airflow/www/templates/.*\\.html$|^docs/templates/.*\\.html$.*\\.jinja2\n        exclude: ^\\.github/.*$^airflow/_vendor/\n        args:\n          - --comment-style\n          - '{#||#}'\n          - --license-filepath\n          - dev/LICENSE.txt\n          - --fuzzy-match-generates-todo\n      - id: insert-license\n        name: Add license for all shell files\n        exclude: ^\\.github/.*$|^airflow/_vendor/\n        files: ^breeze$|^breeze-complete$|\\.sh$|\\.bash$|\\.bats$\n        args:\n          - --comment-style\n          - '|#|'\n          - 
--license-filepath\n          - dev/LICENSE.txt\n          - --fuzzy-match-generates-todo\n      - id: insert-license\n        name: Add license for all Python files\n        exclude: ^\\.github/.*$|^airflow/_vendor/\n        types: [python]\n        args:\n          - --comment-style\n          - '|#|'\n          - --license-filepath\n          - dev/LICENSE.txt\n          - --fuzzy-match-generates-todo\n      - id: insert-license\n        name: Add license for all YAML files\n        exclude: ^\\.github/.*$|^airflow/_vendor/\n        types: [yaml]\n        files: \\.yml$|\\.yaml$\n        args:\n          - --comment-style\n          - '|#|'\n          - --license-filepath\n          - dev/LICENSE.txt\n          - --fuzzy-match-generates-todo\n\n  # Python: Black formatter\n  - repo: https://github.com/psf/black\n    rev: 21.12b0\n    hooks:\n      - id: black\n        args: [--safe, --quiet, --config=./pyproject.toml]\n        files: \\.pyi?$\n        exclude: .github/\n        # override until resolved: https://github.com/psf/black/issues/402\n        types: []\n\n  # - repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt\n  #   rev: 0.1.0\n  #   hooks:\n  #     - id: yamlfmt\n  #       args: [--mapping, '2', --sequence, '4', --offset, '2']\n\n  # Yaml: lint\n  - repo: https://github.com/adrienverge/yamllint\n    rev: v1.26.3\n    hooks:\n      - id: yamllint\n        name: Check YAML files with yamllint\n        entry: yamllint -c yamllint-config.yml # --strict\n        types: [yaml]\n\n  # Python - isort to sort imports in Python files\n  - repo: https://github.com/timothycrosley/isort\n    rev: 5.10.1\n    hooks:\n      - id: isort\n        name: Run isort to sort imports in Python files\n        args: [--profile, black]\n        files: \\.py$\n        # To keep consistent with the global isort skip config defined in setup.cfg\n        exclude: ^build/.*$|^.tox/.*$|^venv/.*$\n\n  # PyUpgrade - 3.6\n  - repo: https://github.com/asottile/pyupgrade\n   
 rev: v2.29.1\n    hooks:\n      - id: pyupgrade\n        args: [--py36-plus]\n        exclude: ^scripts/|^docs\n\n  - repo: https://github.com/asottile/blacken-docs\n    rev: v1.12.0\n    hooks:\n      - id: blacken-docs\n        alias: black\n        additional_dependencies: [black==21.9b0]\n\n  # - repo: https://github.com/PyCQA/bandit\n  #   rev: 1.7.1\n  #   hooks:\n  #     - id: bandit\n  #       args: [-ll, -r, liminal]\n  #       files: .py$\n  #       exclude: tests/test_licenses.py|^tests/runners\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Contributing to Liminal docs\n\nApache Liminal uses Sphinx and readthedocs.org to create and publish documentation to readthedocs.org\n\n## Basic setup\nHere is what you need to do in order to work and create docs locally\n\n## Installing sphinx and documentation\n\nMore details about sphinx and readthedocs can be found [here:](https://docs.readthedocs.io/en/stable/intro/getting-started-with-sphinx.html)\n\nHere is what you need to do:\n```\npip install sphinx\n```\nInside your project go to docs lib, update the docs you like and build them:\n```\ncd docs\nmake html\n```\n\n## Automatically rebuild sphinx docs\n```\npip install sphinx-autobuild\ncd docs\nsphinx-autobuild source/ build/html/\n```\nNow open a browser on\nhttp://localhost:8000/\nYour docs will automatically update\n\n## Rebuilding the pythondoc liminal code documentation\n\n```\ncd docs\nsphinx-apidoc -o source/ ../liminal\n```\n\n## RunningTests\n\nWhen doing local development and running Liminal unit-tests, make sure to set LIMINAL_STAND_ALONE_MODE=True\n\n1. Setup Minikube\n2. Install python requirements: `pip install -r requirements.txt`\n3. 
Run tests: `./run_tests.sh`\n\n## InstallRunPreCommit\n\n[pre-commit](https://pre-commit.com/) is used to install Python code linting and formatting tools:\n\n### Getting started\n\n**Requires `python >=3.6`, `pre-commit>=1.14` and a `git` repository**\n\n1. Run `pip install pre-commit` or `pip install  -r requirements.txt`\n2. Install the hooks:\n  Simple install without post hooks: `pre-commit install`\n\n  OR\n  Install the hooks: `pre-commit install --install-hooks`\n\n  Optional:\n  Install the post commit hooks: `pre-commit install --hook-type post-commit`\n\n1. To run pre commit hooks:\n   1. Either run `git commit` the new configuration files\n\n   1. Run `pre-commit run -a` to lint and format your entire project\n\nNow on every commit, `pre-commit` will use a git hook to run the tools.\n**Warning: the first commit will take some time because the tools are being installed by\n`pre-commit`**\n\n`pre-commit run -a` to lint and format your entire project\n\n### Resolving failed commits\n\n* If `black` fails, it has reformatted your code.\n* You should check the changes made. Then simply \"git add --update .\" and re-commit or `git add` and `git commit` the changes.\n\nExample:\n\n![Fixing black failed commits](https://user-images.githubusercontent.com/16241795/146011210-7bc11b24-2033-43f7-8150-5ece4fe7bfea.png)\n\n### EnabledHooks\n\n1. [black](https://black.readthedocs.io/en/stable/): a Python automatic code formatter\n1. [yamllint](https://yamllint.readthedocs.io/): A linter for YAML files.\n  yamllint does not only check for syntax validity, but for weirdnesses like key repetition and cosmetic problems such as lines length, trailing spaces, indentation, etc.\n1. [OutOfBoxHooks](https://github.com/pre-commit/pre-commit-hooks) - Out-of-the-box hooks for pre-commit like Check for files that contain merge conflict strings\n1. [Bandit](https://bandit.readthedocs.io/en/latest/) is a tool designed to find common security issues in Python code.\n1. 
[blacken-docs](https://github.com/asottile/blacken-docs) Run `black` on python code blocks in documentation files\n1. [pyupgrade](https://github.com/asottile/pyupgrade) A tool (and pre-commit hook) to automatically upgrade syntax for newer versions of the language.\n1. [isort](https://pycqa.github.io/isort/) A Python utility / library to sort imports.\n"
  },
  {
    "path": "DISCLAIMER-WIP",
    "content": "Apache Liminal is an effort undergoing incubation at the Apache Software\nFoundation (ASF), sponsored by the Apache Incubator PMC.\n\nIncubation is required of all newly accepted projects until a further review\nindicates that the infrastructure, communications, and decision making process\nhave stabilized in a manner consistent with other successful ASF projects.\n\nWhile incubation status is not necessarily a reflection of the completeness\nor stability of the code, it does indicate that the project has yet to be\nfully endorsed by the ASF.\n\nSome of the incubating project’s releases may not be fully compliant with ASF policy.\nFor example, releases may have incomplete or un-reviewed licensing conditions.\nWhat follows is a list of known issues the project is currently aware of\n(note that this list, by definition, is likely to be incomplete):\n\n1. The Liminal source code is distributed under the Apache License, Version 2.0. However\nbuilding Liminal requires using a transitive required library chardet V 4.0.0, which is under LGPL license, Version 2.1.\n\nIf you are planning to incorporate this work into your product/project,\nplease be aware that you will need to conduct a thorough licensing review\nto determine the overall implications of including this work.\n"
  },
  {
    "path": "LICENSE",
    "content": "                              Apache License\n                        Version 2.0, January 2004\n                     http://www.apache.org/licenses/\n\nTERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n1. Definitions.\n\n   \"License\" shall mean the terms and conditions for use, reproduction,\n   and distribution as defined by Sections 1 through 9 of this document.\n\n   \"Licensor\" shall mean the copyright owner or entity authorized by\n   the copyright owner that is granting the License.\n\n   \"Legal Entity\" shall mean the union of the acting entity and all\n   other entities that control, are controlled by, or are under common\n   control with that entity. For the purposes of this definition,\n   \"control\" means (i) the power, direct or indirect, to cause the\n   direction or management of such entity, whether by contract or\n   otherwise, or (ii) ownership of fifty percent (50%) or more of the\n   outstanding shares, or (iii) beneficial ownership of such entity.\n\n   \"You\" (or \"Your\") shall mean an individual or Legal Entity\n   exercising permissions granted by this License.\n\n   \"Source\" form shall mean the preferred form for making modifications,\n   including but not limited to software source code, documentation\n   source, and configuration files.\n\n   \"Object\" form shall mean any form resulting from mechanical\n   transformation or translation of a Source form, including but\n   not limited to compiled object code, generated documentation,\n   and conversions to other media types.\n\n   \"Work\" shall mean the work of authorship, whether in Source or\n   Object form, made available under the License, as indicated by a\n   copyright notice that is included in or attached to the work\n   (an example is provided in the Appendix below).\n\n   \"Derivative Works\" shall mean any work, whether in Source or Object\n   form, that is based on (or derived from) the Work and for which the\n   editorial revisions, 
annotations, elaborations, or other modifications\n   represent, as a whole, an original work of authorship. For the purposes\n   of this License, Derivative Works shall not include works that remain\n   separable from, or merely link (or bind by name) to the interfaces of,\n   the Work and Derivative Works thereof.\n\n   \"Contribution\" shall mean any work of authorship, including\n   the original version of the Work and any modifications or additions\n   to that Work or Derivative Works thereof, that is intentionally\n   submitted to Licensor for inclusion in the Work by the copyright owner\n   or by an individual or Legal Entity authorized to submit on behalf of\n   the copyright owner. For the purposes of this definition, \"submitted\"\n   means any form of electronic, verbal, or written communication sent\n   to the Licensor or its representatives, including but not limited to\n   communication on electronic mailing lists, source code control systems,\n   and issue tracking systems that are managed by, or on behalf of, the\n   Licensor for the purpose of discussing and improving the Work, but\n   excluding communication that is conspicuously marked or otherwise\n   designated in writing by the copyright owner as \"Not a Contribution.\"\n\n   \"Contributor\" shall mean Licensor and any individual or Legal Entity\n   on behalf of whom a Contribution has been received by Licensor and\n   subsequently incorporated within the Work.\n\n2. Grant of Copyright License. Subject to the terms and conditions of\n   this License, each Contributor hereby grants to You a perpetual,\n   worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n   copyright license to reproduce, prepare Derivative Works of,\n   publicly display, publicly perform, sublicense, and distribute the\n   Work and such Derivative Works in Source or Object form.\n\n3. Grant of Patent License. 
Subject to the terms and conditions of\n   this License, each Contributor hereby grants to You a perpetual,\n   worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n   (except as stated in this section) patent license to make, have made,\n   use, offer to sell, sell, import, and otherwise transfer the Work,\n   where such license applies only to those patent claims licensable\n   by such Contributor that are necessarily infringed by their\n   Contribution(s) alone or by combination of their Contribution(s)\n   with the Work to which such Contribution(s) was submitted. If You\n   institute patent litigation against any entity (including a\n   cross-claim or counterclaim in a lawsuit) alleging that the Work\n   or a Contribution incorporated within the Work constitutes direct\n   or contributory patent infringement, then any patent licenses\n   granted to You under this License for that Work shall terminate\n   as of the date such litigation is filed.\n\n4. Redistribution. You may reproduce and distribute copies of the\n   Work or Derivative Works thereof in any medium, with or without\n   modifications, and in Source or Object form, provided that You\n   meet the following conditions:\n\n   (a) You must give any other recipients of the Work or\n       Derivative Works a copy of this License; and\n\n   (b) You must cause any modified files to carry prominent notices\n       stating that You changed the files; and\n\n   (c) You must retain, in the Source form of any Derivative Works\n       that You distribute, all copyright, patent, trademark, and\n       attribution notices from the Source form of the Work,\n       excluding those notices that do not pertain to any part of\n       the Derivative Works; and\n\n   (d) If the Work includes a \"NOTICE\" text file as part of its\n       distribution, then any Derivative Works that You distribute must\n       include a readable copy of the attribution notices contained\n       within such NOTICE file, excluding 
those notices that do not\n       pertain to any part of the Derivative Works, in at least one\n       of the following places: within a NOTICE text file distributed\n       as part of the Derivative Works; within the Source form or\n       documentation, if provided along with the Derivative Works; or,\n       within a display generated by the Derivative Works, if and\n       wherever such third-party notices normally appear. The contents\n       of the NOTICE file are for informational purposes only and\n       do not modify the License. You may add Your own attribution\n       notices within Derivative Works that You distribute, alongside\n       or as an addendum to the NOTICE text from the Work, provided\n       that such additional attribution notices cannot be construed\n       as modifying the License.\n\n   You may add Your own copyright statement to Your modifications and\n   may provide additional or different license terms and conditions\n   for use, reproduction, or distribution of Your modifications, or\n   for any such Derivative Works as a whole, provided Your use,\n   reproduction, and distribution of the Work otherwise complies with\n   the conditions stated in this License.\n\n5. Submission of Contributions. Unless You explicitly state otherwise,\n   any Contribution intentionally submitted for inclusion in the Work\n   by You to the Licensor shall be under the terms and conditions of\n   this License, without any additional terms or conditions.\n   Notwithstanding the above, nothing herein shall supersede or modify\n   the terms of any separate license agreement you may have executed\n   with Licensor regarding such Contributions.\n\n6. Trademarks. This License does not grant permission to use the trade\n   names, trademarks, service marks, or product names of the Licensor,\n   except as required for reasonable and customary use in describing the\n   origin of the Work and reproducing the content of the NOTICE file.\n\n7. Disclaimer of Warranty. 
Unless required by applicable law or\n   agreed to in writing, Licensor provides the Work (and each\n   Contributor provides its Contributions) on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n   implied, including, without limitation, any warranties or conditions\n   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n   PARTICULAR PURPOSE. You are solely responsible for determining the\n   appropriateness of using or redistributing the Work and assume any\n   risks associated with Your exercise of permissions under this License.\n\n8. Limitation of Liability. In no event and under no legal theory,\n   whether in tort (including negligence), contract, or otherwise,\n   unless required by applicable law (such as deliberate and grossly\n   negligent acts) or agreed to in writing, shall any Contributor be\n   liable to You for damages, including any direct, indirect, special,\n   incidental, or consequential damages of any character arising as a\n   result of this License or out of the use or inability to use the\n   Work (including but not limited to damages for loss of goodwill,\n   work stoppage, computer failure or malfunction, or any and all\n   other commercial damages or losses), even if such Contributor\n   has been advised of the possibility of such damages.\n\n9. Accepting Warranty or Additional Liability. While redistributing\n   the Work or Derivative Works thereof, You may choose to offer,\n   and charge a fee for, acceptance of support, warranty, indemnity,\n   or other liability obligations and/or rights consistent with this\n   License. 
However, in accepting such obligations, You may act only\n   on Your own behalf and on Your sole responsibility, not on behalf\n   of any other Contributor, and only if You agree to indemnify,\n   defend, and hold each Contributor harmless for any liability\n   incurred by, or claims asserted against, such Contributor by reason\n   of your accepting any such warranty or additional liability.\n\nEND OF TERMS AND CONDITIONS\n\nAPPENDIX: How to apply the Apache License to your work.\n\n   To apply the Apache License to your work, attach the following\n   boilerplate notice, with the fields enclosed by brackets \"[]\"\n   replaced with your own identifying information. (Don't include\n   the brackets!)  The text should be enclosed in the appropriate\n   comment syntax for the file format. We also recommend that a\n   file or class name and description of purpose be included on the\n   same \"printed page\" as the copyright notice for easier\n   identification within third-party archives.\n\nCopyright [yyyy] [name of copyright owner]\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n"
  },
  {
    "path": "MANIFEST.in",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\ninclude scripts/* requirements.txt requirements-airflow.txt\nrecursive-include liminal/build/ *\n"
  },
  {
    "path": "NOTICE",
    "content": "Apache Liminal\nCopyright 2020-2023 The Apache Software Foundation\n\nThis product includes software developed at\nThe Apache Software Foundation (http://www.apache.org/).\nLicensed under the Apache License 2.0.\n"
  },
  {
    "path": "README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Apache Liminal\n\nApache Liminal is an end-to-end platform for data engineers & scientists, allowing them to build,\ntrain and deploy machine learning models in a robust and agile way.\n\nThe platform provides the abstractions and declarative capabilities for\ndata extraction & feature engineering followed by model training and serving.\nLiminal's goal is to operationalize the machine learning process, allowing data scientists to\nquickly transition from a successful experiment to an automated pipeline of model training,\nvalidation, deployment and inference in production, freeing them from engineering and\nnon-functional tasks, and allowing them to focus on machine learning code and artifacts.\n\n# Basics\n\nUsing simple YAML configuration, create your own schedule data pipelines (a sequence of tasks to\nperform), application servers,  and more.\n\n## Getting Started\n\nA simple getting stated guide for Liminal can be found [here](docs/getting-started/hello_world.md)\n\n## Apache Liminal Documentation\n\nFull documentation of Apache Liminal can be found [here](docs/liminal)\n\n## High Level Architecture\n\nHigh level architecture documentation can be found 
[here](docs/architecture.md)\n\n## Example YAML config file\n\n```yaml\n---\nname: MyLiminalStack\nowner: Bosco Albert Baracus\nvolumes:\n  - volume: myvol1\n    local:\n      path: /Users/me/myvol1\nimages:\n  - image: my_python_task_img\n    type: python\n    source: write_inputs\n  - image: my_parallelized_python_task_img\n    source: write_outputs\n  - image: my_server_image\n    type: python_server\n    source: myserver\n    endpoints:\n      - endpoint: /myendpoint1\n        module: my_server\n        function: myendpoint1func\npipelines:\n  - pipeline: my_pipeline\n    start_date: 1970-01-01\n    timeout_minutes: 45\n    schedule: 0 * 1 * *\n    metrics:\n      namespace: TestNamespace\n      backends: [ 'cloudwatch' ]\n    tasks:\n      - task: my_python_task\n        type: python\n        description: static input task\n        image: my_python_task_img\n        env_vars:\n          NUM_FILES: 10\n          NUM_SPLITS: 3\n        mounts:\n          - mount: mymount\n            volume: myvol1\n            path: /mnt/vol1\n        cmd: python -u write_inputs.py\n      - task: my_parallelized_python_task\n        type: python\n        description: parallelized python task\n        image: my_parallelized_python_task_img\n        env_vars:\n          FOO: BAR\n        executors: 3\n        mounts:\n          - mount: mymount\n            volume: myvol1\n            path: /mnt/vol1\n        cmd: python -u write_inputs.py\nservices:\n  - service: my_python_server\n    description: my python server\n    image: my_server_image\n```\n\n# Installation\n\n1. Install this repository (HEAD)\n\n```bash\n   pip install git+https://github.com/apache/incubator-liminal.git\n```\n\n2. 
Optional: set LIMINAL_HOME to path of your choice (if not set, will default to ~/liminal_home)\n\n```bash\necho 'export LIMINAL_HOME=</path/to/some/folder>' >> ~/.bash_profile && source ~/.bash_profile\n```\n\n# Authoring pipelines\n\nThis involves at minimum creating a single file called liminal.yml as in the example above.\n\nIf your pipeline requires custom python code to implement tasks, they should be organized\n[like this](https://github.com/apache/incubator-liminal/tree/master/tests/runners/airflow/liminal)\n\nIf your pipeline  introduces imports of external packages which are not already a part\nof the liminal framework (i.e. you had to pip install them yourself), you need to also provide\na requirements.txt in the root of your project.\n\n# Testing the pipeline locally\n\nWhen your pipeline code is ready, you can test it by running it locally on your machine.\n\n1. Ensure you have The Docker engine running locally, and enable a local Kubernetes cluster:\n\n  ![Kubernetes configured](https://raw.githubusercontent.com/apache/incubator-liminal/master/images/k8s_running.png)\n\n  And allocate it at least 3 CPUs (under \"Resources\" in the Docker preference UI).\n\n  If you want to execute your pipeline on a remote kubernetes cluster, make sure the cluster is configured using:\n\n  ```bash\n  kubectl config set-context <your remote kubernetes cluster>\n  ```\n\n2. Build the docker images used by your pipeline.\n\nIn the example pipeline above, you can see that tasks and services have an \"image\" field - such as\n\"my_static_input_task_image\". 
This means that the task is executed inside a docker container, and the docker container\nis created from a docker image where various code and libraries are installed.\n\nYou can take a look at what the build process looks like, e.g.\n[here](https://github.com/apache/incubator-liminal/tree/master/liminal/build/image/python)\n\nIn order for the images to be available for your pipeline, you'll need to build them locally:\n\n```bash\ncd </path/to/your/liminal/code>\nliminal build\n```\n\nYou'll see a number of outputs indicating the various docker images that were built.\n\n3. Create a kubernetes local volume \\\nIn case your Yaml includes working with [volumes](https://github.com/apache/incubator-liminal/blob/6253f8b2c9dc244af032979ec6d462dc3e07e170/docs/getting_started.md#mounted-volumes)\nplease first run the following command:\n\n```bash\ncd </path/to/your/liminal/code>\nliminal create\n```\n\n4. Deploy the pipeline:\n\n```bash\ncd </path/to/your/liminal/code>\nliminal deploy\n```\n\nNote: after upgrading liminal, it's recommended to issue the command\n\n```bash\nliminal deploy --clean\n```\n\nThis will rebuild the airflow docker containers from scratch with a fresh version of liminal, ensuring consistency.\n\n5. Start the server\n\n```bash\nliminal start\n```\n\n6. Stop the server\n\n```bash\nliminal stop\n```\n\n7. Display the server logs\n\n```bash\nliminal logs --follow/--tail\n\nNumber of lines to show from the end of the log:\nliminal logs --tail=10\n\nFollow log output:\nliminal logs --follow\n```\n\n8. Navigate to [http://localhost:8080/admin](http://localhost:8080/admin)\n\n9. You should see your ![pipeline](https://raw.githubusercontent.com/apache/incubator-liminal/master/images/airflow.png)\nThe pipeline is scheduled to run according to the ```json schedule: 0 * 1 * *``` field in the .yml file you provided.\n\n10. 
To manually activate your pipeline:\n\n- Click your pipeline and then click \"trigger DAG\"\n- Click \"Graph view\"\nYou should see the steps in your pipeline getting executed in \"real time\" by clicking \"Refresh\" periodically.\n\n![Pipeline activation](https://raw.githubusercontent.com/apache/incubator-liminal/master/images/airflow_trigger.png)\n\n# Contributing\n\nMore information on contributing can be found [here](CONTRIBUTING.md)\n\n# Community\n\nThe Liminal community holds a public call every Monday\n\n- [Liminal Community Calendar](https://calendar.google.com/calendar/u/0/r?cid=jom1i20emghura6s6ookhe2skk@group.calendar.google.com)\n- [Dev-Mailing-List](https://lists.apache.org/list.html?dev@liminal.apache.org)\n\n## Running Tests (for contributors)\n\nWhen doing local development and running Liminal unit-tests, make sure to set LIMINAL_STAND_ALONE_MODE=True\n"
  },
  {
    "path": "RETIRED.txt",
    "content": "This podling has been retired. Please see http://incubator.apache.org/projects/index.html#liminal\n"
  },
  {
    "path": "dev/LICENSE.txt",
    "content": "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n"
  },
  {
    "path": "dev/RELEASE.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n<!-- START doctoc generated TOC please keep comment here to allow auto update -->\n<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->\n\n\n- [Apache Liminal source releases](#apache-liminal-source-releases)\n  - [Apache Liminal Package](#apache-liminal-package)\n- [Prerequisites for the release manager preparing the release](#prerequisites-for-the-release-manager-preparing-the-release)\n  - [version IDs](#version-ids)\n  - [Testing the release locally](#testing-the-release-locally)\n  - [Upload Public keys to id.apache.org](#upload-public-keys-to-idapacheorg)\n  - [Configure PyPI uploads](#configure-pypi-uploads)\n  - [Hardware used to prepare and verify the packages](#hardware-used-to-prepare-and-verify-the-packages)\n- [Apache Liminal packages](#apache-liminal-packages)\n  - [Prepare the Apache Liminal Package RC](#prepare-the-apache-liminal-package-rc)\n    - [Build RC artifacts (both source packages and convenience packages)](#build-rc-artifacts-both-source-packages-and-convenience-packages)\n    - [Prepare PyPI convenience \"snapshot\" packages](#prepare-pypi-convenience-snapshot-packages)\n  - [Vote and verify the Apache Liminal release 
candidate](#vote-and-verify-the-apache-liminal-release-candidate)\n    - [Prepare Vote email on the Apache Liminal release candidate](#prepare-vote-email-on-the-apache-liminal-release-candidate)\n    - [Verify the release candidate by PMCs (legal)](#verify-the-release-candidate-by-pmcs-legal)\n      - [PMC responsibilities](#pmc-responsibilities)\n      - [SVN check](#svn-check)\n      - [Verify the licences](#verify-the-licences)\n      - [Verify the signatures](#verify-the-signatures)\n      - [Verify the SHA512 sum](#verify-the-sha512-sum)\n    - [Verify if the release candidate \"works\" by Contributors](#verify-if-the-release-candidate-works-by-contributors)\n  - [Publish the final Apache Liminal release](#publish-the-final-apache-liminal-release)\n    - [Summarize the voting for the Apache Liminal release](#summarize-the-voting-for-the-apache-liminal-release)\n    - [Publish release to SVN](#publish-release-to-svn)\n    - [Prepare PyPI \"release\" packages](#prepare-pypi-release-packages)\n    - [Update CHANGELOG.md](#update-changelogmd)\n    - [Notify developers of release](#notify-developers-of-release)\n\n<!-- END doctoc generated TOC please keep comment here to allow auto update -->\n\n# Apache Liminal source releases\n\n## Apache Liminal Package\n\nThis package contains sources that allow the user building fully-functional Apache Liminal package.\nThey contain sources for \"apache-liminal\" python package that installs \"liminal\" Python package and includes\nall the assets required to release the webserver UI coming with Apache Liminal\n\nThe Source releases are the only \"official\" Apache Software Foundation releases, and they are distributed\nvia [Official Apache Download sources](https://downloads.apache.org/)\n\nFollowing source releases Apache Liminal release manager also distributes convenience packages:\n\n* PyPI packages released via https://pypi.org/project/apache-liminal/\n* Docker Images released via 
https://hub.docker.com/repository/docker/apache/liminal (coming soon...)\n\nThose convenience packages are not \"official releases\" of Apache Liminal, but the users who\ncannot or do not want to build the packages themselves can use them as a convenient way of installing\nApache Liminal, however they are not considered as \"official source releases\". You can read more\ndetails about it in the [ASF Release Policy](http://www.apache.org/legal/release-policy.html).\n\nThis document describes the process of releasing both - official source packages and convenience\npackages for Apache Liminal packages.\n\n# Prerequisites for the release manager preparing the release\n\nThe person acting as release manager has to fulfill certain pre-requisites. More details and FAQs are\navailable in the [ASF Release Policy](http://www.apache.org/legal/release-policy.html) but here some important\npre-requisites are listed below. Note that release manager does not have to be a PMC - it is enough\nto be committer to assume the release manager role, but there are final steps in the process (uploading\nfinal releases to SVN) that can only be done by PMC member. 
If needed, the release manager\ncan ask PMC to perform that final step of release.\n\n## version IDs\n\nShould follow https://www.python.org/dev/peps/pep-0440/\n\n## Testing the release locally\n\n- Build a local version of liminal:\n```bash\n# Set Version\nexport LIMINAL_BUILD_VERSION=0.0.1rc1-incubating\npython setup.py sdist bdist_wheel\n```\n\n- Start a test project with a liminal .yml file in it\n\n- Clear folder $LIMINAL_HOME\n\n- Install liminal in the test project\n\n```bash\npip install <path to liminal .whl created by setup.py>\nliminal build\nliminal deploy --clean\nliminal start\n```\n\nNote:\nWhen you run liminal deploy, liminal installs itself inside the airflow container.\nYou can control which version of liminal gets installed inside docker using the LIMINAL_VERSION environment variable.\n\nThis can be any string which results in a legal call to:\n```bash\npip install ${LIMINAL_VERSION}\n```\n\nThis includes:\n- A standard pip version like apache-liminal==0.0.1dev1 available from PyPI\n- A URL for git e.g. git+https://github.com/apache/incubator-liminal.git\n- A string indicating where to get the package from like --index <url> apache-liminal==xyz\n- A local path to a .whl which is available inside the docker (e.g. 
which you placed\nin the scripts/ folder)\n\nThis is useful if you are making changes in liminal locally and want to test them.\n\nIf you don't specify this variable, liminal attempts to discover how to install itself\nby running a pip freeze and looking at the result.\nThis covers pip repositories, files and installation from URL.\n\nThe fallback, in case no string is found, is simply 'apache-liminal' assuming your .pypirc contains an\nindex which has this package.\n\n\n## Testing a version from testpypi:\n\nInstalling liminal locally:\npip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple apache-liminal\n\nTell liminal where to take the version to put inside the airflow docker:\nexport LIMINAL_VERSION='--index-url https://test.pypi.org/simple/ apache-liminal==0.0.1rc1-incubating'\n\n## Upload Public keys to id.apache.org\n\nMake sure your public key is on id.apache.org and in KEYS. You will need to sign the release artifacts\nwith your pgp key. After you have created a key, make sure you:\n\n- Add your GPG pub key to https://dist.apache.org/repos/dist/release/liminal/KEYS , follow the instructions at the top of that file. Upload your GPG public key to https://pgp.mit.edu\n- Add your key fingerprint to https://id.apache.org/ (login with your apache credentials, paste your fingerprint into the pgp fingerprint field and hit save).\n\n```shell script\n# Create PGP Key\ngpg --gen-key\n\n# Checkout ASF dist repo\nsvn checkout https://dist.apache.org/repos/dist/release/incubator/liminal\ncd liminal\n\n\n# Add your GPG pub key to KEYS file. 
Replace \"Aviem Zur\" with your name\n(gpg --list-sigs \"Aviem Zur\" && gpg --armor --export \"Aviem Zur\" ) >> KEYS\n\n\n# Commit the changes\nsvn commit -m \"Add PGP keys of Liminal developers\"\n```\n\nSee this for more detail on creating keys and what is required for signing releases.\n\nhttp://www.apache.org/dev/release-signing.html#basic-facts\n\n## Configure PyPI uploads\n\nIn order to not reveal your password in plain text, it's best if you create and configure API Upload tokens.\nYou can add and copy the tokens here:\n\n* [Test PyPI](https://test.pypi.org/manage/account/token/)\n* [Prod PyPI](https://pypi.org/manage/account/token/)\n\n\nCreate a `~/.pypirc` file:\n\n```ini\n[distutils]\nindex-servers =\n  pypi\n  pypitest\n\n[pypi]\nusername=__token__\npassword=<API Upload Token>\n\n[pypitest]\nrepository=https://test.pypi.org/legacy/\nusername=__token__\npassword=<API Upload Token>\n```\n\nSet proper permissions for the pypirc file:\n\n```shell script\n$ chmod 600 ~/.pypirc\n```\n\n- Install [twine](https://pypi.org/project/twine/) if you do not have it already (it can be done\n  in a separate virtual environment).\n\n```shell script\npip install twine\n```\n\n(more details [here](https://peterdowns.com/posts/first-time-with-pypi.html).)\n\n- Set proper permissions for the pypirc file:\n`$ chmod 600 ~/.pypirc`\n\n## Hardware used to prepare and verify the packages\n\nThe best way to prepare and verify the releases is to prepare them on a hardware owned and controlled\nby the committer acting as release manager. While strictly speaking, releases must only be verified\non hardware owned and controlled by the committer, for practical reasons it's best if the packages are\nprepared using such hardware. 
More information can be found in this\n[FAQ](http://www.apache.org/legal/release-policy.html#owned-controlled-hardware)\n\n# Apache Liminal packages\n\n## Prepare the Apache Liminal Package RC\n\n### Build RC artifacts (both source packages and convenience packages)\n\nThe Release Candidate artifacts we vote upon should be the exact ones we release, without any modification other than renaming – i.e. the contents of the files must be the same between the voted release candidate and the final release. Because of this the version in the built artifacts that will become the official Apache releases must not include the rcN suffix.\n\n- Set environment variables\n\n```bash\n# Set Version\nexport LIMINAL_BUILD_VERSION=0.0.1rc1-incubating\n\n# Example after cloning\ngit clone https://github.com/apache/incubator-liminal.git\ncd incubator-liminal\n```\n\n- Mark the next version\n```bash\n# Set Version\nsed -i '/^current_version /s/=.*$/= '\"$LIMINAL_BUILD_VERSION\"'/' .bumpversion.cfg\n```\n\n- Tag your release\n\n```bash\ngit tag -a ${LIMINAL_BUILD_VERSION} -m \"some message\"\n\n```\n\n- Clean the checkout: the sdist step below will otherwise include any untracked or changed files in the package it produces\n\n```bash\ngit clean -fxd\n```\n\n- Tarball the repo\n\n```bash\ngit archive --format=tar.gz ${LIMINAL_BUILD_VERSION} --prefix=apache-liminal-${LIMINAL_BUILD_VERSION}/ -o apache-liminal-${LIMINAL_BUILD_VERSION}-source.tar.gz\n```\n\n\n- Generate sdist\n\n    NOTE: Make sure your checkout is clean at this stage - any untracked or changed files will otherwise be included\n     in the file produced.\n\n```bash\npython setup.py sdist bdist_wheel\n```\n\n- Generate SHA512/ASC (If you have not generated a key yet, generate it by following instructions on http://www.apache.org/dev/openpgp.html#key-gen-generate-key)\n\n```bash\ndev/sign.sh apache-liminal-${LIMINAL_BUILD_VERSION}-source.tar.gz\ndev/sign.sh dist/apache-liminal-${LIMINAL_BUILD_VERSION}.tar.gz\ndev/sign.sh dist/apache_liminal-${LIMINAL_BUILD_VERSION}-py3-none-any.whl\n```\n\n- Push 
Tags\n\n```bash\ngit push origin ${LIMINAL_BUILD_VERSION}\n```\n\n- Push the artifacts to ASF dev dist repo\n```\n# First clone the repo\nsvn checkout https://dist.apache.org/repos/dist/dev/liminal liminal-dev\n\n# Create new folder for the release\ncd liminal-dev\nsvn mkdir ${LIMINAL_BUILD_VERSION}\n\n# Move the artifacts to svn folder & commit\nmv apache{-,_}liminal-${LIMINAL_BUILD_VERSION}* ${LIMINAL_BUILD_VERSION}/\ncd ${LIMINAL_BUILD_VERSION}\nsvn add *\nsvn commit -m \"Add artifacts for Liminal ${LIMINAL_BUILD_VERSION}\"\n```\n\n### Prepare PyPI convenience \"snapshot\" packages\n\nAt this point we have the artifact that we vote on, but as a convenience to developers we also want to\npublish \"snapshots\" of the RC builds to pypi for installing via pip. To do this we need to:\n\n- Verify the artifacts that would be uploaded:\n\n```bash\ntwine check dist/*\n```\n\n- Upload the package to PyPi's test environment:\n\n```bash\ntwine upload -r pypitest dist/*\n```\n\n- Verify that the test package looks good by downloading it and installing it into a virtual environment. The package download link is available at:\nhttps://test.pypi.org/project/apache-liminal/#files\n\n- Upload the package to PyPi's production environment:\n`twine upload -r pypi dist/*`\n\n- Again, confirm that the package is available here:\nhttps://pypi.python.org/pypi/apache-liminal\n\n\nIt is important to stress that this snapshot should not be named \"release\", and it\nis not supposed to be used by and advertised to the end-users who do not read the devlist.\n\n## Vote and verify the Apache Liminal release candidate\n\n### Prepare Vote email on the Apache Liminal release candidate\n\n- Use the dev/liminal-jira script to generate a list of Liminal JIRAs that were closed in the release.\n\n- Send out a vote to the dev@liminal.apache.org mailing list:\n\nSubject:\n```\n[VOTE] Liminal 1.10.2rc3\n```\n\nBody:\n\n```\nHey all,\n\nI have cut Liminal 1.10.2 RC3. 
This email is calling a vote on the release,\nwhich will last for 72 hours. Consider this my (binding) +1.\n\nLiminal 1.10.2 RC3 is available at:\nhttps://dist.apache.org/repos/dist/dev/liminal/1.10.2rc3/\n\n*apache-liminal-1.10.2rc3-source.tar.gz* is a source release that comes\nwith INSTALL instructions.\n*apache-liminal-1.10.2rc3-bin.tar.gz* is the binary Python \"sdist\" release.\n\nPublic keys are available at:\nhttps://dist.apache.org/repos/dist/release/liminal/KEYS\n\nOnly votes from PMC members are binding, but the release manager should encourage members of the community\nto test the release and vote with \"(non-binding)\".\n\nThe test procedure for PMCs and Contributors who would like to test this RC are described in\nhttps://github.com/apache/liminal/blob/master/dev/README.md#vote-and-verify-the-apache-liminal-release-candidate\n\nPlease note that the version number excludes the `rcX` string, so it's now\nsimply 1.10.2. This will allow us to rename the artifact without modifying\nthe artifact checksums when we actually release.\n\n\nChanges since 1.10.2rc2:\n*Bugs*:\n[LIMINAL-3732] ...\n...\n\n\n*Improvements*:\n[LIMINAL-3302] ...\n...\n\n\n*New features*:\n[LIMINAL-2874] ...\n...\n\n\n*Doc-only Change*:\n[LIMINAL-XXX] Fix Minor issues in Documentation\n...\n\nCheers,\n<your name>\n```\n\n### Verify the release candidate by PMCs (legal)\n\n#### PMC responsibilities\n\nThe PMCs should verify the releases in order to make sure the release is following the\n[Apache Legal Release Policy](http://www.apache.org/legal/release-policy.html).\n\nAt least 3 (+1) votes should be recorded in accordance to\n[Votes on Package Releases](https://www.apache.org/foundation/voting.html#ReleaseVotes)\n\nThe legal checks include:\n\n* checking if the packages are present in the right dist folder on svn\n* verifying if all the sources have correct licences\n* verifying if release manager signed the releases with the right key\n* verifying if all the checksums are valid for the 
release\n\n#### SVN check\n\nThe files should be present in the sub-folder of\n[Liminal dist](https://dist.apache.org/repos/dist/dev/incubator/liminal/)\n\nThe following files should be present (9 files):\n\n* .tar.gz + .asc + .sha512\n* -.whl + .asc + .sha512\n\nAs a PMC you should be able to clone the SVN repository:\n\n```shell script\n    svn co https://dist.apache.org/repos/dist/dev/incubator/liminal\n```\n\nOr update it if you already checked it out:\n\n```shell script\nsvn update .\n```\n\n#### Verify the licences\n\nThis can be done with the Apache RAT tool.\n\n* Download the latest jar from https://creadur.apache.org/rat/download_rat.cgi (unpack the sources,\n  the jar is inside)\n* Unpack the .tar.gz to a folder\n* Enter the folder and run the check (point to the place where you extracted the .jar)\n\n```shell script\njava -jar ../../apache-rat-0.13/apache-rat-0.13.jar -E .rat-excludes -d .\n```\n\n#### Verify the signatures\n\nMake sure you have the key of person signed imported in your GPG. You can find the valid keys in\n[KEYS](https://dist.apache.org/repos/dist/release/liminal/KEYS).\n\nYou can import the whole KEYS file:\n\n```shell script\ngpg --import KEYS\n```\n\nYou can also import the keys individually from a keyserver. The below one uses Kaxil's key and\nretrieves it from the default GPG keyserver\n[OpenPGP.org](https://keys.openpgp.org):\n\n```shell script\ngpg --receive-keys 12717556040EEF2EEAF1B9C275FCCD0A25FA0E4B\n```\n\nYou should choose to import the key when asked.\n\nNote that by being default, the OpenPGP server tends to be overloaded often and might respond with\nerrors or timeouts. 
Many of the release managers also uploaded their keys to the\n[GNUPG.net](https://keys.gnupg.net) keyserver, and you can retrieve it from there.\n\n```shell script\ngpg --keyserver keys.gnupg.net --receive-keys 12717556040EEF2EEAF1B9C275FCCD0A25FA0E4B\n```\n\nOnce you have the keys, the signatures can be verified by running this:\n\n```shell script\nfor i in *.asc\ndo\n   echo \"Checking $i\"; gpg --verify `basename $i .sha512 `\ndone\n```\n\nThis should produce results similar to the below. The \"Good signature from ...\" is indication\nthat the signatures are correct. Do not worry about the \"not certified with a trusted signature\"\nwarning. Most of the certificates used by release managers are self signed, that's why you get this\nwarning. By importing the server in the previous step and importing it via ID from\n[KEYS](https://dist.apache.org/repos/dist/release/liminal/KEYS) page, you know that\nthis is a valid Key already.\n\n```\nChecking apache-liminal-1.10.12rc4-bin.tar.gz.asc\ngpg: assuming signed data in 'apache-liminal-1.10.12rc4-bin.tar.gz'\ngpg: Signature made sob, 22 sie 2020, 20:28:28 CEST\ngpg:                using RSA key 12717556040EEF2EEAF1B9C275FCCD0A25FA0E4B\ngpg: Good signature from \"Kaxil Naik <kaxilnaik@gmail.com>\" [unknown]\ngpg: WARNING: This key is not certified with a trusted signature!\ngpg:          There is no indication that the signature belongs to the owner.\nPrimary key fingerprint: 1271 7556 040E EF2E EAF1  B9C2 75FC CD0A 25FA 0E4B\nChecking apache_liminal-1.10.12rc4-py2.py3-none-any.whl.asc\ngpg: assuming signed data in 'apache_liminal-1.10.12rc4-py2.py3-none-any.whl'\ngpg: Signature made sob, 22 sie 2020, 20:28:31 CEST\ngpg:                using RSA key 12717556040EEF2EEAF1B9C275FCCD0A25FA0E4B\ngpg: Good signature from \"Kaxil Naik <kaxilnaik@gmail.com>\" [unknown]\ngpg: WARNING: This key is not certified with a trusted signature!\ngpg:          There is no indication that the signature belongs to the owner.\nPrimary key 
fingerprint: 1271 7556 040E EF2E EAF1  B9C2 75FC CD0A 25FA 0E4B\nChecking apache-liminal-1.10.12rc4-source.tar.gz.asc\ngpg: assuming signed data in 'apache-liminal-1.10.12rc4-source.tar.gz'\ngpg: Signature made sob, 22 sie 2020, 20:28:25 CEST\ngpg:                using RSA key 12717556040EEF2EEAF1B9C275FCCD0A25FA0E4B\ngpg: Good signature from \"Kaxil Naik <kaxilnaik@gmail.com>\" [unknown]\ngpg: WARNING: This key is not certified with a trusted signature!\ngpg:          There is no indication that the signature belongs to the owner.\nPrimary key fingerprint: 1271 7556 040E EF2E EAF1  B9C2 75FC CD0A 25FA 0E4B\n```\n\n#### Verify the SHA512 sum\n\nRun this:\n\n```shell script\nfor i in *.sha512\ndo\n    echo \"Checking $i\"; gpg --print-md SHA512 `basename $i .sha512 ` | diff - $i\ndone\n```\n\nYou should get output similar to:\n\n```\nChecking apache_liminal-1.10.12rc4-py3-none-any.whl.sha512\nChecking apache-liminal-1.10.12rc4.tar.gz.sha512\n```\n\n### Verify if the release candidate \"works\" by Contributors\n\nThis can be done (and we encourage to) by any of the Contributors. In fact, it's best if the\nactual users of Apache Liminal test it in their own staging/test installations. 
Each release candidate\nis available on PyPI apart from SVN packages, so everyone should be able to install\nthe release candidate version of Liminal via simply (<VERSION> is 1.10.12 for example, and <X> is\nrelease candidate number 1,2,3,....).\n\n```shell script\npip install apache-liminal==<LIMINAL_BUID_VERSION>rc<X>\n```\n\n### Seek approval from Incubator PMC\n\n[Post to the Incubator’s general list requesting approval from the Incubator PMC](https://lists.apache.org/thread.html/06655226ba08c16a8cb273f9b45e0b0a15ebaed0d06783fdd06a03f6@%3Cgeneral.incubator.apache.org%3E).\nShould the Incubator PMC vote to approve a release, the Podling MAY make that release available to the public under these conditions:\n\n* The release archive(s) MUST include the word \"incubating\" in the filename.\n* The release archive(s) MUST contain a disclaimer found in DISCLAIMER file.\n\nReleases for the Podling MUST be distributed through http://www.apache.org/dist/incubator/Podling\nhttps://lists.apache.org/thread.html/06655226ba08c16a8cb273f9b45e0b0a15ebaed0d06783fdd06a03f6@%3Cgeneral.incubator.apache.org%3E\n\n## Publish the final Apache Liminal release\n\n### Summarize the voting for the Apache Liminal release\n\nOnce the vote has been passed, you will need to send a result vote to dev@liminal.apache.org:\n\nSubject:\n```\n[RESULT][VOTE] Liminal 1.10.2rc3\n```\n\nMessage:\n\n```\nHello,\n\nApache Liminal 1.10.2 (based on RC3) has been accepted.\n\n3 “+1” binding votes received:\n....\n\n3 \"+1\" non-binding votes received:\n\n...\n\nVote thread:\n...\n\nI'll continue with the release process, and the release announcement will follow shortly.\n\nCheers,\n<your name>\n```\n\n\n### Publish release to SVN\n\nYou need to migrate the RC artifacts that passed to this repository:\nhttps://dist.apache.org/repos/dist/release/liminal/\n(The migration should include renaming the files so that they no longer have the RC number in their filenames.)\n\nThe best way of doing this is to svn cp  
between the two repos (this avoids having to upload the binaries again, and gives a clearer history in the svn commit logs):\n\n```shell script\n# First clone the repo\nexport RC=1.10.4rc5\nexport VERSION=${RC/rc?/}\nsvn checkout https://dist.apache.org/repos/dist/release/liminal liminal-release\n\n# Create new folder for the release\ncd liminal-release\nsvn mkdir ${VERSION}\ncd ${VERSION}\n\n# Move the artifacts to svn folder & commit\nfor f in ../../liminal-dev/$RC/*; do svn cp $f ${$(basename $f)/rc?/}; done\nsvn commit -m \"Release Liminal ${VERSION} from ${RC}\"\n\n# Remove old release\n# http://www.apache.org/legal/release-policy.html#when-to-archive\ncd ..\nexport PREVIOUS_VERSION=1.10.1\nsvn rm ${PREVIOUS_VERSION}\nsvn commit -m \"Remove old release: ${PREVIOUS_VERSION}\"\n```\n\nVerify that the packages appear in [liminal](https://dist.apache.org/repos/dist/release/liminal/)\n\n### Prepare PyPI \"release\" packages\n\nAt this point we release an official package:\n\n- Build the package:\n\n    ```shell script\n    python setup.py sdist bdist_wheel\n    ```\n\n- Verify the artifacts that would be uploaded:\n\n    ```shell script\n    twine check dist/*\n    ```\n\n- Upload the package to PyPI's test environment:\n\n    ```shell script\n    twine upload -r pypitest dist/*\n    ```\n\n- Verify that the test package looks good by downloading it and installing it into a virtual environment.\n    The package download link is available at: https://test.pypi.org/project/apache-liminal/#files\n\n- Upload the package to PyPI's production environment:\n\n    ```shell script\n    twine upload -r pypi dist/*\n    ```\n\n- Again, confirm that the package is available here: https://pypi.python.org/pypi/apache-liminal\n\n### Update CHANGELOG.md\n\n- Get a diff between the last version and the current version:\n\n    ```shell script\n    $ git log 1.8.0..1.9.0 --pretty=oneline\n    ```\n- Update CHANGELOG.md with the details, and commit it.\n\n### Notify developers of 
release\n\n- Notify users@liminal.apache.org (cc'ing dev@liminal.apache.org and announce@apache.org) that\nthe artifacts have been published:\n\nSubject:\n```shell script\ncat <<EOF\nLiminal ${VERSION} is released\nEOF\n```\n\nBody:\n```shell script\ncat <<EOF\nDear Liminal community,\n\nI'm happy to announce that Liminal ${VERSION} was just released.\n\nThe source release, as well as the binary \"sdist\" release, are available\nhere:\n\nhttps://dist.apache.org/repos/dist/release/liminal/${VERSION}/\n\nWe also made this version available on PyPi for convenience (`pip install apache-liminal`):\n\nhttps://pypi.python.org/pypi/apache-liminal\n\nThe documentation is available on:\nhttps://liminal.apache.org/\nhttps://liminal.apache.org/1.10.2/\nhttps://liminal.readthedocs.io/en/1.10.2/\nhttps://liminal.readthedocs.io/en/stable/\n\nFind the CHANGELOG here for more details:\n\nhttps://liminal.apache.org/changelog.html#liminal-1-10-2-2019-01-19\n\nCheers,\n<your name>\nEOF\n```\n"
  },
  {
    "path": "dev/sign.sh",
    "content": "#!/usr/bin/env bash\n#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n# Use this to sign the tar balls generated from\n# python setup.py sdist --formats=gztar\n# ie. sign.sh <my_tar_ball>\n# you will still be required to type in your signing key password\n# or it needs to be available in your keychain\n\nNAME=\"${1}\"\n\ngpg --armor --output \"${NAME}.asc\" --detach-sig \"${NAME}\"\ngpg --print-md SHA512 \"${NAME}\" > \"${NAME}.sha512\"\n"
  },
  {
    "path": "docs/Makefile",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n# Minimal makefile for Sphinx documentation\n\n# You can set these variables from the command line.\nSPHINXOPTS    =\nSPHINXBUILD   = sphinx-build\nSOURCEDIR     = .\nBUILDDIR      = build\n\n# Put it first so that \"make\" without argument is like \"make help\".\nhelp:\n\t@$(SPHINXBUILD) -M help \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n\n.PHONY: help Makefile\n\n# Catch-all target: route all unknown targets to Sphinx using the new\n# \"make mode\" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).\n%: Makefile\n\t@$(SPHINXBUILD) -M $@ \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n"
  },
  {
    "path": "docs/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Apache Liminal\n\nApache Liminal is an end-to-end platform for data engineers & scientists, allowing them to build,\ntrain and deploy machine learning models in a robust and agile way.\n\nThe platform provides the abstractions and declarative capabilities for\ndata extraction & feature engineering followed by model training and serving.\nLiminal's goal is to operationalize the machine learning process, allowing data scientists to\nquickly transition from a successful experiment to an automated pipeline of model training,\nvalidation, deployment and inference in production, freeing them from engineering and\nnon-functional tasks, and allowing them to focus on machine learning code and artifacts.\n\n## Basics\n\nUsing simple YAML configuration, create your own schedule data pipelines (a sequence of tasks to\nperform), application servers,  and more.\n\n## Getting Started\nA simple hello world guide for Liminal can be found [here](getting-started/hello_world.md) \\\nA more advanced example which demonstrates a simple data-science workflow can be found [here](getting-started/iris_classification.md)\n\n## Apache Liminal Documentation\nFull documentation of Apache Liminal can be 
found [here](liminal)\n\n## High Level Architecture\nHigh level architecture documentation can be found [here](architecture.md)\n"
  },
  {
    "path": "docs/architecture.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# High Level Architecture\nLiminal is an end-to-end platform for data engineers & scientists, allowing them to build, train and deploy machine learning models in a robust and agile way. The platform provides the abstractions and declarative capabilities for data extraction & feature engineering followed by model training and serving. 
Apache Liminal's goal is to operationalise the machine learning process, allowing data scientists to quickly transition from a successful experiment to an automated pipeline of model training, validation, deployment and inference in production, freeing them from engineering and non-functional tasks, and allowing them to focus on machine learning code and artifacts.\n\n## Motivation\nThe challenges involved in operationalizing machine learning models are one of the main reasons why many machine learning projects never make it to production.\nThe process involves automating and orchestrating multiple steps which run on heterogeneous infrastructure - different compute environments, data processing platforms, ML frameworks, notebooks, containers and monitoring tools.\n\nThere are no mature standards for this workflow, and most organizations do not have the experience to build it in-house. In the best case, dev-ds-devops teams form in order to accomplish this task together; in many cases, it's the data scientists who try to deal with this themselves without the knowledge or the inclination to become infrastructure experts.\nAs a result, many projects never make it through the cycle. Those who do suffer from a very long lead time from a successful experiment to an operational, refreshable, deployed and monitored model in production.\n\nThe goal of Apache Liminal is to simplify the creation and management of machine learning pipelines by data engineers & scientists. The platform provides declarative building blocks which define the workflow, orchestrate the underlying infrastructure,  take care of non functional concerns, enabling focus in business logic / algorithm code.\nSome Commercial E2E solutions have started to emerge in the last few years, however, they are limited to specific parts of the workflow, such as Databricks MLFlow. Other solutions are tied to specific environments (e.g. 
SageMaker on AWS).\n\n## High Level Architecture\nThe platform is aimed to provide data engineers & scientists with a solution for end to end flows from model training to real time inference in production. Its architecture enables and promotes adoption of specific components in existing (non-Liminal) frameworks, as well as seamless integration with other open source projects. Liminal was created to enable scalability in ML efforts, after a thorough review of available solutions and frameworks, which did not meet our main KPIs:\n- Provide an opinionated but customizable end-to-end workflow\n- Abstract away the complexity of underlying infrastructure\n- Support major open source tools and cloud-native infrastructure to carry out many of the steps\n- Allow teams to leverage their existing investments or bring in their tools of choice into the workflow\n\nWe have found that other tech companies in the Hi-Tech ecosystem also have an interest in such a platform, hence decided to share our work with the community.\nThe following diagram depicts these main components and where Apache Liminal comes in:\n\n![](nstatic/liminal_arch_001.png)\n\nA classical data scientist workflow includes some base phases:\n_Train, Deploy and Consume._\n\n**The Train phase includes the following tasks:**\n\n1. Fetch -  get the data needed to build a model - usually using SQL\n1. Clean - make sure the data is useful for building the model\n1. Prepare - split data and encode features from the data according to model needs\n1. Train - Build the model and tune it\n1. Evaluate - make sure the model is correct - run it on a test set, etc…\n1. Validate - make sure the model is up to the standards you need\n\n**The Deploy phase includes these tasks:**\n1. Deploy - make it available for usage in production\n1. Inference - Batch or Real-time - use the model to evaluate data, offline or online, from your applications\n1. 
Consume - The actual use of the models created by applications and ETLs, usually through APIs to the batch or real-time inference that usually rely on Model and Feature stores.\n\nLiminal provides its users with declarative composition capabilities to materialize these steps in a robust way, while exploiting existing frameworks and tools, e.g. data science frameworks such as scikit-learn, TensorFlow, Keras and such, for running core data science algorithms, as well as numerous core mechanisms such as data stores, processing engines, parallelism, schedulers, code deployment, and batch and real-time inference.\nLiminal allows the creation and wiring of these kinds of functional and non-functional tasks while making the underlying infrastructure used by these tasks very easy to use and even abstracted away entirely. It also handles non-functional aspects such as monitoring (in a standard fashion), deployment, scheduling, resource management and execution.\n\n![](nstatic/liminal_arch_002.png)\n"
  },
  {
    "path": "docs/conf.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n#\n# -*- coding: utf-8 -*-\n#\n# Configuration file for the Sphinx documentation builder.\n#\n# This file does only contain a selection of the most common options. For a\n# full list see the documentation:\n# http://www.sphinx-doc.org/en/master/config\n\n# -- Path setup --------------------------------------------------------------\n\n# If extensions (or modules to document with autodoc) are in another directory,\n# add these directories to sys.path here. 
If the directory is relative to the\n# documentation root, use os.path.abspath to make it absolute, like shown here.\n#\nimport os\nimport sys\n\nsys.path.insert(0, os.path.abspath('source'))\nsys.path.insert(0, os.path.abspath('/'))\n\n# -- Project information -----------------------------------------------------\n\nproject = u'Apchae Liminal'\ncopyright = u'Apache Liminal'\nauthor = u'Apache Software Foundation'\n\n# The short X.Y version\nversion = u''\n# The full version, including alpha/beta/rc tags\nrelease = u'0.0.1'\n\n# -- General configuration ---------------------------------------------------\n\n# If your documentation needs a minimal Sphinx version, state it here.\n#\n# needs_sphinx = '1.0'\n\n# Add any Sphinx extension module names here, as strings. They can be\n# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom\n# ones.\nextensions = [\n    'recommonmark',\n    'sphinx.ext.autodoc',\n]\n\nhtml_logo = 'nstatic/liminal_logo.png'\n# Add any paths that contain templates here, relative to this directory.\ntemplates_path = ['ntemplates']\n\n# The suffix(es) of source filenames.\n# You can specify multiple suffix as a list of string:\n#\n# source_suffix = ['.rst', '.md']\nsource_suffix = '.rst'\n\n# The master toctree document.\nmaster_doc = 'index'\n\n# The language for content autogenerated by Sphinx. 
Refer to documentation\n# for a list of supported languages.\n#\n# This is also used if you do content translation via gettext catalogs.\n# Usually you set \"language\" from the command line for these cases.\nlanguage = None\n\n# List of patterns, relative to source directory, that match files and\n# directories to ignore when looking for source files.\n# This pattern also affects html_static_path and html_extra_path.\nexclude_patterns = ['README.md', '**/README.md']\n\n# The name of the Pygments (syntax highlighting) style to use.\npygments_style = None\n\n# -- Options for HTML output -------------------------------------------------\n\n# The theme to use for HTML and HTML Help pages.  See the documentation for\n# a list of builtin themes.\n#\nhtml_theme = 'classic'\n\n# Theme options are theme-specific and customize the look and feel of a theme\n# further.  For a list of options available for each theme, see the\n# documentation.\n#\n# html_theme_options = {}\n\n# Add any paths that contain custom static files (such as style sheets) here,\n# relative to this directory. They are copied after the builtin static files,\n# so a file named \"default.css\" will overwrite the builtin \"default.css\".\nhtml_static_path = ['nstatic']\n\n# Custom sidebar templates, must be a dictionary that maps document names\n# to template names.\n#\n# The default sidebars (for documents that don't match any pattern) are\n# defined by theme itself.  
Builtin themes are using these templates by\n# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',\n# 'searchbox.html']``.\n#\n# html_sidebars = {}\n\n\n# -- Options for HTMLHelp output ---------------------------------------------\n\n# Output file base name for HTML help builder.\nhtmlhelp_basename = 'ApchaeLiminaldoc'\n\n# -- Options for LaTeX output ------------------------------------------------\n\nlatex_elements = {\n    # The paper size ('letterpaper' or 'a4paper').\n    #\n    # 'papersize': 'letterpaper',\n    # The font size ('10pt', '11pt' or '12pt').\n    #\n    # 'pointsize': '10pt',\n    # Additional stuff for the LaTeX preamble.\n    #\n    # 'preamble': '',\n    # Latex figure (float) alignment\n    #\n    # 'figure_align': 'htbp',\n}\n\n# Grouping the document tree into LaTeX files. List of tuples\n# (source start file, target name, title,\n#  author, documentclass [howto, manual, or own class]).\nlatex_documents = [\n    (master_doc, 'ApchaeLiminal.tex', u'Apchae Liminal Documentation', u'Apache Software Foundation', 'manual'),\n]\n\n# -- Options for manual page output ------------------------------------------\n\n# One entry per manual page. List of tuples\n# (source start file, name, description, authors, manual section).\nman_pages = [(master_doc, 'apchaeliminal', u'Apchae Liminal Documentation', [author], 1)]\n\n# -- Options for Texinfo output ----------------------------------------------\n\n# Grouping the document tree into Texinfo files. 
List of tuples\n# (source start file, target name, title, author,\n#  dir menu entry, description, category)\ntexinfo_documents = [\n    (\n        master_doc,\n        'ApchaeLiminal',\n        u'Apchae Liminal Documentation',\n        author,\n        'ApchaeLiminal',\n        'One line description of project.',\n        'Miscellaneous',\n    ),\n]\n\n# -- Options for Epub output -------------------------------------------------\n\n# Bibliographic Dublin Core info.\nepub_title = project\n\n# The unique identifier of the text. This can be a ISBN number\n# or the project homepage.\n#\n# epub_identifier = ''\n\n# A unique identification for the text.\n#\n# epub_uid = ''\n\n# A list of files that should not be packed into the epub file.\nepub_exclude_files = ['search.html']\n\n\ndef on_missing_reference(app, env, node, contnode):\n    if node['reftype'] == 'any':\n        return contnode\n    else:\n        return None\n\n\ndef setup(app):\n    app.connect('missing-reference', on_missing_reference)\n"
  },
  {
    "path": "docs/getting-started/hello_world.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Getting started / ***Hello World***\n\nThis guide will allow you to set up your first Apache Liminal environment and allow you to create\nsome simple ML pipelines. These will be very similar to the ones you are going to build for real\nproduction scenarios.\n\n## Prerequisites\n\nPython 3 (3.6 and up)\n\n[Docker Desktop](https://www.docker.com/products/docker-desktop)\n\n*Note: Make sure kubernetes cluster is running in docker desktop (or custom kubernetes installation\non your machine).*\n\n## Deploying the Example\n\nIn this tutorial, we will go through setting up Liminal for the first time on your local machine.\n\n### First, let’s build our example project:\n\nIn the dev folder, just clone the example code from liminal:\n\n\n```BASH\ngit clone https://github.com/apache/incubator-liminal\n```\n***Note:*** *You just cloned the entire Liminal Project, you actually only need examples folder.*\n\nCreate a python virtual environment to isolate your runs:\n\n```BASH\ncd incubator-liminal/examples/liminal-getting-started\npython3 -m venv env\n```\n\nActivate your virtual environment:\n\n```BASH\nsource env/bin/activate\n```\n\nNow we are ready to install liminal:\n\n```BASH\npip 
install apache-liminal\n```\nLet's build the images you need for the example:\n```BASH\nliminal build\n```\n##### The build will create docker images based on the liminal.yml file in the `images` section.\n\n```BASH\nliminal deploy --clean\n```\nThe deploy command deploys a liminal server and deploys any liminal.yml files in your working\ndirectory or any of its subdirectories to your liminal home directory.\n\n*Note: liminal home directory is located in the path defined in LIMINAL_HOME env variable.\nIf the LIMINAL_HOME environment variable is not defined, home directory defaults to\n~/liminal_home directory.*\n\nNow let's run liminal:\n```BASH\nliminal start\n```\nThe start command spins up the liminal server containers which will run pipelines based on your\ndeployed liminal.yml files.\nIt runs the following three containers:\n* liminal-postgress\n* liminal-webserver\n* liminal-scheduler\n\nOnce liminal server has completed starting up, you can navigate to admin UI in your browser:\n[http://localhost:8080](http://localhost:8080)\nBy default liminal server starts Apache Airflow servers and admin UI will be that of Apache Airflow.\n\n\n![](../nstatic/hello-world/airflow_main.png)\n\n***Important:** Set off/on toggle to activate your pipeline (DAG), nothing will happen otherwise!*\n\nYou can go to graph view to see all the tasks configured in the liminal.yml file:\n[http://localhost:8080/admin/airflow/graph?dag_id=example_pipeline](\nhttp://localhost:8080/admin/airflow/graph?dag_id=example_pipeline\n)\n\n#### Now let's see what actually happened to our task:\n\n![](../nstatic/hello-world/airflow_view_dag.png)\n\n\n#### Click on “hello_world_example” and you will get this popup:\n\n![](../nstatic/hello-world/airflow_view_log.png)\n\n\n#### Click on “view log” button and you can see the log of the current task run:\n\n![](../nstatic/hello-world/airflow_task_log.png)\n\n\n## Mounted volumes\nAll tasks use a mounted volume as defined in the pipeline 
YAML:\n```YAML\nname: GettingStartedPipeline\nvolumes:\n  - volume: gettingstartedvol\n    claim_name: gettingstartedvol-pvc\n    local:\n      path: .\n```\nIn our case the mounted volume will point to the liminal hello world example. \\\nThe hello world task will read the **hello_world.json** file from the mounted volume and will write\nthe **hello_world_output.json** to it.\n\n*Note:* Each task will internally mount the volume defined above to an internal representation,\ndescribed under the task section in the yml:\n\n```YAML\n  task:\n  ...\n  mounts:\n     - mount: taskmount\n       volume: gettingstartedvol\n       path: /mnt/vol1\n```\n\n\n## Here is the entire list of commands, if you want to start from scratch:\n\n```\ngit clone https://github.com/apache/incubator-liminal\ncd incubator-liminal/examples/liminal-getting-started\npython3 -m venv env\nsource env/bin/activate\npip uninstall apache-liminal\npip install apache-liminal\nliminal build\nliminal deploy --clean\nliminal start\n```\n\n## Closing up\n\nTo make sure liminal containers are stopped use:\n```\nliminal stop\n```\n\nTo deactivate the python virtual env use:\n```\ndeactivate\n```\n"
  },
  {
    "path": "docs/getting-started/iris_classification.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Getting started / ***Iris Classification***\n\n* [Setup your local environment](#Setup-your-local-environment)\n* [Setup liminal](#setup-liminal)\n    * [Liminal build](#Liminal-build)\n    * [Liminal create](#Liminal-create)\n    * [Liminal deploy](#Liminal-deploy)\n    * [Liminal start](#Liminal-start)\n* [Liminal YAML walkthrough](#Liminal-YAML-walkthrough)\n* [Evaluate the Iris Classification model](#Evaluate-the-iris-classification-model)\n* [Debugging Kubernetes Deployments](#Debugging-Kubernetes-Deployments)\n* [Closing up](#Closing-up)\n\nIn this tutorial, we will guide you through setting up Apache Liminal on your local machine and run a simple machine-learning workflow, based on the classic Iris dataset classification example. 
\\\nMore details in this [link](https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html).\n\n#### Prerequisites\n\n* [Python 3 (3.6 and up)](https://www.python.org/downloads)\n* [Python Virtual Environments](https://pypi.org/project/virtualenv)\n* [Docker Desktop](https://www.docker.com/products/docker-desktop)\n* [Kubernetes CLI (kubectl)](https://kubernetes.io/docs/tasks/tools/install-kubectl-macos)\n\n*Note: Make sure kubernetes cluster is running in docker desktop*\n\nWe will define the following steps and services to implement the Iris classification example: \\\nTrain, Validate & Deploy - Training and validation execution is managed by Liminal Airflow extension. The training task trains a regression model using a public dataset. \\\nWe then validate the model and deploy it to a model-store in the mounted volume. \\\nInference - online inference is done using a Python Flask service running on the local Kubernetes in docker desktop. The service exposes the `/predict` endpoint. It reads the model stored in the mounted drive and uses it to evaluate the request.\n## Setup your local environment\n\nIn the dev folder, clone the example code from liminal:\n\n\n```BASH\ngit clone https://github.com/apache/incubator-liminal\n```\n***Note:*** *You just cloned the entire Liminal Project, you actually only need examples folder.*\n\n\n\nCreate a python virtual environment to isolate your runs:\n\n```BASH\ncd incubator-liminal/examples/aws-ml-app-demo\npython3 -m venv env\n```\n\nActivate your virtual environment:\n\n```BASH\nsource env/bin/activate\n```\n\nNow we are ready to install liminal:\n\n```BASH\npip install apache-liminal\n```\n\n## Setup liminal\n### Liminal build\nThe build will create docker images based on the liminal.yml file in the `images` section and will create a kubernetes local volume.\n\nBe informed that all tasks use a mounted volume as defined in the pipeline YAML. 
\\\nIn our case the mounted volume will point to the liminal Iris Classification example.\nThe training task trains a regression model using a public dataset. We then validate the model and deploy it to a model-store in the mounted volume.\n```BASH\nliminal build\n```\n\n### Liminal deploy\nThe deploy command deploys a liminal server and deploys any liminal.yml files in your working directory or any of its subdirectories to your liminal home directory.\n```BASH\nliminal deploy --clean\n```\n\n*Note: liminal home directory is located in the path defined in LIMINAL_HOME env variable.\nIf the LIMINAL_HOME environment variable is not defined, home directory defaults to\n~/liminal_home directory.*\n\n### Liminal start\nThe start command spins up 3 containers that load the Apache Airflow stack. Liminal's Airflow extension is responsible for executing the workflows defined in the liminal.yml file as standard Airflow DAGs.\n```BASH\nliminal start\n```\n\nIt runs the following three containers:\n* liminal-postgress\n* liminal-webserver\n* liminal-scheduler\n\nOnce liminal server has completed starting up, you can navigate to admin UI in your browser:\n[http://localhost:8080](http://localhost:8080)\n\n\n![](../nstatic/iris-classification/airflow_main.png)\n\n***Important:** Set off/on toggle to activate your pipeline (DAG), nothing will happen otherwise!*\n\nYou can go to graph view to see all the tasks configured in the liminal.yml file:\n[http://localhost:8080/admin/airflow/graph?dag_id=my_datascience_pipeline](\nhttp://localhost:8080/admin/airflow/graph?dag_id=my_datascience_pipeline\n)\n\n#### Now let's see what actually happened to our task:\n![](../nstatic/iris-classification/airflow_view_dag.png)\n\n\n#### Click on “train” and you will get this popup:\n![](../nstatic/iris-classification/airflow_view_log.png)\n\n\n#### Click on “view log” button and you can see the log of the current task run:\n![](../nstatic/iris-classification/airflow_task_log.png)\n\n## Liminal YAML 
walkthrough\n* [Mounted volumes](#Mounted-volumes)\n* [Pipeline flow](#Pipeline-flow)\n\n### Mounted volumes\nDeclaration of the mounted volume in your liminal YAML:\n```YAML\nname: MyDataScienceApp\nowner: Bosco Albert Baracus\nvolumes:\n  - volume: gettingstartedvol\n    claim_name: gettingstartedvol-pvc\n    local:\n      path: .\n```\n\n### Pipeline flow\nDeclaration of the pipeline tasks flow in your liminal YAML:\n```YAML\npipelines:\n  - pipeline: my_datascience_pipeline\n    ...\n    schedule: 0 * 1 * *\n    tasks:\n      - task: train\n        type: python\n        description: train model\n        image: myorg/mydatascienceapp\n        cmd: python -u training.py train\n        ...\n      - task: validate\n        type: python\n        description: validate model and deploy\n        image: myorg/mydatascienceapp\n        cmd: python -u training.py validate\n        ...\n```\n\n##### Each task will internally mount the volume defined above to an internal representation, described under the task section in the yml:\n\n```YAML\npipelines:\n    ...\n    tasks:\n      - task: train\n        ...\n        env:\n          MOUNT_PATH: /mnt/gettingstartedvol\n        mounts:\n          - mount: mymount\n            volume: gettingstartedvol\n            path: /mnt/gettingstartedvol\n```\n###### We specify the `MOUNT_PATH` in which we store the trained model.\n\n## Evaluate the iris classification model\n\nOnce the Iris Classification model training is completed and the model is deployed (to the mounted volume), you can launch a pod of the pre-built image which contains a flask server, by applying the following Kubernetes manifest configuration:\n```BASH\nkubectl apply -f manifests/aws-ml-app-demo.yaml\n```\n\nAlternatively, create a Kubernetes pod from stdin:\n```YAML\ncat <<EOF | kubectl apply -f -\n---\napiVersion: v1\nkind: Pod\nmetadata:\n  name: aws-ml-app-demo\nspec:\n  volumes:\n    - name: task-pv-storage\n      persistentVolumeClaim:\n        claimName: 
gettingstartedvol-pvc\n  containers:\n    - name: task-pv-container\n      imagePullPolicy: Never\n      image: myorg/mydatascienceapp\n      lifecycle:\n        postStart:\n          exec:\n            command: [\"/bin/bash\", \"-c\", \"apt update && apt install curl -y\"]\n      ports:\n        - containerPort: 80\n          name: \"http-server\"\n      volumeMounts:\n        - mountPath: \"/mnt/gettingstartedvol\"\n          name: task-pv-storage\nEOF\n```\n\nCheck that the service is running:\n```BASH\nkubectl get pods --namespace=default\n```\n\nCheck that the service is up:\n```BASH\nkubectl exec -it --namespace=default aws-ml-app-demo -- /bin/bash -c \"curl localhost/healthcheck\"\n```\n\nCheck the prediction:\n```BASH\nkubectl exec -it --namespace=default aws-ml-app-demo -- /bin/bash -c \"curl -X POST -d '{\\\"petal_width\\\": \\\"2.1\\\"}' localhost/predict\"\n```\n\n## Debugging Kubernetes Deployments\nkubectl get pods will help you check your pod status:\n```BASH\nkubectl get pods --namespace=default\n```\nkubectl logs will help you check your pods log:\n```BASH\nkubectl logs --namespace=default aws-ml-app-demo\n```\nkubectl exec to get a shell to a running container:\n```BASH\nkubectl exec --namespace=default aws-ml-app-demo -- bash\n```\nThen you can check the mounted volume with `df -h` and verify the result of the model.\n\n\n## Here are the entire list of commands, if you want to start from scratch:\n\n```\ngit clone https://github.com/apache/incubator-liminal\ncd incubator-liminal/examples/aws-ml-app-demo\npython3 -m venv env\nsource env/bin/activate\nrm -rf ~/liminal_home\npip uninstall apache-liminal\npip install apache-liminal\nliminal build\nliminal create\nliminal deploy --clean\nliminal start\n```\n\n## Closing up\n\nTo make sure liminal containers are stopped use:\n```\nliminal stop\n```\n\nTo deactivate the python virtual env use:\n```\ndeactivate\n```\n\nTo terminate the kubernetes pod:\n```\nkubectl delete pod --namespace=default aws-ml-app-demo\n```\n"
  },
  {
    "path": "docs/getting-started/spark_app_demo.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Getting started / ***Spark application***\n\n* [Setup your local environment](#Setup-your-local-environment)\n* [Spark on K8S](#Spark-On-K8S)\n    * [Setup liminal](#setup-liminal)\n    * [Liminal YAML walkthrough](#Liminal-YAML-walkthrough)\n    * [Evaluate the Iris Classification model](#Evaluate-the-iris-classification-model)\n    * [Debugging Kubernetes Deployments](#Debugging-Kubernetes-Deployments)\n\n* [Closing up](#Closing-up)\n\nIn this tutorial, we will guide you through setting up Apache Liminal on your local machine and run\na simple machine-learning workflow, based on the classic Iris dataset classification example\nincluding a feature engineering with Spark engine. 
\\\nMore details in\nthis [link](https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html).\n\n#### Prerequisites\n\n* [Python 3 (3.6 and up)](https://www.python.org/downloads)\n* [Python Virtual Environments](https://pypi.org/project/virtualenv)\n* [Docker Desktop](https://www.docker.com/products/docker-desktop)\n* [Kubernetes CLI (kubectl)](https://kubernetes.io/docs/tasks/tools/install-kubectl-macos)\n\n*Note: Make sure kubernetes cluster is running in docker desktop*\n\n## Setup your local environment\n\nIn the dev folder, clone the example code from liminal:\n\n```BASH\ngit clone https://github.com/apache/incubator-liminal\n```\n\n***Note:*** *You just cloned the entire Liminal Project, you actually only need examples folder.*\n\nCreate a python virtual environment to isolate your runs:\n\n```BASH\ncd incubator-liminal/examples/spark-app-demo/\npython3 -m venv env\n```\n\nActivate your virtual environment:\n\n```BASH\nsource env/bin/activate\n```\n\nNow we are ready to install liminal:\n\n```BASH\npip install apache-liminal\n```\n\n## Spark On K8S\n\nWe will define the following steps and services to implement the Iris classification example\nincluding a simple feature engineering with Apache Spark: \\\nClean data, Train, Validate & Deploy - Cleaning the input data set and prepare it for training and\nvalidation execution is managed by Liminal Airflow extension. The training task trains a regression\nmodel using a public dataset. \\\nWe then validate the model and deploy it to a model-store in mounted volume. \\\nInference - online inference is done using a Python Flask service running on the local Kubernetes in\ndocker desktop. The service exposes the `/predict` endpoint. 
It reads the model stored in the\nmounted drive and uses it to evaluate the request.\n\n### Setup liminal\n\n```BASH\ncd incubator-liminal/examples/spark-app-demo/k8s\n```\n\n#### Liminal build\nThe build will create docker images based on the liminal.yml file in the `images` section and will create a kubernetes local volume.\n\nBe informed that all tasks use a mounted volume as defined in the pipeline YAML. \\\nIn our case the mounted volume will point to the liminal Iris Classification example.\nThe training task trains a regression model using a public dataset. We then validate the model and deploy it to a model-store in the mounted volume.\n```BASH\nliminal build\n```\n\n#### Liminal deploy\n\nThe deploy command deploys a liminal server and deploys any liminal.yml files in your working\ndirectory or any of its subdirectories to your liminal home directory.\n\n```BASH\nliminal deploy --clean\n```\n\n*Note: liminal home directory is located in the path defined in LIMINAL_HOME env variable. If the\nLIMINAL_HOME environment variable is not defined, home directory defaults to\n~/liminal_home directory.*\n\n#### Liminal start\n\nThe start command spins up 3 containers that load the Apache Airflow stack. 
Liminal's Airflow\nextension is responsible to execute the workflows defined in the liminal.yml file as standard\nAirflow DAGs.\n\n```BASH\nliminal start\n```\n\nYou can go to graph view to see all the tasks configured in the liminal.yml file:\n[http://localhost:8080/admin/airflow/graph?dag_id=my_first_spark_pipeline](\nhttp://localhost:8080/admin/airflow/graph?dag_id=my_first_spark_pipeline\n)\n\nYou should see the following dag:\n\n![](../nstatic/spark-demo-app/k8s_dag.png)\n\n### Liminal YAML walkthrough\n\n* [Local archetype](#Local-archetype)\n* [Pipeline flow](#Pipeline-flow)\n\n#### Local archetype\n\nA superliminal for an easy local development\n\n```YAML\nname: InfraSpark\nowner: Bosco Albert Baracus\ntype: super\nexecutors:\n  - executor: k8s\n    type: kubernetes\nvariables:\n  output_root_dir: /mnt/gettingstartedvol\n  input_root_dir: ''\nimages:\n  - image: my_spark_image\n    type: spark\n    source: .\n    no_cache: True\ntask_defaults:\n  spark:\n    executor: k8s\n    executors: 2\n    application_source: '{{application}}'\n    mounts:\n      - mount: mymount\n        volume: gettingstartedvol\n        path: /mnt/gettingstartedvol\n```\n\n###### We specify the `MOUNT_PATH` in which we store the trained model.\n\nYou can read more about `superliminal` `variables` and `defaults`\nin [advanced.liminal.yml](../liminal/advanced.liminal.yml.md)\n\n#### Pipeline flow\n\nDeclaration of the pipeline tasks flow in your liminal YAML:\n\n```YAML\nname: MyFirstLiminalSparkApp\nsuper: InfraSpark\nowner: Bosco Albert Baracus\nvariables:\n  output_path: '{{output_root_dir}}/my_first_liminal_spark_app_outputs/'\n  application: data_cleanup.py\ntask_defaults:\n  python:\n    mounts:\n      - mount: mymount\n        volume: gettingstartedvol\n        path: /mnt/gettingstartedvol\npipelines:\n  - pipeline: my_first_spark_pipeline\n    start_date: 1970-01-01\n    timeout_minutes: 45\n    schedule: 0 * 1 * *\n    tasks:\n      - task: data_preprocessing\n        type: 
spark\n        description: prepare the data for training\n        application_arguments:\n          - '{{input_root_dir}}data/iris.csv'\n          - '{{output_path}}'\n      - task: train\n        type: python\n        description: train model\n        image: myorg/mydatascienceapp\n        cmd: python -u training.py train '{{output_path}}'\n        env:\n          MOUNT_PATH: /mnt/gettingstartedvol\n        ...\n```\n\n### Evaluate the iris classification model\n\nOnce the Iris Classification model training is completed and the model is deployed (to the mounted\nvolume), you can launch a pod of the pre-built image which contains a flask server, by applying the\nfollowing Kubernetes manifest configuration:\n\n```BASH\nkubectl apply -f manifests/spark-app-demo.yaml\n```\n\nAlternatively, create a Kubernetes pod from stdin:\n\n```YAML\ncat <<EOF | kubectl apply -f -\n---\napiVersion: v1\nkind: Pod\nmetadata:\n  name: spark-app-demo\nspec:\n  volumes:\n    - name: task-pv-storage\n      persistentVolumeClaim:\n        claimName: gettingstartedvol-pvc\n  containers:\n    - name: task-pv-container\n      imagePullPolicy: Never\n      image: myorg/mydatascienceapp\n      lifecycle:\n        postStart:\n          exec:\n            command: [ \"/bin/bash\", \"-c\", \"apt update && apt install curl -y\" ]\n      ports:\n        - containerPort: 80\n          name: \"http-server\"\n      volumeMounts:\n        - mountPath: \"/mnt/gettingstartedvol\"\n          name: task-pv-storage\nEOF\n```\n\nCheck that the service is running:\n\n```BASH\nkubectl get pods --namespace=default\n```\n\nCheck that the service is up:\n\n```BASH\nkubectl exec -it --namespace=default spark-app-demo -- /bin/bash -c \"curl localhost/healthcheck\"\n```\n\nCheck the prediction:\n\n```BASH\nkubectl exec -it --namespace=default spark-app-demo -- /bin/bash -c \"curl -X POST -d '{\\\"petal_width\\\": [1.1,1.1,1.1,1.1]}' localhost/predict\"\n```\n\n## Debugging Kubernetes Deployments\n\nkubectl get pods 
will help you check your pod status:\n\n```BASH\nkubectl get pods --namespace=default\n```\n\nkubectl logs will help you check your pods log:\n\n```BASH\nkubectl logs --namespace=default spark-app-demo\n```\n\nkubectl exec to get a shell to a running container:\n\n```BASH\nkubectl exec --namespace=default spark-app-demo -- bash\n```\n\nThen you can check the mounted volume with `df -h` and verify the result of the model.\n\nYou can go to graph view to see all the tasks configured in the liminal.yml file:\n[http://localhost:8080/admin/airflow/graph?dag_id=my_first_spark_pipeline](\nhttp://localhost:8080/admin/airflow/graph?dag_id=my_first_spark_pipeline\n)\n\n## Here are the entire list of commands, if you want to start from scratch:\n\n```\ngit clone https://github.com/apache/incubator-liminal\ncd incubator-liminal/examples/spark-app-demo/k8s\n# cd incubator-liminal/examples/spark-app-demo/emr\npython3 -m venv env\nsource env/bin/activate\nrm -rf ~/liminal_home\npip uninstall apache-liminal\npip install apache-liminal\nliminal build\nliminal create\nliminal deploy --clean\nliminal start\n```\n\n## Closing up\n\nTo make sure liminal containers are stopped use:\n\n```\nliminal stop\n```\n\nTo deactivate the python virtual env use:\n\n```\ndeactivate\n```\n\nTo terminate the kubernetes pod:\n\n```\nkubectl delete pod --namespace=default spark-app-demo\n```\n"
  },
  {
    "path": "docs/index.rst",
    "content": ".. Apache Liminal documentation master file, created by\n   sphinx-quickstart on Sun Nov 15 08:45:27 2020.\n   You can adapt this file completely to your liking, but it should at least\n   contain the root `toctree` directive.\n\n..\n   Licensed to the Apache Software Foundation (ASF) under one\n   or more contributor license agreements.  See the NOTICE file\n   distributed with this work for additional information\n   regarding copyright ownership.  The ASF licenses this file\n   to you under the Apache License, Version 2.0 (the\n   \"License\"); you may not use this file except in compliance\n   with the License.  You may obtain a copy of the License at\n..\n\n..  http://www.apache.org/licenses/LICENSE-2.0\n\n..\n   Unless required by applicable law or agreed to in writing,\n   software distributed under the License is distributed on an\n   \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n   KIND, either express or implied.  See the License for the\n   specific language governing permissions and limitations\n   under the License.\n..\n\nApache Liminal\n==============\n\nApache Liminal is a data systems orchestration platform. Liminal enables data scientists and data\nengineers to define their data systems and flow using configuration. From feature engineering to\nproduction monitoring, the framework takes care of the infrastructure behind the scenes and\nintegrates with it seamlessly.\n\n.. toctree::\n   :maxdepth: 1\n\n   getting_started\n   liminal/index\n   architecture\n\nIndices and tables\n==================\n\n* :ref:`genindex`\n* :ref:`modindex`\n* :ref:`search`\n"
  },
  {
    "path": "docs/liminal/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Apache Liminal Documentation\n\n## Overview\n\nApache Liminal is an end-to-end platform for data engineers & scientists, allowing them to build,\ntrain and deploy machine learning models in a robust and agile way.\n\nThe platform provides the abstractions and declarative capabilities for\ndata extraction & feature engineering followed by model training and serving.\nLiminal's goal is to operationalize the machine learning process, allowing data scientists to\nquickly transition from a successful experiment to an automated pipeline of model training,\nvalidation, deployment and inference in production, freeing them from engineering and\nnon-functional tasks, and allowing them to focus on machine learning code and artifacts.\n\n## Chapters\n\n1. [liminal.yml](liminal.yml.md)\n2. [Images](images)\n3. [Pipelines](pipelines.md)\n4. [Tasks](tasks)\n5. [Services](services.md)\n6. [Monitoring](monitoring.md)\n7. [Metrics Backends](metrics_backends)\n8. [Alerts Backends](alerts_backends)\n9. [Advanced liminal.yml](advanced.liminal.yml.md)\n10. [Executors](executors)\n"
  },
  {
    "path": "docs/liminal/advanced.liminal.yml.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Advanced liminal.yml\n\nIn this section you will learn about advanced features of liminal.yml\n\n## Variables\n\nMuch like in programming languages, you can define variables for re-use across your liminal.yml\nfile.\n\n```yaml\nvariables:\n  myvar1: myvalue1\n  myvar2: myvalue2\n```\n\nIn the `variables` section of your liminal.yml you can define your variables as key value pairs\n\n## Placeholders\n\nYou can use placeholders of format `{{myplaceholder}}` in most any string value in your liminal.yml\nMake sure that any string that includes placeholders is surrounded by single or double quotes.\n\nFor example:\n\n```yaml\nvariables:\n  image_name: myorg/myrepo:myapp\nimages:\n  - image: '{{image_name}}'\n    type: python\n    source: .\npipelines:\n  - pipeline: my_pipeline\n    tasks:\n      - task: my_python_task\n        type: python\n        image: \"{{image_name}}\"\n        cmd: python -u my_module.py\n```\n\n### Placeholders rendering\n\nThe process of rendering placeholders checks for values in several places, by order.\n\n#### Pipeline placeholder rendering\n\nWhen running pipelines placeholders will be rendered by replacing values from sources in the\nfollowing 
order:\n\n1. Current [DAG run conf](https://airflow.apache.org/docs/apache-airflow/stable/dag-run.html)\n   (Airflow).\n2. liminal.yml [variables](#variables) section.\n3. [Airflow variables](https://airflow.apache.org/docs/apache-airflow/stable/howto/variable.html)\n   (Airflow).\n4. [Airflow macros](https://airflow.apache.org/docs/apache-airflow/stable/macros-ref.html)\n   (Airflow). For example: `\"{{yesterday_ds}}\"`. For more information see:\n   Airflow [Jinja Templating](https://airflow.apache.org/docs/apache-airflow/stable/concepts.html#jinja-templating)\n   (Airflow).\n\n#### Build placeholder rendering\n\nWhen using `liminal build` placeholders will be rendered by replacing values from sources in the\nfollowing order:\n\n1. Environment variables.\n2. liminal.yml [variables](#variables) section.\n\n## Task variables\n\nIn addition to the variables section you can also set specific variables for specific `task`s using\nthe `variables`\nattribute of that task. For example:\n\n```yaml\n  - task: my_task\n    type: python\n    cmd: python -u my_module.py\n    variables:\n      myvar1: myvalue1\n      myvar2: myvalue2\n```\n\nYou can also pass a reference to a variable map (dictionary) set in your variables section:\n\n```yaml\nvariables:\n  my_variable_map1:\n    myvar1: myvalue1\n    myvar2: myvalue2\n  my_variable_map2:\n    myvar1: myvalue_a\n    myvar2: myvalue_x\npipelines:\n  - pipeline: my_pipeline\n    tasks:\n      - task: my_task1\n        type: python\n        cmd: python -u my_module.py\n        variables: my_variable_map1\n      - task: my_task2\n        type: python\n        cmd: python -u my_module.py\n        variables: my_variable_map2\n```\n\n## Executors\n\nFor fully detailed information on executors see: [executors](executors).\n\nEach `task` in your pipelines has a reference to an executor from the `executors` section of your\nliminal.yml file. 
If not specified, the appropriate default executor for that task type is used.\n\n```yaml\nexecutors:\n  - executor: my_kubernetes_executor\n    type: kubernetes\n    resources:\n      request_memory: 128Mi\npipelines:\n  - pipeline: my_pipeline\n    tasks:\n      - task: my_python_task\n        type: python\n        image: myorg/myrepo:mypythonapp\n        executor: my_kubernetes_executor\n        cmd: python -u my_module.py\n```\n\nIn the example above we define an `executor` of type `kubernetes` with custom resources\nconfiguration.\n\n`executors` is a section in the root of your liminal.yml file and is a list of `executor`s defined\nby the following attributes:\n\n### executor attributes\n\n`executor`: name of your executor.\n\n`type`: type of the executor. The currently available executor types are: `kubernetes` and `emr`.\n\nDifferent executor types support their own additional configuration.\n\n## Task Defaults\n\n`task_defaults` is a section in the root of your liminal.yml file in which default attributes can be\nset for each `task`\ntype.\n\n```yaml\ntask_defaults:\n  python:\n    executor: my_kubernetes_executor\n    env_vars:\n      env: '{{env}}'\n      foo: bar\n```\n\nIn the example above we set default attributes for any `python` task in any pipeline in the liminal\nsystem defined by our liminal.yml file. Each `python` task that does not set these attributes will\ndefault to the setting in `task_defaults`.\n\nIf the same attribute is defined in both `task_defaults` and in the `task`, definitions from the\n`task` take precedence. 
If a map (dictionary) of values (for example `env_vars`) is defined in both\n`task_defaults` and the `task`, the two maps will be merged, with definitions in the `task` taking\nprecedence in case a key is defined in both maps.\n\n## Pipeline Defaults\n\n`pipeline_defaults` is a section in the root of your liminal.yml file in which default attributes\ncan be set for each `pipeline` in the liminal system defined in your liminal.yml file.\n\n```yaml\npipeline_defaults:\n  start_date: 2021-01-01\n  timeout_minutes: 30\n```\n\nIn the example above we set default attributes for any `pipeline` defined in our liminal.yml file.\nEach `pipeline` that does not set these attributes will default to the setting in\n`pipeline_defaults`.\n\nIf the same attribute is defined in both `pipeline_defaults` and in the `pipeline`, definitions from\nthe `pipeline` take precedence.\n\nIf `tasks` section is defined in `pipeline_defaults` each pipeline defined in our liminal.yml file\nwill have the tasks defined in `pipeline_defaults`.\n\nA special `task` type `pipeline` may be used in `pipeline_defaults` `tasks` section. This task type\nis interpreted as\n\"tasks defined in pipeline go here\". This allows flexibility of defining common tasks to be run\nbefore and after the tasks of each pipeline defined in our liminal.yml file. 
For example:\n\n```yaml\npipeline_defaults:\n  before_tasks:\n    - task: my_common_setup_task\n      type: python\n      image: myorg/myrepo:mypythonapp\n      cmd: python -u my_setup_module.py\n  after_tasks:\n    - task: my_common_teardown_task1\n      type: python\n      image: myorg/myrepo:mypythonapp\n      cmd: python -u my_teardown_module1.py\n    - task: my_common_teardown_task2\n      type: python\n      image: myorg/myrepo:mypythonapp\n      cmd: python -u my_teardown_module2.py\n```\n\nIn the example above we set a list of `tasks` in `pipeline_defaults` which leads to each pipeline\ndefined in our liminal.yml file having `my_common_setup_task` run before its tasks and\n`my_common_teardown_task1` and `my_common_teardown_task2` after its tasks.\n\n## Inheritance\n\nEach liminal.yml defines a subliminal layer of a liminal system. A subliminal layer can inherit\nattributes of a superliminal layer by specifying `super: name_of_super` in the root of the yml.\n\n```yaml\nname: my_system\nsuper: my_super\n```\n\nA super is found by this name if a liminal.yml file in your environment exists with that name. A\nsuperliminal layer liminal.yml file needs to define its `type` as `super`:\n\n```yaml\nname: my_super\ntype: super\n```\n\nIf not specified, the `type` of a liminal.yml file defaults to `sub` (subliminal).\n\nA superliminal can also inherit from another superliminal by defining its own `super` attribute.\n\n```yaml\nname: my_super\ntype: super\nsuper: my_other_super\n```\n\nA liminal system can have 1 subliminal layer but many superliminal layers using inheritance. 
This\nallows us to chain common behaviors of our systems into several superliminal layers.\n\nIf no `super` is defined for a liminal.yml file then it defaults to having the\n[base](https://github.com/incubator-liminal/liminal/core/config/defaults/base/liminal.yml)\nbe its super.\n\n### superliminal attributes\n\nA superliminal layer can define the following attributes:\n\n```\nvariables\nexecutors\nmonitoring\ntask_defaults\npipeline_defaults\n```\n\nIf the same attribute section is defined in both a superliminal and a lower layer of the system they\nwill be merged, with key collisions favoring the lower level layer.\n"
  },
  {
    "path": "docs/liminal/executors/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Executors\n\nEach `task` in your pipelines has a reference to an executor from the `executors` section of your\nliminal.yml file. If not specified, the appropriate default executor for that task type is used.\n\n```yaml\nexecutors:\n  - executor: my_kubernetes_executor\n    type: kubernetes\n    resources:\n      request_memory: 128Mi\npipelines:\n  - pipeline: my_pipeline\n    tasks:\n      - task: my_python_task\n        type: python\n        image: myorg/myrepo:mypythonapp\n        executor: my_kubernetes_executor\n        cmd: python -u my_module.py\n```\n\nIn the example above we define an `executor` of type `kubernetes` with custom resources\nconfiguration.\n\n`executors` is a section in the root of your liminal.yml file and is a list of `executor`s defined\nby the following attributes:\n\n## executor attributes\n\n`executor`: name of your executor.\n\n`type`: type of the executor. The currently available executor types are: `kubernetes` and `emr`.\n\nDifferent executor types support their own additional configuration.\n\n## executor types\n\n1. [kubernetes](kubernetes.md)\n2. [emr](emr.md)\n"
  },
  {
    "path": "docs/liminal/executors/emr.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# emr executor\n\nThe `emr` executor allows you to run tasks on an AWS EMR cluster.\n\n```yaml\n  - executor: my_emr_cluster\n    type: emr\n    cluster_name: liminal-cluster\n```\n\n## attributes\n\n`cluster_name`: the name of the cluster used by the executor.\n\nOR\n\n`cluster_id`: the unique identifier of the cluster used by the executor.\n\n`aws_conn_id`: a reference to the emr connection. default: `aws_default`\n\n`cluster_states`: the cluster state filters to apply when searching for an existing cluster.\ndefault: `['RUNNING', 'WAITING']`\n\n`properties`: any attribute of [aws-resource-emr-step-properties](\nhttps://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-emr-step.html#aws-resource-emr-step-properties)\n(Optional)\n\n## supported task types\n- [spark](../tasks/spark.md)\n- [sql](../tasks/sql.md)\n"
  },
  {
    "path": "docs/liminal/executors/index.rst",
    "content": ".. Apache Liminal documentation master file, created by\n   sphinx-quickstart on Sun Nov 15 08:45:27 2020.\n   You can adapt this file completely to your liking, but it should at least\n   contain the root `toctree` directive.\n\n..\n   Licensed to the Apache Software Foundation (ASF) under one\n   or more contributor license agreements.  See the NOTICE file\n   distributed with this work for additional information\n   regarding copyright ownership.  The ASF licenses this file\n   to you under the Apache License, Version 2.0 (the\n   \"License\"); you may not use this file except in compliance\n   with the License.  You may obtain a copy of the License at\n..\n\n..  http://www.apache.org/licenses/LICENSE-2.0\n\n..\n   Unless required by applicable law or agreed to in writing,\n   software distributed under the License is distributed on an\n   \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n   KIND, either express or implied.  See the License for the\n   specific language governing permissions and limitations\n   under the License.\n..\n\nExecutors\n=========\n\nEach ``task`` in your pipelines has a reference to an executor from the ``executors`` section of\nyour liminal.yml file. If not specified, the appropriate default executor for that task type is\nused.\n\n.. 
code-block:: yaml\n\n   executors:\n     - executor: my_kubernetes_executor\n       type: kubernetes\n       resources:\n         request_memory: 128Mi\n   pipelines:\n     - pipeline: my_pipeline\n       tasks:\n         - task: my_python_task\n           type: python\n           image: myorg/myrepo:mypythonapp\n           executor: my_kubernetes_executor\n           cmd: python -u my_module.py\n\n..\n\nIn the example above we define an ``executor`` of type ``kubernetes`` with custom resources\nconfiguration.\n\n``executors`` is a section in the root of your liminal.yml file and is a list of ``executor`` s\ndefined by the following attributes:\n\nexecutor attributes\n'''''''''''''''''''\n\n``executor``: name of your executor.\n\n``type``: type of the executor. The currently available executor types are: ``kubernetes`` and ``emr``.\n\nDifferent executor types support their own additional configuration.\n\nexecutor types\n''''''''''''''\n\n.. toctree::\n   :maxdepth: 1\n\n   kubernetes\n"
  },
  {
    "path": "docs/liminal/executors/kubernetes.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# kubernetes executor\n\nThe `kubernetes` executor allows you to run tasks on kubernetes.\n\n```yaml\n  - executor: my_kubernetes_executor\n    type: kubernetes\n```\n\n## attributes\n\nIf running pipelines on Airflow, any attribute of\n[KubernetesPodOperator](https://airflow.apache.org/docs/apache-airflow/1.10.12/_api/airflow/contrib/operators/kubernetes_pod_operator/index.html)\ncan be set as an attribute of the executor.\n\nNote that some of these attributes are set in the `task`:\n```\nimage\ncmd\nmounts\nname\nenv_vars\n```\nFor example, see: [python task](../tasks/python.md)\n"
  },
  {
    "path": "docs/liminal/extensibility.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Extensibility\n\nThe extensibility designed to have an easy way for the users to add their tasks, executors, image\nbuilders and extend the library so that it fits the level of abstraction that suits the user\nenvironment.\n\n## Location\n\nTasks folder: `{LIMINAL_HOME}/plugins/tasks`\n\nExecutors folder: `{LIMINAL_HOME}/plugins/executors`\n\nImage builders folder: `{LIMINAL_HOME}/plugins/images`\n\n## Example\n\n### Prerequisites\n\nApache Liminal\n\n### Guide\n\nCheck out the examples for each one of the extensible item\nin [examples/extensibility](../../examples/extensibility)\n\nCopy the extensible items to the plugin location:\n\n```shell\ncp -r ../../examples/extensibility/executors/* $LIMINAL_HOME/liminal/plugins/executors/\n```\n\n```shell\ncp -r ../../examples/extensibility/tasks/* $LIMINAL_HOME/liminal/plugins/tasks/\n```\n\n```shell\ncp -r ../../examples/extensibility/images/* $LIMINAL_HOME/liminal/plugins/images/\n```\n\n```shell\nliminal build .\nliminal deploy --clean\nliminal start\n```\n"
  },
  {
    "path": "docs/liminal/images/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Images\n\nIn the `images` section you can configure how to pack your code into docker images. This can be\nachieved in liminal easily, without any Docker knowledge needed by setting just a few attributes:\n\n```yaml\nimages:\n  - image: myorg/myrepo:mypythonapp\n    type: python\n    source: .\n  - image: myorg/myrepo:myserver\n    type: python_server\n    source: path/to/my/server/code\n    endpoints:\n      - endpoint: /myendpoint\n        module: my_module\n```\n\n`images` is a section in the root lof your liminal.yml file and is a list of `image`s, defined\nby the following attributes:\n\n## image attributes\n\n`image`: name of your image, usually will be in the form of `user/repository:tag` common in docker\nimage repositories such as Docker Hub, but for local development this can be any string.\n\n`type`: type of the image. 
Usually this will match your coding language, or your coding language\nfollowed by an `_` and a specific style of application (such as a server).\n\n`source`: location of source files to include in the image, this can be a relative path within your\nproject, or even `.` which means the entire project should be packaged in the image.\n\n`build_cmd`: certain languages require to be built or compiled, here you can provide your build\ncommand to be executed in order to build your code.\n\nOther image types might require additional configuration, for example, servers require `endpoints`\nto be configured.\n\n## image types\n\n1. [python](python.md)\n2. [python server](python_server.md)\n"
  },
  {
    "path": "docs/liminal/images/index.rst",
    "content": ".. Apchae Liminal documentation master file, created by\n   sphinx-quickstart on Sun Nov 15 08:45:27 2020.\n   You can adapt this file completely to your liking, but it should at least\n   contain the root `toctree` directive.\n\n..\n   Licensed to the Apache Software Foundation (ASF) under one\n   or more contributor license agreements.  See the NOTICE file\n   distributed with this work for additional information\n   regarding copyright ownership.  The ASF licenses this file\n   to you under the Apache License, Version 2.0 (the\n   \"License\"); you may not use this file except in compliance\n   with the License.  You may obtain a copy of the License at\n..\n\n..  http://www.apache.org/licenses/LICENSE-2.0\n\n..\n   Unless required by applicable law or agreed to in writing,\n   software distributed under the License is distributed on an\n   \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n   KIND, either express or implied.  See the License for the\n   specific language governing permissions and limitations\n   under the License.\n..\n\nImages\n======\n\nIn the ``images`` section you can configure how to pack your code into docker images. This can be\nachieved in liminal easily, without any Docker knowledge needed by setting just a few attributes:\n\n.. 
code-block:: yaml\n\n   images:\n     - image: myorg/myrepo:mypythonapp\n       type: python\n       source: .\n     - image: myorg/myrepo:myserver\n       type: python_server\n       source: path/to/my/server/code\n       endpoints:\n         - endpoint: /myendpoint\n           module: my_module\n..\n\n``images`` is a section in the root of your liminal.yml file and is a list of\n``image`` s, defined by the following attributes:\n\nimage attributes\n''''''''''''''''\n\n``image``: name of your image, usually will be in the form of ``user/repository:tag`` common in\ndocker image repositories such as Docker Hub, but for local development this can be any string.\n\n``type``: type of the image. Usually this will match your coding language, or your coding language\nfollowed by an ``_`` and a specific style of application (such as a server).\n\n``source``: location of source files to include in the image, this can be a relative path within\nyour project, or even ``.`` which means the entire project should be packaged in the image.\n\n``build_cmd``: certain languages require to be built or compiled, here you can provide your build\ncommand to be executed in order to build your code.\n\nOther image types might require additional configuration, for example, servers require ``endpoints``\nto be configured.\n\nimage types\n'''''''''''\n\n.. toctree::\n   :maxdepth: 1\n\n   python\n   python_server\n"
  },
  {
    "path": "docs/liminal/images/python.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# python image\n\nThe `python` image builder packages your python code as a docker image.\n\n```yaml\n  - image: myorg/myrepo:mypythonapp\n    type: python\n    source: relative/path/to/source\n```\n\n## attributes\n\n`image`: name of your image.\n\n`source`: location of source files to include in the image, this can be a relative path within your\nproject, or even `.` which means the entire project should be packaged in the image.\n"
  },
  {
    "path": "docs/liminal/images/python_server.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# python_server image\n\nThe `python_server` image builder packages your python code as a docker image which starts a web\nserver to serve your code via HTTP calls.\n\n```yaml\n  - image: myorg/myrepo:mypythonserver\n    type: python_server\n    source: relative/path/to/source\n    endpoints:\n      - endpoint: /myendpoint\n        module: my_module\n        function: my_function\n      - endpoint: /myotherendpoint\n        module: my_module2\n        function: my_function2\n```\n\n## attributes\n\n`image`: name of your image.\n\n`source`: location of source files to include in the image, this can be a relative path within your\nproject, or even `.` which means the entire project should be packaged in the image.\n\n`endpoint`: list of `endpoint`s to expose in the server.\n\n## endpoint attributes\n\n`endpoint`: path to your endpoint in the server.\n\n`module`: module containing your user code.\n\n`function`: name of the function within the module that should be called when handling this\nendpoint. the function should expect a dictionary as an input (or None if no input) and should\nreturn a string to return in the response to the HTTP call.\n"
  },
  {
    "path": "docs/liminal/index.rst",
    "content": ".. Apchae Liminal documentation master file, created by\n   sphinx-quickstart on Sun Nov 15 08:45:27 2020.\n   You can adapt this file completely to your liking, but it should at least\n   contain the root `toctree` directive.\n\n..\n   Licensed to the Apache Software Foundation (ASF) under one\n   or more contributor license agreements.  See the NOTICE file\n   distributed with this work for additional information\n   regarding copyright ownership.  The ASF licenses this file\n   to you under the Apache License, Version 2.0 (the\n   \"License\"); you may not use this file except in compliance\n   with the License.  You may obtain a copy of the License at\n..\n\n..  http://www.apache.org/licenses/LICENSE-2.0\n\n..\n   Unless required by applicable law or agreed to in writing,\n   software distributed under the License is distributed on an\n   \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n   KIND, either express or implied.  See the License for the\n   specific language governing permissions and limitations\n   under the License.\n..\n\nApache Liminal Documentation\n============================\n\nChapters:\n\n.. toctree::\n   :maxdepth: 1\n\n   liminal.yml\n   images/index.rst\n   pipelines\n   tasks/index.rst\n   services\n   monitoring\n   metrics_backends/index.rst\n   advanced.liminal.yml\n   executors/index.rst\n"
  },
  {
    "path": "docs/liminal/kubernetes/secret_util.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Secret Utils\n\nEach `task` in your pipelines has a reference to an secret from the `secrets` section of your\nliminal.yml file.\n\n```yaml\n---\nname: k8s_secret_example\nsecrets:\n  - secret: aws\n    local_path_file: \"~/.aws/credentials\"\nexecutors:\n  - executor: k8s\n    type: kubernetes\nvariables:\n  AWS_CONFIG_FILE: /mnt/credentials\ntask_defaults:\n  python:\n    executor: k8s\n    image: amazon/aws-cli:2.7.23\n    executors: 2\n    env_vars:\n      AWS_CONFIG_FILE: \"/secret/credentials\"\n    secrets:\n      - secret: aws\n        remote_path: \"/secret\"\npipelines:\n  - pipeline: k8s_secret_example\n    owner: Bosco Albert Baracus\n    start_date: 1970-01-01\n    timeout_minutes: 10\n    schedule: 0 * 1 * *\n    tasks:\n      - task: my_python_task\n        type: python\n        cmd: aws s3 ls\n```\n\nThat example manifest defines a Secret Opaque for AWS credentials used. 
The values are Base64 strings in the manifest; however, when you use the Secret with a Pod, the kubelet provides the decoded data to the Pod and its containers.\n\nIn order to make use of the AWS credentials we define an environment variable `AWS_CONFIG_FILE` to authenticate our requests.\n\nFor example, the file generated by the AWS CLI for a default profile configured with `aws configure` looks similar to the following.\n\n`~/.aws/credentials`\n```\n[default]\naws_access_key_id=<aws_access_key_id>\naws_secret_access_key=<aws_secret_access_key>\nregion = us-east-1\naws_session_token=<aws_session_token>\n```\n"
  },
  {
    "path": "docs/liminal/liminal.yml.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n## liminal.yml\n\nDefinition of your own liminal system is done via yml configuration file named liminal.yml in\nyour project. This yml is the one place in which you define all characteristics and behavior of your\nliminal system.\nIn this section we'll go over the anatomy of the liminal.yml file.\n\n### root attributes\n\nAt the root level of your limimal.yml you can define the most basic root characteristics of your\nliminal system such as name and owner:\n\n```yaml\nname: MyLiminalSystem\nowner: Bosco Albert Baracus\n```\n\n### images\n\nFor fully detailed information on pipelines see: [images](images).\n\nIn the `images` section you can configure how to pack your code into docker images. 
This can be\nachieved in liminal easily, without any Docker knowledge needed by setting just a few attributes:\n\n```yaml\nimages:\n  - image: myorg/myrepo:mypythonapp\n    type: python\n    source: .\n  - image: myorg/myrepo:myserver\n    type: python_server\n    source: path/to/my/server/code\n    endpoints:\n      - endpoint: /myendpoint\n        module: my_module\n        function: my_function\n```\n\n`images` is a section in the root lof your liminal.yml file and is a list of `image`s, defined\nby the following attributes:\n\n#### image attributes\n\n`image`: name of your image, usually will be in the form of `user/repository:tag` common in docker\nimage repositories such as Docker Hub, but for local development this can be any string.\n\n`type`: type of the image. Usually this will match your coding language, or your coding langauge\nfollowed by an `_` and a specific style of application (such as a server).\nThe current available image types are: `python`.\n\n`source`: location of source files to include in the image, this can be a relative path within your\nproject, or even `.` which means the entire project should be packaged in the image.\n\n`build_cmd`: certain languages require to be built or compiled, here you can provide your build\ncommand to be executed in order to build your code.\n\nOther image types might require additional configuration, for example, servers require `endpoints`\nto be configured.\n\n### pipelines\n\nFor fully detailed information on pipelines see: [pipelines](pipelines.md).\n\nIn the `pipelines` section you can configure scheduled/manually run pipelines of tasks to be\nexecuted sequentially:\n\n```yaml\npipelines:\n  - pipeline: my_data_pipeline\n    timeout_minutes: 30\n    start_date: 2020-03-01\n    schedule: 0 10 * * *\n    tasks:\n      - task: my_sql_task\n        type: sql\n        query: \"SELECT * FROM\n        {{my_database_name}}.{{my_table_name}}\n        WHERE event_date_prt >=\n              '{{yesterday_ds}}'\"\n   
           AND cms_platform = 'xsite'\n        output_table: my_db.my_out_table\n        output_path: s3://my_bky/{{env}}/mydir\n      - task: my_python_task\n        type: python\n        image: myorg/myrepo:pythonapp\n        cmd: python my_python_app.py\n        env_vars:\n          env: {{env}}\n          fizz: buzz\n      - task: my_python_task\n        type: python\n        image: myorg/myrepo:mypythonapp\n        cmd: python -u my_module.py\n        env_vars:\n          env: {{env}}\n          fizz: buzz\n```\n\n`pipelines` is a section in the root lof your liminal.yml file and is a list of `pipeline`s defined\nby the following attributes:\n\n#### pipeline attributes\n\n`pipeline`: name of your pipeline (must be unique per liminal server).\n\n`timeout_minutes`: maximum allowed pipeline run time in minutes, if run exceeds this time, pipeline\nand all running tasks will fail.\n\n`start_date`: start date for the pipeline.\n\n`schedule`: to be configured if the pipeline should run on a schedule. Format is\n[cron expression](https://en.wikipedia.org/wiki/Cron#CRON_expression).\n\n`tasks`: list of `task`s, defined by the following attributes:\n\n##### task attributes\n\nFor fully detailed information on tasks see: [tasks](tasks).\n\n`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).\n\n`type`: type of the task. Examples of available task types are: `python`.\nand more..\n\nDifferent task types require their own additional configuration. 
For example, `python` task requires\n`image` to be configured.\n\n### services\n\nFor fully detailed information on services see: [services](services.md).\n\nIn the `services` section you can configure constantly running applications such as\nservers.\n\n```yaml\nservices:\n  - service: my_server\n    image: myorg/myrepo:myserver\n```\n\n`services` is a section in the root of your liminal.yml file and is a list of `service`s, defined\nby the following attributes:\n\n#### service attributes\n\n`service`: name of your service.\n\n`image`: the service's docker image.\n\n### monitoring\n\nFor fully detailed information on monitoring see: [monitoring](monitoring.md).\n\nIn the `monitoring` section you can configure monitoring for your pipelines and services.\n\n```yaml\nmonitoring:\n  metrics_backends:\n    - metrics_backend: cloudwatch_metrics\n      type: aws_cloudwatch\n      namespace: DataPipeline\n      AWS_REGION_NAME: us-east-1\n  alerts_backends:\n    - alerts_backend: cloudwatch_alerts\n      type: aws_cloudwatch\n      metrics_backend: cloudwatch_metrics\n      ok_actions: ['arn:aws:sns:...']\n      alarm_actions: ['arn:aws:sns:...']\n```\n\n`monitoring` is a section in the root of your liminal.yml file and is a list of `metrics_backend`s and\na list of `alerts_backend`s.\n\n`metrics_backend`s are where metrics for your pipelines are automatically sent.\n\n`alerts_backend`s automatically register alerts based on `metrics_backend` they are paired with.\n\n#### metrics_backend attributes\n\n`metrics_backend`: name of your metrics backend\n\n`type`: type of the metrics backend. The current available metrics backends are: `aws_cloudwatch`.\n\nDifferent metrics backend types require their own additional configuration. For example,\n`aws_cloudwatch` metrics backend requires `namespace` to be configured.\n\n#### alerts_backend attributes\n\n`alerts_backend`: name of your alerts backend\n\n`type`: type of the alerts backend. 
The current available alerts backends are: `aws_cloudwatch`.\n\n`metrics_backend`: name of the metrics backends to register alerts for.\n\nDifferent alerts backend types require their own additional configuration. For example,\n`aws_cloudwatch` alerts backend requires `alarm_actions` to be configured.\n\n### advanced topics\n\nFor documentation of more advanced features of liminal.yml see:\n[advanced liminal.yml](advanced.liminal.yml.md)\n"
  },
  {
    "path": "docs/liminal/metrics_backends/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Metrics Backends\n\n`metrics_backend` is the definition of a metrics backend to which all automatically\ngenerated metrics from your pipeline are sent to and is part of the `metrics_backends` list in the\n`monitoring` section of your liminal.yml\n\n```yaml\n  - metrics_backend: cloudwatch_metrics\n    type: aws_cloudwatch\n    namespace: DataPipeline\n    AWS_REGION_NAME: us-east-1\n```\n\nA `metrics_backend` is defined by the following attribtues:\n\n## metrics_backend attributes\n\n`metrics_backend`: name of your metrics backend\n\n`type`: type of the metrics backend. The current available metrics backends are: `aws_cloudwatch`.\n\nDifferent metrics backend types require their own additional configuration. For example,\n`aws_cloudwatch` metrics backend requires `namespace` to be configured.\n\n## metrics_backend types\n\n1. [aws cloudwatch](aws_cloudwatch.md)\n"
  },
  {
    "path": "docs/liminal/metrics_backends/aws_cloudwatch.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# aws_cloudwatch metrics backend\n\nThe `aws_cloudwatch` metrics backend allows you to send all automatically generated metrics from\nyour pipelines to AWS CloudWatch.\n\n```yaml\n  - metrics_backend: cloudwatch_metrics\n    type: aws_cloudwatch\n    namespace: DataPipeline\n    AWS_REGION_NAME: us-east-1\n```\n\n## attributes\n\n`metrics_backend`: name of your metrics backend.\n\n`type`: type of the metrics backend. The current available metrics backends are: `aws_cloudwatch`.\n\n`namespace`: target namespace on AWS CloudWatch.\n\n`AWS_REGION_NAME`: target AWS region to report metrics to.\n\n`AWS_ACCESS_KEY_ID`: AWS access key id (optional).\n\n`AWS_SECRET_ACCESS_KEY`: AWS secret access key (optional).\n"
  },
  {
    "path": "docs/liminal/metrics_backends/index.rst",
    "content": ".. Apchae Liminal documentation master file, created by\n   sphinx-quickstart on Sun Nov 15 08:45:27 2020.\n   You can adapt this file completely to your liking, but it should at least\n   contain the root `toctree` directive.\n\n..\n   Licensed to the Apache Software Foundation (ASF) under one\n   or more contributor license agreements.  See the NOTICE file\n   distributed with this work for additional information\n   regarding copyright ownership.  The ASF licenses this file\n   to you under the Apache License, Version 2.0 (the\n   \"License\"); you may not use this file except in compliance\n   with the License.  You may obtain a copy of the License at\n..\n\n..  http://www.apache.org/licenses/LICENSE-2.0\n\n..\n   Unless required by applicable law or agreed to in writing,\n   software distributed under the License is distributed on an\n   \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n   KIND, either express or implied.  See the License for the\n   specific language governing permissions and limitations\n   under the License.\n..\n\nMetrics Backends\n================\n\n``metrics_backend`` is the definition of a metrics backend to which all automatically\ngenerated metrics from your pipeline are sent to and is part of the ``metrics_backends`` list in the\n``monitoring`` section of your liminal.yml\n\n.. code-block:: yaml\n\n  - metrics_backend: cloudwatch_metrics\n    type: aws_cloudwatch\n    namespace: DataPipeline\n    AWS_REGION_NAME: us-east-1\n\n..\n\nA ``metrics_backend`` is defined by the following attributes:\n\nmetrics_backend attributes\n''''''''''''''''''''''''''\n\n``metrics_backend``: name of your metrics backend\n\n``type``: type of the metrics backend. The current available metrics backends are: ``aws_cloudwatch``.\n\nDifferent metrics backend types require their own additional configuration. 
For example,\n``aws_cloudwatch`` metrics backend requires ``namespace`` to be configured.\n\nmetrics_backend types\n'''''''''''''''''''''\n\n.. toctree::\n   :maxdepth: 1\n\n   aws_cloudwatch\n"
  },
  {
    "path": "docs/liminal/monitoring.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Monitoring\n\nIn the `monitoring` section you can configure monitoring for your pipelines and services.\n\n```yaml\nmonitoring:\n  metrics_backends:\n    - metrics_backend: cloudwatch_metrics\n      type: aws_cloudwatch\n      namespace: DataPipeline\n      AWS_REGION_NAME: us-east-1\n```\n\n`monitoring` is a section in the root lof your liminal.yml file and is a list of `metrics_backend`s and\na  list of `alerts_backend`s.\n\n`metrics_backend`s are where metrics for your pipelines are automatically sent.\n\n`alerts_backends`s automatically register alerts based on `metrics_backend` they are paired with.\n\n## metrics_backend attributes\n\nFor fully detailed information on metrics backends see: [metrics backends](metrics_backends).\n\n`metrics_backend`: name of your metrics backend\n\n`type`: type of the metrics backend. The current available metrics backends are: `aws_cloudwatch`.\n\nDifferent metrics backend types require their own additional configuration. For example,\n`aws_cloudwatch` metrics backend requires `namespace` to be configured.\n"
  },
  {
    "path": "docs/liminal/pipelines.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Pipelines\n\nIn the `pipelines` section you can configure scheduled/manually run pipelines of tasks to be\nexecuted sequentially:\n\n```yaml\npipelines:\n  - pipeline: my_data_pipeline\n    timeout_minutes: 30\n    start_date: 2020-03-01\n    schedule: 0 10 * * *\n    tasks:\n      - task: my_sql_task\n        type: sql\n        query: \"SELECT * FROM\n        {{my_database_name}}.{{my_table_name}}\n        WHERE event_date_prt >=\n              '{{yesterday_ds}}'\"\n              AND cms_platform = 'xsite'\n        output_table: my_db.my_out_table\n        output_path: s3://my_bky/{{env}}/mydir\n      - task: my_python_task\n        type: python\n        image: myorg/myrepo:pythonapp\n        cmd: python my_python_app.py\n        env_vars:\n          env: {{env}}\n          fizz: buzz\n```\n\n`pipelines` is a section in the root lof your liminal.yml file and is a list of `pipeline`s defined\nby the following attributes:\n\n## pipeline attributes\n\n`pipeline`: name of your pipeline (must be unique per liminal server).\n\n`timeout_minutes`: maximum allowed pipeline run time in minutes, if run exceeds this time, pipeline\nand all running tasks will fail.\n\n`start_date`: 
start date for the pipeline.\n\n`schedule`: to be configured if the pipeline should run on a schedule. Format is\n[cron expression](https://en.wikipedia.org/wiki/Cron#CRON_expression).\n\n`tasks`: list of `task`s, defined by the following attributes:\n\n## task attributes\n\nFor fully detailed information on tasks see: [tasks](tasks).\n\n`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).\n\n`type`: type of the task. Examples of available task types are: `python`.\nand more..\n\nDifferent task types require their own additional configuration. For example, `python` task requires\n`image` to be configured.\n"
  },
  {
    "path": "docs/liminal/services.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Services\n\nIn the `services` section you can configure constantly running applications such as\nservers.\n\n```yaml\nservices:\n  - service: my_server\n    image: myorg/myrepo:myserver\n```\n\n`services` is a section in the root lof your liminal.yml file and is a list of `service`s, defined\nby the following attributes:\n\n## service attributes\n\n`service`: name of your service.\n\n`image`: the service's docker image.\n"
  },
  {
    "path": "docs/liminal/tasks/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Tasks\n\n`task` is the definition of a specific step in your pipeline, and is part of the `tasks` list\nin your pipeline definition.\n\nFor fully detailed information on pipelines see: [pipelines](../pipelines.md).\n\n```yaml\n  - task: my_python_task\n    image: myorg/myrepo:mypythonapp\n    cmd: python -u my_module.py\n    env_vars:\n      env: {{env}}\n      fizz: buzz\n```\n\nA `task` is defined by the following attributes:\n\n## task attributes\n\n`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).\n\n`type`: type of the task. Examples of available task types are: `python`\nand more..\n\nDifferent task types require their own additional configuration. For example, `python` task requires\n`image` to be configured.\n\n## task types\n\n1. [python](python.md)\n2. [spark](spark.md)\n3. [create_cloudformation_stack](create_cloudformation_stack.md)\n4. [delete_cloudformation_stack](delete_cloudformation_stack.md)\n"
  },
  {
    "path": "docs/liminal/tasks/create_cloudformation_stack.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# create_cloudformation_stack task\n\nThe `create_cloudformation_stack` task allows you to create cloudformation stack on AWS\n\n```yaml\n  - task: create_emr\n    type: create_cloudformation_stack\n    stack_name: liminal_document_emr\n    properties:\n      OnFailure: DO_NOTHING\n      TimeoutInMinutes: 25\n      Capabilities: [ 'CAPABILITY_NAMED_IAM' ]\n      TemplateURL: s3://liminal-doc/emr/template.yml\n      Parameters:\n        Environment: Production\n        CoreServerCount: '2'\n```\n\n## attributes\n\n`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).\n\n`stack_name`: the name of the stack\n\n`properties`: any attribute of [aws-create-stack-request-parameters](\nhttps://docs.aws.amazon.com/AWSCloudFormation/latest/APIReference/API_CreateStack.html#API_CreateStack_RequestParameters)\n"
  },
  {
    "path": "docs/liminal/tasks/delete_cloudformation_stack.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# delete_cloudformation_stack task\n\nThe `delete_cloudformation_stack` task allows you to delete stack\n\n```yaml\n    - task: delete_emr\n      type: delete_cloudformation_stack\n      stack_name: liminal_emr_stack_name\n      description: delete stack\n```\n\n## attributes\n\n`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).\n\n`stack_name`: the name of the stack to delete\n"
  },
  {
    "path": "docs/liminal/tasks/index.rst",
    "content": ".. Apchae Liminal documentation master file, created by\n   sphinx-quickstart on Sun Nov 15 08:45:27 2020.\n   You can adapt this file completely to your liking, but it should at least\n   contain the root `toctree` directive.\n\n..\n   Licensed to the Apache Software Foundation (ASF) under one\n   or more contributor license agreements.  See the NOTICE file\n   distributed with this work for additional information\n   regarding copyright ownership.  The ASF licenses this file\n   to you under the Apache License, Version 2.0 (the\n   \"License\"); you may not use this file except in compliance\n   with the License.  You may obtain a copy of the License at\n..\n\n..  http://www.apache.org/licenses/LICENSE-2.0\n\n..\n   Unless required by applicable law or agreed to in writing,\n   software distributed under the License is distributed on an\n   \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n   KIND, either express or implied.  See the License for the\n   specific language governing permissions and limitations\n   under the License.\n..\n\nTasks\n=====\n\n``task`` is the definition of a specific step in your pipeline, and is part of the ``tasks`` list\nin your pipeline definition.\n\nFor fully detailed information on pipelines see: `pipelines`_.\n\n.. _pipelines: ../pipelines.html\n\n.. code-block:: yaml\n\n  - task: my_python_task\n    type: python\n    image: myorg/myrepo:mypythonapp\n    cmd: python -u my_module.py\n    env_vars:\n      env: {{env}}\n      fizz: buzz\n\n..\n\nA ``task`` is defined by the following attributes:\n\nIn the ``images`` section you can configure how to pack your code into docker images. 
This can be\nachieved in liminal easily, without any Docker knowledge needed by setting just a few attributes:\n\ntask attributes\n''''''''''''''''\n\n``task``: name of your task (must be made of alphanumeric, dash and/or underscore characters only).\n\n``type``: type of the task.\n\nDifferent task types require their own additional configuration. For example, ``python`` task\nrequires ``image`` to be configured.\n\ntask types\n''''''''''\n\n.. toctree::\n   :maxdepth: 1\n\n   python\n"
  },
  {
    "path": "docs/liminal/tasks/python.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# python task\n\nThe `python` task allows you to run python code packaged as docker images.\n\n```yaml\n  - task: my_python_task\n    type: python\n    image: myorg/myrepo:mypythonapp\n    cmd: python my_python_app.py\n    env_vars:\n      env: '{{env}}'\n      fizz: buzz\n    mounts:\n      - mount: mymount\n        volume: myvol1\n        path: /mnt/vol1\n```\n\n## attributes\n\n`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).\n\n`image`: name of image to run.\n\n`cmd`: command to run when running the image.\n\n`env_vars`: environment variables to set when running the image.\n\n`mounts`: list of `mount`s defined by the following attributes:\n\n### mount attributes\n\n`mount`: name of the mount.\n\n`volume`: volume to mount. volumes are defined in the `volumes` section of liminal.yml\n\n`path`: path in which to mount the volume. this is the path accessible to user code.\n"
  },
  {
    "path": "docs/liminal/tasks/spark.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# spark task\n\nThe `spark` task allows you to submit a spark application\n\n```yaml\n  - task: my_spark_app\n    type: spark\n    executor: emr_executor\n    application_source: 'my_app.py',\n    class: 'com.mycompany.MySparkApp',\n    master: 'yarn',\n    conf:\n      spark.driver.memory: '1g',\n      spark.driver.maxResultSize: '1g',\n      spark.yarn.executor.memoryOverhead: '500M'\n    application_arguments:\n      --query: \"select * from my_db.my_input_table where my_date_col >= \"\n                 \"'{{yesterday_ds}}'\",\n      --output: 'my_output_table'\n```\n\n## attributes\n\n`task`: name of your task (must be made of alphanumeric, dash and/or underscore characters only).\n\n`executor`: executor name ([supported executors](#supported-executors))\n\n`application_source`: the location of the spark application (jar location / python file)\n\n`class`: the entry point for your application\n\n`master`: the cluster manager to connect to.See the list\nof [allowed master URL's](https://spark.apache.org/docs/latest/submitting-applications.html#master-urls)\n(Optional)\n\n`conf`: arbitrary Spark configuration properties (Optional)\n\n`application_arguments`: arguments passed 
to the main method of your main class (Optional)\n\n## supported executors\n- [emr](../executors/emr.md)\n"
  },
  {
    "path": "docs/make.bat",
    "content": "REM\r\nREM Licensed to the Apache Software Foundation (ASF) under one\r\nREM or more contributor license agreements.  See the NOTICE file\r\nREM distributed with this work for additional information\r\nREM regarding copyright ownership.  The ASF licenses this file\r\nREM to you under the Apache License, Version 2.0 (the\r\nREM \"License\"); you may not use this file except in compliance\r\nREM with the License.  You may obtain a copy of the License at\r\nREM\r\nREM   http://www.apache.org/licenses/LICENSE-2.0\r\nREM\r\nREM Unless required by applicable law or agreed to in writing,\r\nREM software distributed under the License is distributed on an\r\nREM \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\r\nREM KIND, either express or implied.  See the License for the\r\nREM specific language governing permissions and limitations\r\nREM under the License.\r\n\r\n@ECHO OFF\r\n\r\npushd %~dp0\r\n\r\nREM Command file for Sphinx documentation\r\n\r\nif \"%SPHINXBUILD%\" == \"\" (\r\n\tset SPHINXBUILD=sphinx-build\r\n)\r\nset SOURCEDIR=.\r\nset BUILDDIR=build\r\n\r\nif \"%1\" == \"\" goto help\r\n\r\n%SPHINXBUILD% >NUL 2>NUL\r\nif errorlevel 9009 (\r\n\techo.\r\n\techo.The 'sphinx-build' command was not found. Make sure you have Sphinx\r\n\techo.installed, then set the SPHINXBUILD environment variable to point\r\n\techo.to the full path of the 'sphinx-build' executable. Alternatively you\r\n\techo.may add the Sphinx directory to PATH.\r\n\techo.\r\n\techo.If you don't have Sphinx installed, grab it from\r\n\techo.http://sphinx-doc.org/\r\n\texit /b 1\r\n)\r\n\r\n%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%\r\ngoto end\r\n\r\n:help\r\n%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%\r\n\r\n:end\r\npopd\r\n"
  },
  {
    "path": "docs/source/How_to_install_liminal_in_airflow_on_kubernetes.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Install Liminal in Airflow\n* [Workflow](#workflow)\n* [Prerequisites](#prerequisites)\n* [One time setup](#One-time-environment-setup)\n* [Deploying your Yaml files](#Deploying-your-Yaml-files)\n* [References and other resources](#references-and-other-resources)\n\n## Workflow\nTo deploy and run liminal on airflow, we need to complete two tasks:\n1. Ensure that the Liminal python package is installed inside each one of Airflow's pods (schedulers, Workers and Web Server). \\\nThis is a standard airflow management task, which can be achieved in multiple ways depending on your setup (we will cover a few later in this guide).\n2. Ensure that the user's Liminal code (i.e. Yamls) are present in each of the pods' DAGs folder (typically located in /opt/airflow/dags). \\\nHere, our recommended approach is to use a folder on a shared filesystem (EFS) to host the airflow DAGs (and Liminal Yamls). 
\\\nThis folder will be mounted into Airflow pods' DAGs folder - thus enabling each of the pods to pick the files up and run the Liminal DAGs.\n\n\n\n![](assets/liminal_deployment_diagram.png)\n\n\nBelow is a description of how to achieve this task using a commonly used Helm chart for Airflow on Kubernetes.\n\n## Prerequisites\n### Airflow on Kubernetes\nThis guide assumes you have successfully installed Airflow on Kubernetes.\n\nIf you haven't done so yet, we found the following Helm chart useful for achieving this goal:\n[airflow-helm-chart-7.16.0]\n\nYou'll need to add the chart to your Helm installation and follow the instructions from the README.md\n```sh\nhelm repo add airflow-stable https://airflow-helm.github.io/charts\nhelm repo update\n```\n\n### A provisioned EFS file system\nEFS is an AWS solution which offers an NFS as a service. \\\nYou can read up about EFS [here](https://aws.amazon.com/efs/features/) and set it up via AWS console or CLI.\n\n## One time environment setup\n### Adding the Apache Liminal package to Apache Airflow pods\n\nThere are multiple ways to install liminal into the Airflow pods, depending on how you deployed Airflow on Kubernetes. \\\nIn principle, it requires the package to be pip-installed into the Airflow docker - either during build time or in run time.\n\n1. If you are rolling out your own Airflow Docker images based on the [official docker image](https://github.com/apache/airflow),\nyou can add apache-liminal installation during the build:\n```docker build --build-arg ADDITIONAL_PYTHON_DEPS=\"apache-liminal\"```\n\n2. If you already have a running Airflow on Kubernetes, you can run a command which iterates over Airflow pods and executes a ```pip install apache-liminal```\non each of the pods.\n\n3. 
If you used the [airflow-helm-chart-7.16.0], you just need to specifiy apache-liminal inside the ```requirements.txt``` file as per the instructions [here][airflow-helm-chart-7.16.0]\n\n\n### Preparing a \"deployment box\" and an EFS folder to host the Liminal Yaml files\nThe \"deployment box\" is the machine which has access to the Yamls you want to deploy. \\\nThis machine will also mount the same EFS folder as Airflow, and use ```liminal deploy``` to push the Yamls into that folder.\n\n1. Provision an EC2 instance which has access to the EFS\n\n3. Mount the EFS onto the EC2 instance\n\n```sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport <EFS_ID>:/ /mnt/efs```\n\n3. Create a folder on an EFS drive. \\\nThis will be our target for deployment\n```mkdir -p /mnt/efs/opt/airflow/liminal_home```\n\n4. ```export LIMINAL_HOME=/mnt/efs/opt/airflow/liminal_home```\n\n### Mounting the EFS folder into Airflow pods\n\nNow that we have an EFS drive, and we've created the shared folder on it, it's time to mount it onto the Airflow pods` Dags folder.\nThe [guide][airflowInstallation] includes details on how to expose EFS as a Persistent Volume for kubernetes.\n\nMake sure to point the pods' PV to the right EFS folder, like so:\n\n```\nvolumeMounts:\n    - name: efs-data\n      mountPath: /opt/airflow/dags\n```\n\nwhere efs-data is a PVC pointing to `<EFS_ID>:/opt/airflow/liminal_home`\n\n\n## Deploying your Yaml files\n\nFinally, the one time setup is done, and by this point, you should have:\n1. A deployment box which can deploy Liminal to an EFS folder\n2. 
Airflow pods which will pick up the Yamls from that folder automatically.\n\nDeploying the actual Yaml files is the simple part.\n\nEach time you want to deploy the Liminal Yamls from the deployment box:\n```\nliminal deploy --path <<<path to liminal user code>>>\n```\n\n## References and other resources\n\n### Setting up Airflow on Kubernetes with EFS\n[Setting up Airflow on Kubernetes with AWS EFS][airflowInstallation]\n\n### Example script\nIn order to make it easier to undernstad all the steps, we include a script which performs the Liminal-specific steps in the deployment.\nThis script should be run from an EC2 machine which has access to the EFS you've provisioned. \\\nThe below steps runs the example project of [liminal-getting-started][liminal-getting-started].\n* Mount the EFS and setting environment parameters:\n  * [`liminal_aws_deploy.sh`][liminal-aws-deploy] -o installation\n* Build dockers from your business logic:\n  * [`liminal_aws_deploy.sh`][liminal-aws-deploy] -o build\n* Deploy the Yaml onto the EFS folder:\n  * [`liminal_aws_deploy.sh`][liminal-aws-deploy] -o deployment\n\n![](assets/liminal_aws_deploy.gif)\n\n##### Seting up AWS ECR with kubernetes\n[How to configure and use AWS ECR with kubernetes][AWS-ECR-with-kubernetes]\n\n1. `liminal_aws_deploy.sh -o build` will build dockers from the given liminal project path. \\\nThe [liminal.yml][liminal-yml] defindes a docker image :\n```\n    tasks:\n      - task: python_hello_world_example\n        type: python\n        description: static input task\n        image: python_hello_world_example_image\n```\nThe Yaml defines task that will pull the docker image from your predefined registry. \\\nTherefore, you need to use a docker registry in your kubernetes cluster. 
\\\nOne way to achieve it is to use [Amazon ECR][amazon-ecr].\n\n[AWS-ECR-with-kubernetes]: <https://medium.com/@damitj07/how-to-configure-and-use-aws-ecr-with-kubernetes-rancher2-0-6144c626d42c>\n[amazon-ecr]: <https://aws.amazon.com/ecr/>\n[liminal-yml]: <https://github.com/apache/incubator-liminal/blob/master/examples/liminal-getting-started/liminal.yml>\n[liminal-getting-started]: <https://github.com/apache/incubator-liminal/tree/master/examples/liminal-getting-started>\n[airflow-helm-chart-7.16.0]: <https://github.com/airflow-helm/charts/tree/airflow-7.16.0>\n[homebrew-kubectl]: <https://formulae.brew.sh/formula/kubernetes-cli>\n[cluster-access-kubeconfig]: <https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/#context>\n[liminal-aws-deploy]: <https://github.com/apache/incubator-liminal/tree/master/docs/source/liminal_aws_deploy.sh>\n[airflowChart]: <https://github.com/airflow-helm/charts/tree/main/charts/airflow>\n[airflowInstallation]: <https://medium.com/terragoneng/setting-up-airflow-on-kubernetes-with-aws-efs-c659f3a16292>\n[airflowImage]: <https://hub.docker.com/layers/apache/airflow/1.10.12-python3.6/images/sha256-9ea9e5ca66bd17632241889ab248fe3852c9f3c830ed299a8ecaa8a13ac2082f?context=explore>\n"
  },
  {
    "path": "docs/source/liminal_aws_deploy.sh",
    "content": "#! /bin/bash\n#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nhelp() {\n    echo \"$0: Get Started\"\n    echo \"Usage: $0 -o option\"\n\t\techo \"Liminal available options:\"\n\t\techo \"install\"\n\t\techo \"build\"\n\t\techo \"deploy\"\n}\n\nif [[ \"$#\" -lt 2 ]]; then\n\thelp\n\texit\nfi\n\n# Arguments processing\nwhile getopts \":o:\" opt; do #install, build, deploy\n\tcase $opt in\n\to)\n\t  option=$OPTARG\n\t  case $option in\n\t    install)\n\t      ACTION=\"install\"\n\t    ;;\n\t    build)\n\t      ACTION=\"build\"\n\t    ;;\n\t    deploy)\n\t      ACTION=\"deploy\"\n\t    ;;\n\t  esac\n\tesac\ndone\n\nmount_efs() {\n\tEFS_ID=$1\n\techo \"The EFS_ID is: ${EFS_ID}\"\n\n\techo \"Create /mnt/efs\"\n\tmkdir /mnt/efs\n\n\techo \"Mount the /mnt/efs\"\n\tsudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport ${EFS_ID}:/ /mnt/efs\n}\n\ninstall_liminal() {\n\techo \"Installing liminal on Airflow components\"\n\tdocker ps | grep k8s_airflow | awk '{print $1}' | xargs -I {} docker exec {} bash -c 'pip install apache-liminal'\n\n\techo \"Installing liminal locally\"\n\tpip install apache-liminal\n}\n\ndeploy_yaml() {\n\tDEPLOY_PATH=$1\n\techo \"The DEPLOY_PATH 
is: ${DEPLOY_PATH}\"\n\techo 'Export the liminal home'\n\texport LIMINAL_HOME=$(find /mnt/efs/ -name \"liminal_home\")\n\n\tliminal deploy --path \"${DEPLOY_PATH}\"\n}\n\nbuild() {\n\tBUILD_PATH=$1\n\techo \"The BUILD_PATH is: ${BUILD_PATH}\"\n\n\tliminal build --path \"${BUILD_PATH}\"\n}\n\ncase $ACTION in\n\tinstall)\n\t\tread -r -p \"Please enter the EFS ID: \" EFS_ID\n\t\tmount_efs $EFS_ID\n\t\tinstall_liminal\n\t\texit 0\n\t;;\n\tbuild)\n\t\tread -r -p \"Please enter the path to the liminal project: \" BUILD_PATH\n\t\tbuild $BUILD_PATH\n\t\texit 0\n\t;;\n\tdeploy)\n\t\tread -r -p \"Please enter the liminal yaml path: \" DEPLOY_PATH\n\t\tdeploy_yaml $DEPLOY_PATH\n\t\texit 0\n\t;;\n*)\n  help\n  exit\n  ;;\nesac\n"
  },
  {
    "path": "examples/aws-ml-app-demo/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/aws-ml-app-demo/liminal.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nname: MyDataScienceApp\nowner: Bosco Albert Baracus\nvolumes:\n  - volume: gettingstartedvol\n    claim_name: gettingstartedvol-pvc\n    local:\n      path: .\nimages:\n  - image: myorg/mydatascienceapp\n    type: python_server\n    source: .\n    endpoints:\n      - endpoint: /predict\n        module: serving\n        function: predict\n      - endpoint: /healthcheck\n        module: serving\n        function: healthcheck\n      - endpoint: /version\n        module: serving\n        function: version\nservices:\n  - service: my_datascience_server\n    image: myorg/mydatascienceapp\npipelines:\n  - pipeline: my_datascience_pipeline\n    start_date: 1970-01-01\n    timeout_minutes: 45\n    schedule: 0 * 1 * *\n    metrics:\n      namespace: DataScience\n      backends: []\n    tasks:\n      - task: train\n        type: python\n        description: train model\n        image: myorg/mydatascienceapp\n        cmd: python -u training.py train\n        env:\n          MOUNT_PATH: /mnt/gettingstartedvol\n        mounts:\n          - mount: mymount\n            volume: gettingstartedvol\n            path: /mnt/gettingstartedvol\n      - task: validate\n        
type: python\n        description: validate model and deploy\n        image: myorg/mydatascienceapp\n        cmd: python -u training.py validate\n        env:\n          MOUNT_PATH: /mnt/gettingstartedvol\n        mounts:\n          - mount: mymount\n            volume: gettingstartedvol\n            path: /mnt/gettingstartedvol\n"
  },
  {
    "path": "examples/aws-ml-app-demo/manifests/aws-ml-app-demo.yaml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\n\napiVersion: v1\nkind: Pod\nmetadata:\n  name: aws-ml-app-demo\nspec:\n  volumes:\n    - name: task-pv-storage\n      persistentVolumeClaim:\n        claimName: gettingstartedvol-pvc\n  containers:\n    - name: task-pv-container\n      imagePullPolicy: Never\n      image: myorg/mydatascienceapp\n      lifecycle:\n        postStart:\n          exec:\n            command: [/bin/bash, -c, apt update && apt install curl -y]\n      ports:\n        - containerPort: 80\n          name: http-server\n      volumeMounts:\n        - mountPath: /mnt/gettingstartedvol\n          name: task-pv-storage\n"
  },
  {
    "path": "examples/aws-ml-app-demo/model_store.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport glob\nimport os\nimport pickle\nimport time\n\nMOUNT_PATH = os.environ.get('MOUNT_PATH', '/mnt/gettingstartedvol')\nPRODUCTION = 'production'\nCANDIDATE = 'candidate'\n\n_ONE_HOUR = 60 * 60\n\n\nclass ModelStore:\n    def __init__(self, env):\n        self.env = env\n        self._latest_model = None\n        self._latest_version = None\n        self._last_check = time.time()\n\n    def load_latest_model(self, force=False):\n        if not self._latest_model or time.time() - self._last_check > _ONE_HOUR or force:\n            self._latest_model, self._latest_version = self._download_latest_model()\n\n        return self._latest_model, self._latest_version\n\n    def save_model(self, model, version):\n        key = 'model.p'\n        path = f'{MOUNT_PATH}/{self.env}/{version}'\n\n        os.makedirs(f'{path}', exist_ok=True)\n        pickle.dump(model, open(f'{path}/{key}', \"wb\"))\n\n    def _download_latest_model(self):\n        objects = glob.glob(f'{MOUNT_PATH}/{self.env}/**/*')\n        models = list(reversed(sorted(obj for obj in objects if obj.endswith('.p'))))\n        latest_key = models[0]\n        version = latest_key.rsplit('/')[-2]\n   
     print(f'Loading model version {version}')\n        return pickle.load(open(latest_key, 'rb')), version\n"
  },
  {
    "path": "examples/aws-ml-app-demo/requirements.txt",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nscikit-learn==0.23.2\napache-liminal==0.0.2\nWerkzeug==1.0.1\nitsdangerous==1.1.0\nMarkupSafe==1.1.1\nFlask\n"
  },
  {
    "path": "examples/aws-ml-app-demo/serving.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport json\n\nimport model_store\nfrom model_store import ModelStore\n\n_MODEL_STORE = ModelStore(model_store.PRODUCTION)\n_PETAL_WIDTH = 'petal_width'\n\n\ndef predict(input_json):\n    try:\n        input_dict = json.loads(input_json)\n        model, version = _MODEL_STORE.load_latest_model()\n        result = str(model.predict_proba([[float(input_dict[_PETAL_WIDTH])]])[0][1])\n        return json.dumps({\"result\": result, \"version\": version})\n\n    except IndexError:\n        return 'Failure: the model is not ready yet'\n\n    except Exception as e:\n        print(e)\n        return 'Failure'\n\n\ndef healthcheck(self):\n    return 'Server is up!'\n\n\ndef version(self):\n    try:\n        model, version = _MODEL_STORE.load_latest_model()\n        print(f'version={version}')\n        return version\n    except Exception as e:\n        return e\n"
  },
  {
    "path": "examples/aws-ml-app-demo/training.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport sys\nimport time\n\nimport model_store\nimport numpy as np\nfrom model_store import ModelStore\nfrom sklearn import datasets\nfrom sklearn.linear_model import LogisticRegression\n\n_CANDIDATE_MODEL_STORE = ModelStore(model_store.CANDIDATE)\n_PRODUCTION_MODEL_STORE = ModelStore(model_store.PRODUCTION)\n\n\ndef train_model():\n    iris = datasets.load_iris()\n\n    X = iris[\"data\"][:, 3:]  # petal width\n    y = (iris[\"target\"] == 2).astype(np.int)\n\n    model = LogisticRegression()\n    model.fit(X, y)\n\n    version = round(time.time())\n\n    print(f'Saving model with version {version} to candidate model store.')\n    _CANDIDATE_MODEL_STORE.save_model(model, version)\n\n\ndef validate_model():\n    model, version = _CANDIDATE_MODEL_STORE.load_latest_model()\n    print(f'Validating model with version {version} to candidate model store.')\n    if not isinstance(model.predict([[1]]), np.ndarray):\n        raise ValueError('Invalid model')\n    print(f'Deploying model with version {version} to production model store.')\n    _PRODUCTION_MODEL_STORE.save_model(model, version)\n\n\nif __name__ == '__main__':\n    cmd = sys.argv[1]\n    if cmd == 
'train':\n        train_model()\n    elif cmd == 'validate':\n        validate_model()\n    else:\n        raise ValueError(f\"Unknown command {cmd}\")\n"
  },
  {
    "path": "examples/extensibility/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/extensibility/executors/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/extensibility/executors/custom_executor.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nfrom plugins.tasks import custom_task\n\nfrom liminal.runners.airflow.model import executor\n\n\nclass CustomExecutor(executor.Executor):\n    supported_task_types = [custom_task.CustomTask]\n\n    def _apply_executor_task_to_dag(self, **kwargs):\n        print(\"Hello from custom executor\")\n        return kwargs['task'].custom_task_logic()\n"
  },
  {
    "path": "examples/extensibility/images/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/extensibility/images/custom_image.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport os\n\nfrom liminal.build.image_builder import ImageBuilder\n\n\nclass CustomImageBuilder(ImageBuilder):\n    def __init__(self, config, base_path, relative_source_path, tag):\n        super().__init__(config, base_path, relative_source_path, tag)\n\n    @staticmethod\n    def _dockerfile_path():\n        return os.path.join(os.path.dirname(__file__), 'custom_image_builder/Dockerfile')\n"
  },
  {
    "path": "examples/extensibility/images/custom_image_builder/Dockerfile",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nFROM alpine:3.4\n\nWORKDIR /app\n\nCOPY . /app/\n"
  },
  {
    "path": "examples/extensibility/images/custom_image_builder/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/extensibility/liminal.yml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n---\nname: Extensability\nimages:\n  - image: custom_image_example\n    type: custom_image\nexecutors:\n  - executor: my_custom_exec\n    type: custom_executor\npipelines:\n  - pipeline: getting_started_pipeline_extensibility\n    owner: Bosco Albert Baracus\n    start_date: 1970-01-01\n    timeout_minutes: 10\n    schedule: 0 * 1 * *\n    tasks:\n      - task: custom_task_example\n        type: custom_task\n        executor: my_custom_exec\n"
  },
  {
    "path": "examples/extensibility/tasks/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/extensibility/tasks/custom_task.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nfrom airflow.operators.bash import BashOperator\n\nfrom liminal.runners.airflow.model import task\n\n\nclass CustomTask(task.Task):\n    def custom_task_logic(self):\n        hello_world = BashOperator(\n            task_id='hello_world',\n            bash_command='echo \"hello from liminal custom task\"',\n        )\n\n        if self.parent:\n            self.parent.set_downstream(hello_world)\n\n        return hello_world\n"
  },
  {
    "path": "examples/liminal-getting-started/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/liminal-getting-started/hello_world.json",
    "content": "{\n  \"\": [\n    \"Licensed to the Apache Software Foundation (ASF) under one\",\n    \"or more contributor license agreements.  See the NOTICE file\",\n    \"distributed with this work for additional information\",\n    \"regarding copyright ownership.  The ASF licenses this file\",\n    \"to you under the Apache License, Version 2.0 (the\",\n    \"\\\"License\\\"); you may not use this file except in compliance\",\n    \"with the License.  You may obtain a copy of the License at\",\n    \"\",\n    \"  http://www.apache.org/licenses/LICENSE-2.0\",\n    \"\",\n    \"Unless required by applicable law or agreed to in writing,\",\n    \"software distributed under the License is distributed on an\",\n    \"\\\"AS IS\\\"BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\",\n    \"KIND, either express or implied.  See the License for the\",\n    \"specific language governing permissions and limitations\",\n    \"under the License.\"\n  ],\n  \"hello\": \"world\"\n}\n"
  },
  {
    "path": "examples/liminal-getting-started/helloworld/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/liminal-getting-started/helloworld/hello_world.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport json\nimport os\n\nprint('Hello world!\\n')\nprint('Environment:')\nprint(os.environ)\n\nwith open('/mnt/gettingstartedvol/hello_world_output.json', 'w') as file:\n    file.write(json.dumps({'hello': 1, 'world': 2}))\n"
  },
  {
    "path": "examples/liminal-getting-started/liminal.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nname: GettingStartedPipeline\nvolumes:\n  - volume: gettingstartedvol\n    claim_name: gettingstartedvol-pvc\n    local:\n      path: .\nvariables:\n  myvar: myval\nimages:\n  - image: python_hello_world_example_image\n    type: python\n    source: helloworld\n  - image: liminal_getting_started_server_image\n    type: python_server\n    source: myserver\n    endpoints:\n      - endpoint: /myendpoint1\n        module: my_server\n        function: myendpoint1func\npipelines:\n  - pipeline: getting_started_pipeline\n    owner: Bosco Albert Baracus\n    start_date: 1970-01-01\n    timeout_minutes: 10\n    schedule: 0 * 1 * *\n    default_arg_loaded: check\n    default_array_loaded: [2, 3, 4]\n    default_object_loaded:\n      key1: val1\n      key2: val2\n    metrics:\n      namespace: TestNamespace\n      backends: []\n    tasks:\n      - task: python_hello_world_example\n        type: python\n        image: python_hello_world_example_image\n        env_vars:\n          env1: '{{myvar}}'\n          env2: foo\n        mounts:\n          - mount: mymount\n            volume: gettingstartedvol\n            path: /mnt/gettingstartedvol\n        cmd: python -u 
hello_world.py\n        executors: 2\n        # cmd: python -u hello_world.py\n      - task: python_hello_world_output_task\n        type: python\n        description: task with input from other task's output\n        image: python_hello_world_example_image\n        env_vars:\n          env1: a\n          env2: b\n        mounts:\n          - mount: mymount\n            volume: gettingstartedvol\n            path: /mnt/gettingstartedvol\n        cmd: python -u hello_world.py\nservices:\n  - service: liminal_getting_started_python_server\n    description: my python server\n    image: liminal_getting_started_server_image\n"
  },
  {
    "path": "examples/liminal-getting-started/myserver/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/liminal-getting-started/myserver/my_server.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\ndef myendpoint1func():\n    return '1'\n"
  },
  {
    "path": "examples/liminal-getting-started/pip.conf",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/liminal-getting-started/requirements.txt",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/sagemaker_example/README.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# sageMaker\n\nEnable access to AWS STS AssumeRole:\n\nGo yo IAM->Role of your user and add under Trust Relationships\n```\n{\n    \"Version\": \"2012-10-17\",\n    \"Statement\": [\n        {\n            \"Effect\": \"Allow\",\n            \"Principal\": {\n                \"Federated\": \"arn:aws:iam::<ACCOUNT_ID>:saml-provider/Okta\"\n            },\n            \"Action\": \"sts:AssumeRoleWithSAML\",\n            \"Condition\": {\n                \"StringEquals\": {\n                    \"SAML:aud\": \"https://signin.aws.amazon.com/saml\"\n                }\n            }\n        },\n        {\n            \"Sid\": \"\",\n            \"Effect\": \"Allow\",\n            \"Principal\": {\n                \"Service\": \"sagemaker.amazonaws.com\"\n            },\n            \"Action\": \"sts:AssumeRole\"\n        }\n    ]\n}\n```\n"
  },
  {
    "path": "examples/sagemaker_example/archetype/liminal.yaml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\n\n# superliminal for local development\nname: InfraSageMaker\nowner: Bosco Albert Baracus\ntype: super\nsecrets:\n  - secret: aws\n    local_path_file: \"~/.aws/credentials\"\nexecutors:\n  - executor: k8s\n    type: kubernetes\nvariables:\n  output_root_dir: \"/mnt/smvolume\"\n  input_root_dir: ''\nimages:\n  - image: my_sm_image\n    source: .\n    type: python\n    no_cache: false\ntask_defaults:\n  python:\n    executor: k8s\n    image: my_sm_image\n    application_source: '{{application}}'\n    env_vars:\n      AWS_CONFIG_FILE: \"/secret/credentials\"\n      COMM_PATH: \"{{output_root_dir}}\"\n    secrets:\n      - secret: aws\n        remote_path: \"/secret\"\n    mounts:\n      - mount: mymount\n        volume: smvolume\n        path: /mnt/smvolume\nvolumes:\n  - volume: smvolume\n    claim_name: smvolume-pvc\n    local:\n      path: .\n"
  },
  {
    "path": "examples/sagemaker_example/data_preparation/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/sagemaker_example/data_preparation/data_preparation.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport argparse\nimport os\nfrom pathlib import Path\n\nimport pandas as pd\nfrom data_preparation.data_uploader import get_uploader\nfrom sklearn.model_selection import train_test_split\n\nTEST_CSV = \"diamonds_test.csv\"\n\nTRAIN_CSV = \"diamonds_train.csv\"\n\nLABEL_COLUMN = 'price'\n\nDATASET_PUBLIC_URL = \"https://www.openml.org/data/get_csv/21792853/dataset\"\n\n\ndef transform(data):\n    X = data.drop(columns=LABEL_COLUMN)\n    y = data[LABEL_COLUMN]\n\n    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n    train = X_train.copy()\n    train[LABEL_COLUMN] = y_train\n\n    test = X_test.copy()\n    test[LABEL_COLUMN] = y_test\n    return train, test\n\n\ndef extract(input_uri):\n    return pd.read_csv(input_uri)\n\n\ndef load(train, test, output_uri_base, data_uploader):\n    train.to_csv(TRAIN_CSV)\n    test.to_csv(TEST_CSV)\n    train_path = data_uploader.upload(TRAIN_CSV, os.path.join(output_uri_base, \"train\"))\n    test_path = data_uploader.upload(TEST_CSV, os.path.join(output_uri_base, \"test\"))\n    return train_path, test_path\n\n\ndef data_pipeline(input_uri, output_uri_base, data_uploader):\n   
 data = extract(input_uri)\n    train, test = transform(data)\n    return load(train, test, output_uri_base, data_uploader)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--input_uri\", default=DATASET_PUBLIC_URL)\n    parser.add_argument(\n        \"--output_uri_base\",\n        default=f\"file://{Path(__file__).parent.parent.absolute().joinpath('data')}\",\n        help=\"a uri starting with 's3', 'file' or a relative path \" \"which will be treated as sagemaker prefix\",\n    )\n    args = parser.parse_args()\n    data_uploader = get_uploader(args.output_uri_base)\n    data_pipeline(args.input_uri, args.output_uri_base, data_uploader)\n"
  },
  {
    "path": "examples/sagemaker_example/data_preparation/data_uploader.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\nimport shutil\nfrom abc import ABC, abstractmethod\nfrom urllib.parse import urlparse\n\nimport boto3\nimport sagemaker\n\n\ndef get_uploader(output_uri_base):\n    o = urlparse(output_uri_base)\n    if o.scheme == 's3':\n        return S3DataUploader()\n    elif o.scheme == \"file\":\n        return LocalDataUploader()\n    return SagemakerDataUploader()\n\n\nclass DataUploader(ABC):\n    def __init__(self):\n        pass\n\n    @abstractmethod\n    def upload(self, local_path, output_uri_base):\n        pass\n\n\nclass LocalDataUploader(DataUploader):\n    s3_client = None\n\n    def upload(self, local_path, output_uri_base):\n        o = urlparse(output_uri_base)\n        os.makedirs(o.path, exist_ok=True)\n        shutil.copy2(local_path, o.path)\n        return o.path\n\n\nclass S3DataUploader(DataUploader):\n    s3_client = None\n\n    def __init__(self):\n        self.s3_client = boto3.client('s3')\n\n    def upload(self, local_path, output_uri_base):\n        o = urlparse(output_uri_base)\n        response = self.s3_client.upload_file(local_path, o.netloc, o.path)\n\n\nclass SagemakerDataUploader(DataUploader):\n    sm_client = None\n   
 sm_session = None\n    region = None\n    default_bucket = None\n\n    def __init__(self):\n        self.sm_client = boto3.client(\"sagemaker\")\n        self.sm_session = sagemaker.Session()\n        self.region = self.sm_session.boto_session.region_name\n        self.default_bucket = self.sm_session.default_bucket()\n\n    def upload(self, local_path, output_uri_base):\n        o = urlparse(output_uri_base)\n        bucket = o.netloc or self.default_bucket\n        return self.sm_session.upload_data(path=local_path, bucket=bucket, key_prefix=o.path)\n"
  },
  {
    "path": "examples/sagemaker_example/data_train/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/sagemaker_example/data_train/train.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport argparse\nimport os\nfrom pathlib import Path\n\nimport joblib\nimport numpy as np\nimport pandas as pd\nfrom data_preparation.data_preparation import LABEL_COLUMN, TEST_CSV, TRAIN_CSV\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.linear_model import LinearRegression\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import OneHotEncoder\n\nfeature_column_names = ['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'x', 'y', 'z']\n\n\ncategorical_cols = ['cut', 'clarity', 'color']\n\n# inference functions\nMODEL_JOBLIB_FILENAME = \"model.joblib\"\n\npreprocessor = ColumnTransformer(\n    transformers=[('categorical', OneHotEncoder(), categorical_cols)], remainder='passthrough'\n)\n\n\ndef train(train_df, test_df, n_jobs, model_dir):\n    X_train = train_df[feature_column_names]\n    y_train = train_df[LABEL_COLUMN]\n\n    print(X_train.head(3))\n    X_test = test_df[feature_column_names]\n    y_test = test_df[LABEL_COLUMN]\n\n    # train\n\n    diamond_price_model = LinearRegression(n_jobs=n_jobs)\n\n    my_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', diamond_price_model)])\n\n    
my_pipeline.fit(X_train, y_train)\n\n    train_predictions = my_pipeline.predict(X_train)\n    print(train_predictions)\n\n    print(\"validating model\")\n    abs_err = np.abs(my_pipeline.predict(X_test) - y_test)\n    print(f\"Test prediction error:{abs_err} \")\n\n    # persist model\n    path = os.path.join(model_dir, MODEL_JOBLIB_FILENAME)\n    joblib.dump(my_pipeline, path)\n    print(\"model persisted at \" + path)\n    return path\n\n\nif __name__ == '__main__':\n    data_path = Path(__file__).parent.parent.absolute().joinpath('data')\n    parser = argparse.ArgumentParser()\n    parser.add_argument('--model-dir', type=str, default=os.getenv('SM_MODEL_DIR', f\"file://{data_path}\"))\n    parser.add_argument('--n-jobs', default=2)\n    parser.add_argument(\n        \"--train\", type=str, default=os.getenv(\"SM_CHANNEL_TRAIN\", f\"file://{data_path.joinpath('train')}\")\n    )\n    parser.add_argument(\n        \"--test\", type=str, default=os.getenv(\"SM_CHANNEL_TEST\", f\"file://{data_path.joinpath('test')}\")\n    )\n    parser.add_argument(\"--train-file\", type=str, default=TRAIN_CSV)\n    parser.add_argument(\"--test-file\", type=str, default=TEST_CSV)\n\n    args = parser.parse_args()\n\n    train_df = pd.read_csv(os.path.join(args.train, args.train_file))\n    test_df = pd.read_csv(os.path.join(args.test, args.test_file))\n\n    train(train_df=train_df, test_df=test_df, n_jobs=args.n_jobs, model_dir=args.model_dir)\n"
  },
  {
    "path": "examples/sagemaker_example/inference.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport argparse\nimport os\nfrom io import BytesIO\n\nimport joblib\nimport numpy as np\nimport pandas as pd\n\nfeature_column_names = ['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'x', 'y', 'z']\n\n\ncategorical_cols = ['cut', 'clarity', 'color']\n\nMODEL_JOBLIB_FILENAME = \"model.joblib\"\n\n\ndef model_fn(model_dir):\n    clf = joblib.load(os.path.join(model_dir, MODEL_JOBLIB_FILENAME))\n    return clf\n\n\ndef input_fn(input_data, content_type):\n    if content_type == \"application/x-npy\":\n        load_bytes = BytesIO(input_data)\n        input_np = np.load(load_bytes, allow_pickle=True)\n        df = pd.DataFrame(data=input_np, columns=feature_column_names)\n        return df\n    else:\n        raise ValueError(\n            f\"content type {content_type} is not supported by this inference endpoint. 
Please send a legal application/x-npy payload\"\n        )\n\n\ndef predict_fn(input_data, model):\n    prediction = model.predict(input_data[feature_column_names])\n    return prediction\n\n\ndef df_to_inference_input():\n    X_train = df[feature_column_names]\n    rows = X_train.head(10)\n    inference_input = rows.to_numpy()\n    np_bytes = BytesIO()\n    np.save(np_bytes, inference_input, allow_pickle=True)\n    input_data = input_fn(np_bytes.getvalue(), \"application/x-npy\")\n    return input_data\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--model-dir\", type=str, default=os.getenv(\"SM_MODEL_DIR\", \"../data\"))\n    parser.add_argument(\"--data-path\", type=str, default=f\"../data/test/diamonds_test.csv\")\n    args = parser.parse_args()\n    model = model_fn(args.model_dir)\n    df = pd.read_csv(args.data_path)\n    input_data = df_to_inference_input()\n    predictions = predict_fn(input_data, model)\n    print(predictions)\n"
  },
  {
    "path": "examples/sagemaker_example/liminal.yaml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nname: SageMakerExample\nsuper: InfraSageMaker\nowner: Bosco Albert Baracus\npipelines:\n  - pipeline: sagemaker_diamonds_train_and_inference\n    start_date: 1970-01-01\n    timeout_minutes: 45\n    schedule: 0 * 1 * *\n    tasks:\n      - task: data_preprocessing\n        type: python\n        description: prepare the data for training\n        cmd: python -u liminal_sm.py --action data_prep --output_uri_base liminal-sm-example/data\n      - task: train\n        type: python\n        description: train model\n        cmd: python -u liminal_sm.py --action train --base_job_name liminal-base-training-job --train_instance_type ml.m5.large --n_jobs 5\n      - task: deploy\n        type: python\n        description: Deploy model\n        cmd: python -u liminal_sm.py --action deploy --model_name liminal-sm-diamonds-model --deploy_instance_type ml.m5.large\n      - task: validate\n        type: python\n        description: validate model post deployment\n        cmd: python -u liminal_sm.py --action validate\n"
  },
  {
    "path": "examples/sagemaker_example/liminal_sm.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\nimport tempfile\n\nfrom sagemaker.deserializers import NumpyDeserializer\nfrom sagemaker.predictor import Predictor\nfrom sagemaker.serializers import NumpySerializer\nfrom sm_ops import create_sm_args_parser, sm_data_prep, sm_deploy, sm_train, sm_validate\n\nCOMM_PATH = os.getenv(\"COMM_PATH\", tempfile.mkdtemp())\nos.makedirs(COMM_PATH, exist_ok=True)\n\n\ndef read_message(name):\n    with open(os.path.join(COMM_PATH, name)) as f:\n        lines = f.readlines()\n    return [x.strip() for x in lines]\n\n\ndef forward_message(name, lines):\n    if not isinstance(lines, list):\n        lines = [lines]\n    lines = '\\n'.join(lines) + '\\n'\n    with open(os.path.join(COMM_PATH, name), 'w') as f:\n        f.writelines(lines)\n\n\nif __name__ == '__main__':\n    choices = ['data_prep', 'train', 'deploy', 'validate', 'all']\n    parser = create_sm_args_parser()\n    parser.add_argument(\"--action\", choices=choices, default='all')\n    args = parser.parse_args()\n    steps = [args.action]\n    if args.action == 'all':\n        steps = choices[:-1]\n    for step in steps:\n        if step == 'data_prep':\n            train, test = 
sm_data_prep(args.input_uri, args.output_uri_base)\n            forward_message('data_prep', [train, test])\n        elif step == 'train':\n            lines = read_message('data_prep')\n            train, test = lines[0], lines[1]\n            artifact = sm_train(\n                train,\n                test,\n                base_job_name=args.base_job_name,\n                instance_type=args.train_instance_type,\n                n_jobs=args.n_jobs,\n            )\n            forward_message('train', artifact)\n        elif step == 'deploy':\n            artifact = read_message('train')[0]\n            predictor = sm_deploy(\n                artifact=artifact, model_name=args.model_name, instance_type=args.deploy_instance_type\n            )\n            forward_message('deploy', predictor.endpoint)\n        else:\n            endpoint = read_message('deploy')[0]\n            train, test = read_message('data_prep')\n            predictor = Predictor(\n                endpoint_name=endpoint, serializer=NumpySerializer(), deserializer=NumpyDeserializer()\n            )\n            result = sm_validate(predictor, test)\n            print(f\"avg abs error:{result}\")\n"
  },
  {
    "path": "examples/sagemaker_example/requirements.txt",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nsagemaker==2.57.0\nscikit-learn==0.23.2\ns3fs==2022.5.0\nprotobuf==3.20.*\n"
  },
  {
    "path": "examples/sagemaker_example/sm_ops.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport argparse\nimport os\nimport time\n\nimport boto3\nimport numpy as np\nimport pandas as pd\nimport sagemaker\nfrom data_preparation.data_preparation import (\n    DATASET_PUBLIC_URL,\n    LABEL_COLUMN,\n    data_pipeline,\n)\nfrom data_preparation.data_uploader import SagemakerDataUploader\nfrom data_train.train import feature_column_names\nfrom sagemaker.sklearn.estimator import SKLearn\nfrom sagemaker.sklearn.model import SKLearnModel\n\nFRAMEWORK_VERSION = \"0.23-1\"\n\nsm_boto3 = boto3.client(\"sagemaker\")\n\n\ndef get_role():\n    try:\n        role = sagemaker.get_execution_role()\n    except ValueError:\n        role = os.getenv(\"SM_ROLE\")\n    return role\n\n\ndef sm_train(train_path, test_path, base_job_name=\"Liminal-sm-training-job\", instance_type=\"ml.m5.large\", n_jobs=1):\n    sklearn_estimator = SKLearn(\n        entry_point=\"data_train/train.py\",\n        source_dir=\"./\",\n        role=get_role(),\n        instance_count=1,\n        instance_type=instance_type,\n        framework_version=FRAMEWORK_VERSION,\n        base_job_name=base_job_name,\n        hyperparameters={\n            \"n-jobs\": n_jobs,\n        },\n    )\n    
sklearn_estimator.fit({\"train\": train_path, \"test\": test_path})\n    artifact = sm_boto3.describe_training_job(TrainingJobName=sklearn_estimator.latest_training_job.name)[\n        \"ModelArtifacts\"\n    ][\"S3ModelArtifacts\"]\n\n    print(\"Model artifact persisted at \" + artifact)\n    return artifact\n\n\ndef sm_deploy(artifact, model_name=\"Liminal-sm-demo-model\", instance_type=\"ml.m5.large\"):\n\n    timestamp = time.strftime(\"-%Y-%m-%d-%H-%M-%S\", time.gmtime())\n    model = SKLearnModel(\n        name=f\"{model_name}-{timestamp}\",\n        entry_point=\"inference.py\",\n        model_data=artifact,\n        role=get_role(),\n        framework_version=FRAMEWORK_VERSION,\n    )\n\n    predictor = model.deploy(instance_type=instance_type, initial_instance_count=1, wait=True)\n    return predictor\n\n\ndef sm_validate(predictor, test_path):\n    if not test_path.startswith(\"s3\"):\n        test_path = f\"s3://{test_path}\"\n\n    df = pd.read_csv(test_path)\n    X_test_inference = df[feature_column_names].to_numpy()\n    predictions = predictor.predict(data=X_test_inference)\n    print(\"validating model\")\n    avg_err = np.average(np.abs(predictions - df[LABEL_COLUMN]))\n    print(f\"average abs error:{avg_err}\")\n    print(predictions)\n    return avg_err\n\n\ndef sm_data_prep(input_uri, output_uri_base):\n    return data_pipeline(\n        input_uri=input_uri or DATASET_PUBLIC_URL,\n        output_uri_base=output_uri_base,\n        data_uploader=SagemakerDataUploader(),\n    )\n\n\ndef sm_main(args):\n    test_path = 'sagemaker-us-east-1-468326661668/liminal-sm-example/test/diamonds_test.csv'\n    train_path, test_path = sm_data_prep(args.input_uri, args.output_uri_base)\n    artifact = sm_train(\n        train_path,\n        test_path,\n        base_job_name=args.base_job_name,\n        instance_type=args.train_instance_type,\n        n_jobs=args.n_jobs,\n    )\n    # artifact = 
's3://sagemaker-us-east-1-468326661668/Liminal-sm-training-job-2022-06-06-14-30-39-827/output/model.tar.gz'\n    predictor = sm_deploy(artifact, model_name=args.model_name, instance_type=args.deploy_instance_type)\n    # predictor =  Predictor(endpoint_name=\"Liminal-sm-demo-model-2022-06-06-14-35-48-079\", serializer=NumpySerializer(), deserializer=NumpyDeserializer())\n    sm_validate(predictor, test_path)\n\n\ndef create_sm_args_parser():\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--input_uri\", default=DATASET_PUBLIC_URL)\n    parser.add_argument(\"--output_uri_base\", default=\"liminal-sm-example\")\n    parser.add_argument(\"--base_job_name\", default=\"Liminal-sm-training-job\")\n    parser.add_argument(\"--train_instance_type\", default=\"ml.m5.large\")\n    parser.add_argument(\"--n_jobs\", default=1)\n    parser.add_argument(\"--model_name\", default=\"Liminal-sm-demo-model\")\n    parser.add_argument(\"--deploy_instance_type\", default=\"ml.m5.large\")\n    return parser\n\n\nif __name__ == '__main__':\n    parser = create_sm_args_parser()\n    args = parser.parse_args()\n    sm_main(args)\n"
  },
  {
    "path": "examples/spark-app-demo/k8s/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "examples/spark-app-demo/k8s/archetype/liminal.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\n# superliminal for local development\nname: InfraSpark\nowner: Bosco Albert Baracus\ntype: super\nexecutors:\n  - executor: k8s\n    type: kubernetes\nvariables:\n  output_root_dir: /mnt/gettingstartedvol\n  input_root_dir: ''\nimages:\n  - image: my_spark_image\n    source: .\n    type: spark\n    no_cache: true\ntask_defaults:\n  spark:\n    executor: k8s\n    image: my_spark_image\n    executors: 2\n    application_source: '{{application}}'\n    mounts:\n      - mount: mymount\n        volume: gettingstartedvol\n        path: /mnt/gettingstartedvol\n"
  },
  {
    "path": "examples/spark-app-demo/k8s/data/iris.csv",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n#\n# source: https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data\nsepallength,sepalwidth,petallength,petalwidth,class\n5.1,3.5,1.4,0.2,Iris-setosa\n4.9,3.0,1.4,0.2,Iris-setosa\n4.7,3.2,1.3,0.2,Iris-setosa\n4.6,3.1,1.5,0.2,Iris-setosa\n5.0,3.6,1.4,0.2,Iris-setosa\n5.4,3.9,1.7,0.4,Iris-setosa\n4.6,3.4,1.4,0.3,Iris-setosa\n5.0,3.4,1.5,0.2,Iris-setosa\n4.4,2.9,1.4,0.2,Iris-setosa\n4.9,3.1,1.5,0.1,Iris-setosa\n5.4,3.7,1.5,0.2,Iris-setosa\n4.8,3.4,1.6,0.2,Iris-setosa\n4.8,3.0,1.4,0.1,Iris-setosa\n4.3,3.0,1.1,0.1,Iris-setosa\n5.8,4.0,1.2,0.2,Iris-setosa\n5.7,4.4,1.5,0.4,Iris-setosa\n5.4,3.9,1.3,0.4,Iris-setosa\n5.1,3.5,1.4,0.3,Iris-setosa\n5.7,3.8,1.7,0.3,Iris-setosa\n5.1,3.8,1.5,0.3,Iris-setosa\n5.4,3.4,1.7,0.2,Iris-setosa\n5.1,3.7,1.5,0.4,Iris-setosa\n4.6,3.6,1.0,0.2,Iris-setosa\n5.1,3.3,1.7,0.5,Iris-setosa\n4.8,3.4,1.9,0.2,Iris-setosa\n5.0,3.0,1.6,0.2,Iris-setosa\n5.0,3.4,1.6,0.4,Iris-setosa\n5.2,3.5,1.5,0.2,Iris-setosa\n5.2,3.4,1.4,0.2,Iris-setosa\n4.7,3.2,1.6,0.2,Iris-setosa\n4.8,3.1,1.6,0.2,Iris-setosa\n5.4,3.4,1.5,0.4,Iris-setosa\n5.2,4.1,1.5,0.1,Iris-setosa\n5.5,4.2,1.4,0.2,Iris-setosa\n4.9,3.1,1.5,0.1,Iris-setosa\n5.0,3.2,1.2,0.2,Iris-
setosa\n5.5,3.5,1.3,0.2,Iris-setosa\n4.9,3.1,1.5,0.1,Iris-setosa\n4.4,3.0,1.3,0.2,Iris-setosa\n5.1,3.4,1.5,0.2,Iris-setosa\n5.0,3.5,1.3,0.3,Iris-setosa\n4.5,2.3,1.3,0.3,Iris-setosa\n4.4,3.2,1.3,0.2,Iris-setosa\n5.0,3.5,1.6,0.6,Iris-setosa\n5.1,3.8,1.9,0.4,Iris-setosa\n4.8,3.0,1.4,0.3,Iris-setosa\n5.1,3.8,1.6,0.2,Iris-setosa\n4.6,3.2,1.4,0.2,Iris-setosa\n5.3,3.7,1.5,0.2,Iris-setosa\n5.0,3.3,1.4,0.2,Iris-setosa\n7.0,3.2,4.7,1.4,Iris-versicolor\n6.4,3.2,4.5,1.5,Iris-versicolor\n6.9,3.1,4.9,1.5,Iris-versicolor\n5.5,2.3,4.0,1.3,Iris-versicolor\n6.5,2.8,4.6,1.5,Iris-versicolor\n5.7,2.8,4.5,1.3,Iris-versicolor\n6.3,3.3,4.7,1.6,Iris-versicolor\n4.9,2.4,3.3,1.0,Iris-versicolor\n6.6,2.9,4.6,1.3,Iris-versicolor\n5.2,2.7,3.9,1.4,Iris-versicolor\n5.0,2.0,3.5,1.0,Iris-versicolor\n5.9,3.0,4.2,1.5,Iris-versicolor\n6.0,2.2,4.0,1.0,Iris-versicolor\n6.1,2.9,4.7,1.4,Iris-versicolor\n5.6,2.9,3.6,1.3,Iris-versicolor\n6.7,3.1,4.4,1.4,Iris-versicolor\n5.6,3.0,4.5,1.5,Iris-versicolor\n5.8,2.7,4.1,1.0,Iris-versicolor\n6.2,2.2,4.5,1.5,Iris-versicolor\n5.6,2.5,3.9,1.1,Iris-versicolor\n5.9,3.2,4.8,1.8,Iris-versicolor\n6.1,2.8,4.0,1.3,Iris-versicolor\n6.3,2.5,4.9,1.5,Iris-versicolor\n6.1,2.8,4.7,1.2,Iris-versicolor\n6.4,2.9,4.3,1.3,Iris-versicolor\n6.6,3.0,4.4,1.4,Iris-versicolor\n6.8,2.8,4.8,1.4,Iris-versicolor\n6.7,3.0,5.0,1.7,Iris-versicolor\n6.0,2.9,4.5,1.5,Iris-versicolor\n5.7,2.6,3.5,1.0,Iris-versicolor\n5.5,2.4,3.8,1.1,Iris-versicolor\n5.5,2.4,3.7,1.0,Iris-versicolor\n5.8,2.7,3.9,1.2,Iris-versicolor\n6.0,2.7,5.1,1.6,Iris-versicolor\n5.4,3.0,4.5,1.5,Iris-versicolor\n6.0,3.4,4.5,1.6,Iris-versicolor\n6.7,3.1,4.7,1.5,Iris-versicolor\n6.3,2.3,4.4,1.3,Iris-versicolor\n5.6,3.0,4.1,1.3,Iris-versicolor\n5.5,2.5,4.0,1.3,Iris-versicolor\n5.5,2.6,4.4,1.2,Iris-versicolor\n6.1,3.0,4.6,1.4,Iris-versicolor\n5.8,2.6,4.0,1.2,Iris-versicolor\n5.0,2.3,3.3,1.0,Iris-versicolor\n5.6,2.7,4.2,1.3,Iris-versicolor\n5.7,3.0,4.2,1.2,Iris-versicolor\n5.7,2.9,4.2,1.3,Iris-versicolor\n6.2,2.9,4.3,1.3,Iris-versicolor\n5.
1,2.5,3.0,1.1,Iris-versicolor\n5.7,2.8,4.1,1.3,Iris-versicolor\n6.3,3.3,6.0,2.5,Iris-virginica\n5.8,2.7,5.1,1.9,Iris-virginica\n7.1,3.0,5.9,2.1,Iris-virginica\n6.3,2.9,5.6,1.8,Iris-virginica\n6.5,3.0,5.8,2.2,Iris-virginica\n7.6,3.0,6.6,2.1,Iris-virginica\n4.9,2.5,4.5,1.7,Iris-virginica\n7.3,2.9,6.3,1.8,Iris-virginica\n6.7,2.5,5.8,1.8,Iris-virginica\n7.2,3.6,6.1,2.5,Iris-virginica\n6.5,3.2,5.1,2.0,Iris-virginica\n6.4,2.7,5.3,1.9,Iris-virginica\n6.8,3.0,5.5,2.1,Iris-virginica\n5.7,2.5,5.0,2.0,Iris-virginica\n5.8,2.8,5.1,2.4,Iris-virginica\n6.4,3.2,5.3,2.3,Iris-virginica\n6.5,3.0,5.5,1.8,Iris-virginica\n7.7,3.8,6.7,2.2,Iris-virginica\n7.7,2.6,6.9,2.3,Iris-virginica\n6.0,2.2,5.0,1.5,Iris-virginica\n6.9,3.2,5.7,2.3,Iris-virginica\n5.6,2.8,4.9,2.0,Iris-virginica\n7.7,2.8,6.7,2.0,Iris-virginica\n6.3,2.7,4.9,1.8,Iris-virginica\n6.7,3.3,5.7,2.1,Iris-virginica\n7.2,3.2,6.0,1.8,Iris-virginica\n6.2,2.8,4.8,1.8,Iris-virginica\n6.1,3.0,4.9,1.8,Iris-virginica\n6.4,2.8,5.6,2.1,Iris-virginica\n7.2,3.0,5.8,1.6,Iris-virginica\n7.4,2.8,6.1,1.9,Iris-virginica\n7.9,3.8,6.4,2.0,Iris-virginica\n6.4,2.8,5.6,2.2,Iris-virginica\n6.3,2.8,5.1,1.5,Iris-virginica\n6.1,2.6,5.6,1.4,Iris-virginica\n7.7,3.0,6.1,2.3,Iris-virginica\n6.3,3.4,5.6,2.4,Iris-virginica\n6.4,3.1,5.5,1.8,Iris-virginica\n6.0,3.0,4.8,1.8,Iris-virginica\n6.9,3.1,5.4,2.1,Iris-virginica\n6.7,3.1,5.6,2.4,Iris-virginica\n6.9,3.1,5.1,2.3,Iris-virginica\n5.8,2.7,5.1,1.9,Iris-virginica\n6.8,3.2,5.9,2.3,Iris-virginica\n6.7,3.3,5.7,2.5,Iris-virginica\n6.7,3.0,5.2,2.3,Iris-virginica\n6.3,2.5,5.0,1.9,Iris-virginica\n6.5,3.0,5.2,2.0,Iris-virginica\n6.2,3.4,5.4,2.3,Iris-virginica\n5.9,3.0,5.1,1.8,Iris-virginica\n"
  },
  {
    "path": "examples/spark-app-demo/k8s/data_cleanup.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport argparse\n\nimport pyspark.sql.functions as F\nfrom pyspark.ml import Pipeline\nfrom pyspark.ml.feature import StandardScaler, StringIndexer, VectorAssembler\nfrom pyspark.ml.functions import vector_to_array\nfrom pyspark.sql import SparkSession\n\n\ndef transform(data):\n    columns_to_scale = data.columns[:-1]\n    vectorizer = VectorAssembler(inputCols=columns_to_scale, outputCol=\"features\")\n    scaler = StandardScaler(inputCol=\"features\", outputCol=\"scaled_features\", withStd=True, withMean=True)\n    labeler = StringIndexer(inputCol=data.columns[-1], outputCol='label')\n    pipeline = Pipeline(stages=[vectorizer, scaler, labeler])\n    fitted = pipeline.fit(data)\n    transformed = fitted.transform(data)\n\n    result = transformed.withColumn(\"feature_arr\", vector_to_array(\"scaled_features\")).select(\n        [F.col(\"feature_arr\")[i].alias(columns_to_scale[i]) for i in range(len(columns_to_scale))] + ['label']\n    )\n\n    return result\n\n\ndef extract(spark, input_uri):\n    return spark.read.csv(input_uri, header=True, inferSchema=True, comment=\"#\")\n\n\ndef load(data, output_uri):\n    
data.coalesce(1).write.mode(\"overwrite\").csv(output_uri, header=True)\n\n\ndef data_pipeline(input_uri, output_uri):\n    spark = SparkSession.builder.appName(\"Prepare Iris Data\").getOrCreate()\n\n    input = extract(spark, input_uri)\n    data = transform(input)\n    load(data, output_uri)\n    spark.stop()\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--input_uri\")\n    parser.add_argument(\"--output_uri\")\n    args = parser.parse_args()\n    data_pipeline(args.input_uri, args.output_uri)\n"
  },
  {
    "path": "examples/spark-app-demo/k8s/liminal.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nname: MyFirstLiminalSparkApp\nsuper: InfraSpark\nowner: Bosco Albert Baracus\nvariables:\n  training_data_path: '{{output_root_dir}}/iris/'\n  application: data_cleanup.py\nimages:\n  - image: myorg/mydatascienceapp\n    type: python_server\n    source: .\n    endpoints:\n      - endpoint: /predict\n        module: serving\n        function: predict\n      - endpoint: /healthcheck\n        module: serving\n        function: healthcheck\n      - endpoint: /version\n        module: serving\n        function: version\ntask_defaults:\n  python:\n    mounts:\n      - mount: mymount\n        volume: gettingstartedvol\n        path: /mnt/gettingstartedvol\npipelines:\n  - pipeline: my_first_spark_pipeline\n    start_date: 1970-01-01\n    timeout_minutes: 45\n    schedule: 0 * 1 * *\n    tasks:\n      - task: data_preprocessing\n        type: spark\n        description: prepare the data for training\n        application_arguments:\n          - --input_uri\n          - '{{input_root_dir}}data/iris.csv'\n          - --output_uri\n          - '{{training_data_path}}'\n      - task: train\n        type: python\n        description: train model\n        image: 
myorg/mydatascienceapp\n        cmd: python -u training.py --action train --input_uri '{{training_data_path}}'\n        env:\n          MOUNT_PATH: /mnt/gettingstartedvol\n      - task: validate\n        type: python\n        description: validate model and deploy\n        image: myorg/mydatascienceapp\n        cmd: python -u training.py --action validate --input_uri '{{training_data_path}}'\n        env:\n          MOUNT_PATH: /mnt/gettingstartedvol\nvolumes:\n  - volume: gettingstartedvol\n    claim_name: gettingstartedvol-pvc\n    local:\n      path: .\nservices:\n  - service:\n    name: my_datascience_server\n    type: python_server\n    description: my ds server\n    image: myorg/mydatascienceapp\n    source: .\n    endpoints:\n      - endpoint: /predict\n        module: serving\n        function: predict\n      - endpoint: /healthcheck\n        module: serving\n        function: healthcheck\n"
  },
  {
    "path": "examples/spark-app-demo/k8s/manifests/spark-app-demo.yaml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\n\napiVersion: v1\nkind: Pod\nmetadata:\n  name: spark-app-demo\nspec:\n  volumes:\n    - name: task-pv-storage\n      persistentVolumeClaim:\n        claimName: gettingstartedvol-pvc\n  containers:\n    - name: task-pv-container\n      imagePullPolicy: Never\n      image: myorg/mydatascienceapp\n      lifecycle:\n        postStart:\n          exec:\n            command: [/bin/bash, -c, apt update && apt install curl -y]\n      ports:\n        - containerPort: 80\n          name: http-server\n      volumeMounts:\n        - mountPath: /mnt/gettingstartedvol\n          name: task-pv-storage\n"
  },
  {
    "path": "examples/spark-app-demo/k8s/model_store.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport glob\nimport os\nimport pickle\nimport time\n\nMOUNT_PATH = os.environ.get('MOUNT_PATH', '/mnt/gettingstartedvol')\nPRODUCTION = 'production'\nCANDIDATE = 'candidate'\n\n_ONE_HOUR = 60 * 60\n\n\nclass ModelStore:\n    def __init__(self, env):\n        self.env = env\n        self._latest_model = None\n        self._latest_version = None\n        self._last_check = time.time()\n\n    def load_latest_model(self, force=False):\n        if not self._latest_model or time.time() - self._last_check > _ONE_HOUR or force:\n            self._latest_model, self._latest_version = self._download_latest_model()\n\n        return self._latest_model, self._latest_version\n\n    def save_model(self, model, version):\n        key = 'model.p'\n        path = f'{MOUNT_PATH}/{self.env}/{version}'\n\n        os.makedirs(f'{path}', exist_ok=True)\n        pickle.dump(model, open(f'{path}/{key}', \"wb\"))\n\n    def _download_latest_model(self):\n        objects = glob.glob(f'{MOUNT_PATH}/{self.env}/**/*')\n        models = list(reversed(sorted(obj for obj in objects if obj.endswith('.p'))))\n        latest_key = models[0]\n        version = latest_key.rsplit('/')[-2]\n   
     print(f'Loading model version {version}')\n        return pickle.load(open(latest_key, 'rb')), version\n"
  },
  {
    "path": "examples/spark-app-demo/k8s/requirements.txt",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nscikit-learn==0.24.1\napache-liminal==0.0.2\npyspark==3.1.3\n"
  },
  {
    "path": "examples/spark-app-demo/k8s/serving.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport json\n\nimport model_store\nfrom model_store import ModelStore\n\n_MODEL_STORE = ModelStore(model_store.PRODUCTION)\n_PETAL_WIDTH = 'petal_width'\n\n\ndef predict(input_json):\n    try:\n        input_dict = json.loads(input_json)\n        model, version = _MODEL_STORE.load_latest_model()\n        result = str(model.predict_proba([list(input_dict[_PETAL_WIDTH])])[0][1])\n        return json.dumps({\"result\": result, \"version\": version})\n\n    except IndexError:\n        return 'Failure: the model is not ready yet'\n\n    except Exception as e:\n        print(e)\n        return 'Failure'\n\n\ndef healthcheck(self):\n    return 'Server is up!'\n\n\ndef version(self):\n    try:\n        model, version = _MODEL_STORE.load_latest_model()\n        print(f'version={version}')\n        return version\n    except Exception as e:\n        return e\n"
  },
  {
    "path": "examples/spark-app-demo/k8s/training.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport os\nimport sys\nimport time\n\nimport model_store\nimport numpy as np\nimport pandas as pd\nfrom model_store import ModelStore\nfrom sklearn import datasets, model_selection\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\n\n_CANDIDATE_MODEL_STORE = ModelStore(model_store.CANDIDATE)\n_PRODUCTION_MODEL_STORE = ModelStore(model_store.PRODUCTION)\n\nimport argparse\nimport csv\n\nimport numpy as np\n\n\ndef load_iris_from_csv_file(f):\n    df = pd.read_csv(f, header=0).reset_index(drop=True)\n    types = {col: np.float64 for col in df.columns[:-1]}\n    types['label'] = np.int32\n    df = df.astype(types)\n    return df\n\n\ndef get_dataset(d):\n    print(f\"searching for csv files in {d}\")\n    for root, dirs, files in os.walk(d):\n\n        for file in files:\n            if file.endswith(\".csv\"):\n                return os.path.join(d, file)\n    return None\n\n\ndef load_and_split(input_uri):\n    csv_file = get_dataset(input_uri)\n    if csv_file:\n        print(f\"found {csv_file} dataset\")\n\n    iris = load_iris_from_csv_file(csv_file)\n    return train_test_split(iris, 
test_size=0.2, random_state=8)\n\n\ndef train_model(input_uri):\n    train, test = load_and_split(input_uri)\n    y = train.pop(\"label\")\n    X = train.loc[:, train.columns]\n\n    model = LogisticRegression(max_iter=500)\n    model.fit(X, y)\n\n    scoring = 'accuracy'\n    results = model_selection.cross_val_score(model, X, y, cv=5, scoring=scoring)\n    print(f'Accuracy in cross validation: {results.mean()*100} % ({results.std()} std)')\n    version = round(time.time())\n\n    print(f'Saving model with version {version} to candidate model store.')\n    _CANDIDATE_MODEL_STORE.save_model(model, version)\n\n\ndef validate_model(input_uri):\n    model, version = _CANDIDATE_MODEL_STORE.load_latest_model()\n    print(f'Validating model with version {version} to candidate model store.')\n    if not isinstance(model.predict([[1, 1, 1, 1]]), np.ndarray):\n        raise ValueError('Invalid model')\n    train, test = load_and_split(input_uri)\n    y = test.pop(\"label\")\n    X = test.loc[:, test.columns]\n    result = model.score(X, y)\n    print(f'model accuracy {result*100}')\n    if result < 0.85:\n        raise ValueError('model accuracy under threshold (0.85  ). Model is not promoted to production')\n    print(f'Deploying model with version {version} to production model store.')\n    _PRODUCTION_MODEL_STORE.save_model(model, version)\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--action\", choices=['train', 'validate'])\n    parser.add_argument(\"--input_uri\")\n    args = parser.parse_args()\n    if args.action == 'train':\n        train_model(args.input_uri)\n    else:\n        validate_model(args.input_uri)\n"
  },
  {
    "path": "install.sh",
    "content": "#!/bin/sh\n#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nDIR=\"$( cd \"$( dirname \"${BASH_SOURCE[0]}\" )\" >/dev/null 2>&1 && pwd )\"\n\nyes | pip uninstall apache-liminal\n\ncd \"$DIR\" || exit\n\nrm -rf build\nrm -rf dist\nrm \"${LIMINAL_HOME:-${HOME}/liminal_home}\"/*.whl\n\npython setup.py sdist bdist_wheel\n\npip install dist/*.whl\n\nmkdir -p \"${LIMINAL_HOME:-${HOME}/liminal_home}\"\n\ncp dist/*.whl \"${LIMINAL_HOME:-${HOME}/liminal_home}\"\n\ncd - || exit\n"
  },
  {
    "path": "liminal/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nfrom liminal import settings\n\nsettings.initialize()\n"
  },
  {
    "path": "liminal/build/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/build/image/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/build/image/python/Dockerfile",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n# Use an official Python runtime as a parent image\nFROM {{python}}\n\n# Install aptitude build-essential\nRUN apt-get update && apt-get install -y --reinstall build-essential \\\n                                                   make \\\n                                                   gcc\n\n# Set the working directory to /app\nWORKDIR /app\n\n# Order of operations is important here for docker's caching & incremental build performance.    !\n# Be careful when changing this code.                                                            !\n\n# Install any needed packages specified in requirements.txt\nCOPY ./requirements.txt /app/\n\n# mount the secret in the correct location, then run pip install\nRUN python -m pip install --upgrade pip\nRUN {{mount}} pip install -r requirements.txt\n\n# Copy the current directory contents into the container at /app\nRUN echo \"Copying source code..\"\nCOPY . /app/\n"
  },
  {
    "path": "liminal/build/image/python/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/build/image/python/python.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\n\nfrom liminal.build.python import BasePythonImageBuilder\n\n\nclass PythonImageBuilder(BasePythonImageBuilder):\n    def __init__(self, config, base_path, relative_source_path, tag):\n        super().__init__(config, base_path, relative_source_path, tag)\n\n    @staticmethod\n    def _dockerfile_path():\n        return os.path.join(os.path.dirname(__file__), 'Dockerfile')\n"
  },
  {
    "path": "liminal/build/image/python_server/Dockerfile",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n# Use an official Python runtime as a parent image\nFROM {{python}}\n\n# Install aptitude build-essential\n#RUN apt-get install -y --reinstall build-essential\n\n# Set the working directory to /app\nWORKDIR /app\n\n# Order of operations is important here for docker's caching & incremental build performance.    !\n# Be careful when changing this code.                                                            !\n\n# Install any needed packages specified in python_server_requirements.txt and requirements.txt\nRUN pip install --upgrade pip\n\nCOPY ./python_server_requirements.txt /app/\nRUN pip install -r python_server_requirements.txt\n\nCOPY ./requirements.txt /app/\nRUN {{mount}} pip install -r requirements.txt\n\n# Copy the current directory contents into the container at /app\nRUN echo \"Copying source code..\"\nCOPY . /app/\n\nCMD python -u liminal_python_server.py\n"
  },
  {
    "path": "liminal/build/image/python_server/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/build/image/python_server/liminal_python_server.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport yaml\nfrom flask import Blueprint, Flask, request\n\n\ndef __get_module(kls):\n    parts = kls.split('.')\n    module = \".\".join(parts)\n    m = __import__(module)\n    for comp in parts[1:]:\n        m = getattr(m, comp)\n    return m\n\n\ndef __get_endpoint_function(endpoint_config):\n    module = __get_module(endpoint_config['module'])\n    return module.__getattribute__(endpoint_config['function'])\n\n\nif __name__ == '__main__':\n    with open('service.yml') as stream:\n        config = yaml.safe_load(stream)\n\n    endpoints = {\n        endpoint_config['endpoint'][1:]: __get_endpoint_function(endpoint_config)\n        for endpoint_config in config['endpoints']\n    }\n\n    blueprint = Blueprint('liminal_python_server_blueprint', __name__)\n\n    @blueprint.route('/favicon.ico', methods=('GET', 'POST'))\n    def no_content():\n        return '', 204\n\n    @blueprint.route('/', defaults={'endpoint': ''}, methods=('GET', 'POST'))\n    @blueprint.route('/<endpoint>', methods=('GET', 'POST'))\n    def show(endpoint):\n        if endpoint in endpoints:\n            return endpoints[endpoint](request.get_data())\n        else:\n            
return 'Page not found.', 404\n\n    app = Flask(__name__)\n    app.register_blueprint(blueprint)\n\n    app.run(host='0.0.0.0', threaded=False, port=80)\n"
  },
  {
    "path": "liminal/build/image/python_server/python_server.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\n\nimport yaml\n\nfrom liminal.build.python import BasePythonImageBuilder\n\n\nclass PythonServerImageBuilder(BasePythonImageBuilder):\n    def __init__(self, config, base_path, relative_source_path, tag):\n        super().__init__(config, base_path, relative_source_path, tag)\n\n    @staticmethod\n    def _dockerfile_path():\n        return os.path.join(os.path.dirname(__file__), 'Dockerfile')\n\n    @staticmethod\n    def _additional_files_from_paths():\n        return [\n            os.path.join(os.path.dirname(__file__), 'liminal_python_server.py'),\n            os.path.join(os.path.dirname(__file__), 'python_server_requirements.txt'),\n        ]\n\n    def _additional_files_from_filename_content_pairs(self):\n        return super()._additional_files_from_filename_content_pairs() + [('service.yml', yaml.safe_dump(self.config))]\n"
  },
  {
    "path": "liminal/build/image/python_server/python_server_requirements.txt",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nFlask\npyyaml\n"
  },
  {
    "path": "liminal/build/image/spark/Dockerfile",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nFROM bitnami/spark:3.1.2-debian-10-r23\n\nUSER root\n\nWORKDIR /app\n\nCOPY . /app/\n\nRUN {{mount}} pip install -r requirements.txt\n"
  },
  {
    "path": "liminal/build/image/spark/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/build/image/spark/spark.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport os\n\nfrom liminal.build.python import BasePythonImageBuilder\n\n\nclass SparkImageBuilder(BasePythonImageBuilder):\n    def __init__(self, config, base_path, relative_source_path, tag):\n        super().__init__(config, base_path, relative_source_path, tag)\n\n    @staticmethod\n    def _dockerfile_path():\n        return os.path.join(os.path.dirname(__file__), 'Dockerfile')\n"
  },
  {
    "path": "liminal/build/image_builder.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\nimport shutil\nimport subprocess\nimport tempfile\nimport time\n\n\nclass ImageBuilder:\n    \"\"\"\n    Builds an image from source code\n    \"\"\"\n\n    __NO_CACHE = 'no_cache'\n\n    def __init__(self, config, base_path, relative_source_path, tag):\n        \"\"\"\n        :param config: task/service config\n        :param base_path: directory containing liminal yml\n        :param relative_source_path: source path relative to liminal yml\n        :param tag: image tag\n        \"\"\"\n        self.base_path = base_path\n        self.relative_source_path = relative_source_path\n        self.tag = tag\n        self.config = config\n\n    def build(self):\n        \"\"\"\n        Builds source code into an image.\n        \"\"\"\n        logging.info(f'[ ] Building image: {self.tag}')\n\n        temp_dir = self.__temp_dir()\n\n        self.__copy_source_code(temp_dir)\n        self._write_additional_files(temp_dir)\n\n        no_cache = ''\n        if self.__NO_CACHE in self.config and self.config[self.__NO_CACHE]:\n            no_cache = '--no-cache=true'\n\n        docker = 'docker' if shutil.which('docker') is not None 
else '/usr/local/bin/docker'\n        docker_build_command = f'{docker} build {no_cache} ' + f'--tag {self.tag} '\n\n        docker_build_command += f'--progress=plain {self._build_flags()} '\n\n        docker_build_command += f'{temp_dir}'\n\n        if self._use_buildkit():\n            docker_build_command = f'DOCKER_BUILDKIT=1 {docker_build_command}'\n\n        logging.info(docker_build_command)\n        docker_build_out = []\n        try:\n            build_start = time.time()\n            # Poll process.stdout to show stdout live\n            build_process = subprocess.Popen(\n                docker_build_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT\n            )\n            logging.info('=' * 80)\n            timeout = 960\n            while time.time() < build_start + timeout:\n                output = build_process.stdout.readline()\n                if build_process.poll() is not None:\n                    break\n                if output:\n                    stdout_log_str = output.strip().decode('utf-8')\n                    docker_build_out.append(stdout_log_str)\n                    logging.info(stdout_log_str)\n            logging.info('=' * 80)\n            build_process.stdout.close()\n            build_process.wait(time.time() - (build_start + timeout))\n        except subprocess.CalledProcessError as e:\n            for line in str(e.output)[2:-3].split('\\\\n'):\n                logging.info(line)\n            raise e\n\n        self.__remove_dir(temp_dir)\n\n        logging.info(f'[X] Building image: {self.tag} (Success).')\n\n        return '\\n'.join(docker_build_out)\n\n    def __copy_source_code(self, temp_dir):\n        self.__copy_dir(os.path.join(self.base_path, self.relative_source_path), temp_dir)\n\n    def _write_additional_files(self, temp_dir):\n        for file in [self._dockerfile_path()] + self._additional_files_from_paths():\n            self.__copy_file(file, temp_dir)\n\n        for filename, 
content in self._additional_files_from_filename_content_pairs():\n            with open(os.path.join(temp_dir, filename), 'w') as file:\n                file.write(content)\n\n    def __temp_dir(self):\n        temp_dir = tempfile.mkdtemp()\n        # Delete dir for shutil.copytree to work\n        self.__remove_dir(temp_dir)\n        return temp_dir\n\n    @staticmethod\n    def __remove_dir(temp_dir):\n        shutil.rmtree(temp_dir)\n\n    @staticmethod\n    def __copy_dir(source_path, destination_path):\n        shutil.copytree(source_path, destination_path)\n\n    @staticmethod\n    def __copy_file(source_file_path, destination_file_path):\n        shutil.copy2(source_file_path, destination_file_path)\n\n    @staticmethod\n    def _dockerfile_path():\n        \"\"\"\n        Path to Dockerfile\n        \"\"\"\n        raise NotImplementedError()\n\n    @staticmethod\n    def _additional_files_from_paths():\n        \"\"\"\n        List of paths to additional files\n        \"\"\"\n        return []\n\n    def _additional_files_from_filename_content_pairs(self):\n        \"\"\"\n        File name and content pairs to create files from\n        \"\"\"\n        return []\n\n    def _build_flags(self):\n        \"\"\"\n        Additional build flags to add to docker build command.\n        \"\"\"\n        return ''\n\n    def _use_buildkit(self):\n        \"\"\"\n        overwrite with True to use docker buildkit\n        \"\"\"\n        return False\n"
  },
  {
    "path": "liminal/build/liminal_apps_builder.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\n\nfrom liminal.core.config.config import ConfigUtil\nfrom liminal.core.util import class_util, extensible, files_util\n\n\ndef build_liminal_apps(path):\n    \"\"\"\n    Build images for liminal apps in path.\n    \"\"\"\n    config_util = ConfigUtil(path)\n    configs = config_util.safe_load(is_render_variables=True, soft_merge=True)\n\n    for liminal_config in configs:\n        base_path = os.path.dirname(files_util.resolve_pipeline_source_file(liminal_config['name']))\n        if 'images' in liminal_config:\n            for image in liminal_config['images']:\n                image_name = image['image']\n\n                if 'source' in image:\n                    image_type = image['type']\n                    builder_class = __get_image_builder_class(image_type)\n                    if builder_class:\n                        __build_image(base_path, image, builder_class)\n                    else:\n                        raise ValueError(f'No such image type: {image_type}')\n                else:\n                    logging.warning(f'No source configured for image {image_name}.')\n\n\ndef __build_image(base_path, 
builder_config, builder):\n    builder_instance = builder(\n        config=builder_config,\n        base_path=base_path,\n        relative_source_path=builder_config['source'],\n        tag=builder_config['image'],\n    )\n    builder_instance.build()\n\n\ndef __get_image_builder_class(task_type):\n    return image_builder_types.get(task_type, None)\n\n\nlogging.info(f'Loading image builder implementations..')\n\nimage_builder_types = extensible.load_image_builders()\n\nlogging.info(f'Finished loading image builder implementations: {image_builder_types}')\nlogging.info(f'Loading service image builder implementations..')\n"
  },
  {
    "path": "liminal/build/python.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\n\nfrom liminal.build.image_builder import ImageBuilder\n\n\nclass PythonImageVersions:\n    \"\"\"\n    Handles the python versions for python images.\n    \"\"\"\n\n    @property\n    def default_version(self):\n        return '3.7'\n\n    @property\n    def supported_versions(self):\n        return '3.6', '3.7', '3.8', '3.9'\n\n    def get_image_name(self, python_version):\n        \"\"\"\n        :param python_version: The python version that would be installed in\n            the docker image. 
For example '3.8', '3.8.1' etc.\n        :type python_version: str\n        :return: The name of the base (slim) python image\n        :rtype: str\n        \"\"\"\n        if not python_version:\n            python_version = self.default_version\n        else:\n            python_version = str(python_version)\n        if python_version[:3] not in self.supported_versions:\n            raise ValueError(\n                f'liminal supports the following python versions: '\n                f'{self.supported_versions} but {python_version} '\n                f'were passed'\n            )\n        return f'python:{python_version}-slim'\n\n\nclass BasePythonImageBuilder(ImageBuilder):\n    \"\"\"\n    Base class for building python images.\n    \"\"\"\n\n    __PIP_CONF = 'pip_conf'\n    __PYTHON_VERSION = 'python_version'\n\n    def __init__(self, config, base_path, relative_source_path, tag, base_image=PythonImageVersions()):\n        super().__init__(config, base_path, relative_source_path, tag)\n        self._base_image = base_image\n\n    @staticmethod\n    def _dockerfile_path():\n        raise NotImplementedError()\n\n    def _write_additional_files(self, temp_dir):\n        requirements_file_path = os.path.join(temp_dir, 'requirements.txt')\n        if not os.path.exists(requirements_file_path):\n            with open(requirements_file_path, 'w'):\n                pass\n\n        super()._write_additional_files(temp_dir)\n\n    def _additional_files_from_filename_content_pairs(self):\n        with open(self._dockerfile_path()) as original:\n            data = original.read()\n\n        data = self.__mount_pip_conf(data)\n        data = self.__add_python_base_version(data)\n\n        return [('Dockerfile', data)]\n\n    def __mount_pip_conf(self, data):\n        new_data = data\n\n        if self.__PIP_CONF in self.config:\n            new_data = '# syntax = docker/dockerfile:1.0-experimental\\n' + data\n            new_data = new_data.replace('{{mount}}', 
'--mount=type=secret,id=pip_config,dst=/etc/pip.conf \\\\\\n')\n        else:\n            new_data = new_data.replace('{{mount}} ', '')\n\n        return new_data\n\n    def __add_python_base_version(self, data):\n        python_version = self.config.get(self.__PYTHON_VERSION)\n        base_image = self._base_image.get_image_name(python_version)\n        return data.replace('{{python}}', base_image)\n\n    def _build_flags(self):\n        if self.__PIP_CONF in self.config:\n            return f'--secret id=pip_config,src={self.config[self.__PIP_CONF]}'\n        else:\n            return ''\n\n    def _use_buildkit(self):\n        if self.__PIP_CONF in self.config:\n            return True\n"
  },
  {
    "path": "liminal/core/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/core/config/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/core/config/config.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\nimport traceback\n\nfrom liminal.core import environment\nfrom liminal.core.config.defaults import base, default_configs\nfrom liminal.core.util import dict_util, files_util\n\n\nclass ConfigUtil:\n    \"\"\"\n    Load and enrich config files under configs_path.\n    \"\"\"\n\n    __BASE = 'base'\n    __PIPELINES = 'pipelines'\n    __SUPER = 'super'\n    __TYPE = 'type'\n    __SUB = 'sub'\n    __SERVICES = 'services'\n    __TASKS = 'tasks'\n    __PIPELINE_DEFAULTS = 'pipeline_defaults'\n    __TASK_DEFAULTS = 'task_defaults'\n    __BEFORE_TASKS = 'before_tasks'\n    __AFTER_TASKS = 'after_tasks'\n    __EXECUTORS = 'executors'\n    __IMAGES = 'images'\n    __BASE = \"base\"\n    __PIPELINES = \"pipelines\"\n    __SUPER = \"super\"\n    __TYPE = \"type\"\n    __SUB = \"sub\"\n    __SERVICES = \"services\"\n    __TASKS = \"tasks\"\n    __PIPELINE_DEFAULTS = \"pipeline_defaults\"\n    __TASK_DEFAULTS = \"task_defaults\"\n    __BEFORE_TASKS = \"before_tasks\"\n    __AFTER_TASKS = \"after_tasks\"\n    __EXECUTORS = \"executors\"\n\n    def __init__(self, configs_path):\n        self.configs_path = configs_path\n        
self.config_files = files_util.load(configs_path)\n        self.base = base.BASE\n        self.loaded_subliminals = []\n        self.snapshot_path = os.path.join(environment.get_airflow_home_dir(), '../liminal_config_files')\n\n    def safe_load(self, is_render_variables, soft_merge=False):\n        \"\"\"\n        :returns list of config files after enrich with defaults and supers\n        \"\"\"\n        if self.loaded_subliminals:\n            return self.loaded_subliminals\n\n        configs = self.config_files.values()\n        enriched_configs = []\n\n        for subliminal in [config for config in configs if self.__is_subliminal(config)]:\n            name = subliminal.get('name')\n            logging.info(f'Loading yml {name}')\n            # noinspection PyBroadException\n            try:\n                superliminal = self.__get_superliminal(subliminal, soft_merge)\n                enriched_config = self.__merge_configs(subliminal, superliminal, is_render_variables, soft_merge)\n                enriched_configs.append(enriched_config)\n            except Exception:\n                logging.error(f'Failed to load yml {name}')\n                traceback.print_exc()\n\n        self.loaded_subliminals = enriched_configs\n\n        return self.loaded_subliminals\n\n    def __merge_configs(self, subliminal, superliminal, is_render_variables, soft_merge):\n        if not superliminal:\n            return subliminal\n\n        sub = subliminal.copy()\n        supr = superliminal.copy()\n\n        merged_superliminal = self.__merge_configs(\n            supr, self.__get_superliminal(supr, soft_merge), is_render_variables, soft_merge\n        )\n\n        sub[self.__EXECUTORS] = self.__merge_section(sub, merged_superliminal, self.__EXECUTORS)\n        sub[self.__IMAGES] = self.__merge_section(sub, merged_superliminal, self.__IMAGES)\n\n        if self.__is_subliminal(sub):\n            return self.__merge_sub_and_super(sub, merged_superliminal, 
is_render_variables)\n        else:\n            return self.__merge_superliminals(sub, merged_superliminal)\n\n    def __get_superliminal(self, liminal, soft_merge):\n        superliminal = {}\n        if not self.__is_base_config(liminal):\n            superliminal_name = liminal.get(self.__SUPER, '')\n            if not superliminal_name:\n                superliminal = self.base\n            else:\n                superliminal = self.__get_config(superliminal_name)\n                if not superliminal:\n                    supr_is_missing_msg = (\n                        f\"superliminal '{superliminal_name}' \" + f\"is missing from '{self.configs_path}'\"\n                    )\n                    if soft_merge:\n                        logging.warning(supr_is_missing_msg)\n                    else:\n                        raise FileNotFoundError(supr_is_missing_msg)\n\n        return superliminal\n\n    def __get_base_config(self):\n        return self.base\n\n    def __is_base_config(self, config):\n        return config.get('name', '') == self.__BASE\n\n    def __is_subliminal(self, config):\n        is_subliminal = config.get(self.__TYPE, self.__SUB) != self.__SUPER\n        if is_subliminal:\n            config[self.__TYPE] = self.__SUB\n        return is_subliminal\n\n    def __get_config(self, config_name):\n        return self.config_files.get(config_name)\n\n    def __merge_sub_and_super(self, sub, supr, is_render_variables):\n        merged_pipelines = list()\n\n        for pipeline in sub.get(self.__PIPELINES, {}):\n            final_pipeline = self.__apply_pipeline_defaults(sub, supr, pipeline)\n            merged_pipelines.append(final_pipeline)\n\n        sub[self.__PIPELINES] = merged_pipelines\n        sub[self.__SERVICES] = default_configs.apply_service_defaults(sub, supr)\n\n        sub = dict_util.merge_dicts(supr.copy(), sub)\n\n        return default_configs.apply_variable_substitution(sub, supr, is_render_variables)\n\n    def 
__merge_superliminals(self, super1, super2):\n        super1_pipeline_defaults = super1.get(self.__PIPELINE_DEFAULTS, {}).copy()\n        super2_pipeline_defaults = super2.get(self.__PIPELINE_DEFAULTS, {}).copy()\n\n        super1[self.__PIPELINE_DEFAULTS] = super1_pipeline_defaults\n        super1[self.__PIPELINE_DEFAULTS][self.__BEFORE_TASKS] = super2_pipeline_defaults.pop(\n            self.__BEFORE_TASKS, []\n        ) + super1_pipeline_defaults.pop(self.__BEFORE_TASKS, [])\n\n        super2[self.__PIPELINE_DEFAULTS] = super2_pipeline_defaults\n        super1[self.__PIPELINE_DEFAULTS][self.__AFTER_TASKS] = super1_pipeline_defaults.pop(\n            self.__AFTER_TASKS, []\n        ) + super2_pipeline_defaults.pop(self.__AFTER_TASKS, [])\n\n        # merge supers tasks\n        return dict_util.merge_dicts(super1, super2, True)\n\n    def snapshot_final_liminal_configs(self):\n        files_util.dump_liminal_configs(liminal_configs=self.loaded_subliminals, path=self.snapshot_path)\n\n    def __merge_section(self, subliminal, superliminal, section):\n        return self.__deep_list_keyword_merge(section[:-1], subliminal.get(section, []), superliminal.get(section, []))\n\n    @staticmethod\n    def __apply_pipeline_defaults(subliminal, superliminal, pipeline):\n        return default_configs.apply_pipeline_defaults(subliminal, superliminal, pipeline)\n\n    @staticmethod\n    def __deep_list_keyword_merge(unique_key_name, subliminal_list_conf, superliminal_list_conf):\n        subliminal_key_map = {item[unique_key_name]: item for item in subliminal_list_conf}\n        superliminal_key_map = {item[unique_key_name]: item for item in superliminal_list_conf}\n\n        return list(dict_util.merge_dicts(superliminal_key_map, subliminal_key_map, recursive=True).values())\n"
  },
  {
    "path": "liminal/core/config/defaults/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/core/config/defaults/base/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport os\n\nfrom liminal.core.util import files_util\n\nBASE = files_util.load(os.path.dirname(__file__))['base'].copy()\n"
  },
  {
    "path": "liminal/core/config/defaults/base/liminal.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nname: base\ntype: super\nexecutors:\n  - executor: default_k8s\n    type: kubernetes\n  - executor: airflow_executor\n    type: airflow\nservice_defaults:\n  description: add defaults parameters for all services\ntask_defaults:\n  description: add defaults parameters for all tasks separate by task type\n  python:\n    executor: default_k8s\n  spark:\n    executor: default_k8s\n  job_end:\n    executor: airflow_executor\n  job_start:\n    executor: airflow_executor\npipeline_defaults:\n  description: add defaults parameters for all pipelines\n  before_tasks:\n    - task: start\n      type: job_start\n  after_tasks:\n    - task: end\n      type: job_end\n"
  },
  {
    "path": "liminal/core/config/defaults/default_configs.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom liminal.core.util import dict_util\nfrom liminal.core.util.dict_util import merge_dicts\n\n__SERVICES = \"services\"\n__TASKS = \"tasks\"\n__BEFORE_TASKS = \"before_tasks\"\n__AFTER_TASKS = \"after_tasks\"\n\n\ndef apply_variable_substitution(subliminal, superliminal, is_render_variables=False):\n    \"\"\"\n    if is_render_variables is True\n       Replace all {{variable.key}} in subliminal with variable.value variable in\n       subliminal.variables + superliminal.variables\n    else\n       merge subliminal.variables with superliminal.variables without replace\n       placeholders\n    \"\"\"\n    keyword = \"variables\"\n    merged_variables = dict_util.merge_dicts(subliminal.get(keyword, {}), superliminal.get(keyword, {}), True)\n    if is_render_variables:\n        for k, v in merged_variables.items():\n            if isinstance(v, str) or (not isinstance(v, dict) and not isinstance(v, list)):\n                merged_variables[k] = dict_util.replace_placholders_in_string(str(v), merged_variables)\n\n        merged_variables = dict_util.replace_placeholders(merged_variables, merged_variables)\n        return 
dict_util.replace_placeholders(subliminal, merged_variables)\n    else:\n        subliminal[keyword] = merged_variables\n        return subliminal\n\n\ndef apply_service_defaults(subliminal, superliminal):\n    \"\"\"Apply defaults services\n    :param subliminal: subliminal config\n    :param superliminal: superliminal config\n    :returns: enriched services with superliminal.service_defaults & subliminal.service_defaults\n    \"\"\"\n    keyword = \"service_defaults\"\n    superliminal_service_defaults = superliminal.get(keyword, {})\n    subliminal_service_defaults = subliminal.get(keyword, {})\n\n    merged_service_defaults = merge_dicts(subliminal_service_defaults, superliminal_service_defaults, recursive=True)\n\n    return [merge_dicts(service, merged_service_defaults, True) for service in subliminal.get(__SERVICES, {})]\n\n\ndef apply_pipeline_defaults(subliminal, superliminal, pipeline):\n    \"\"\"Apply defaults values on given pipeline\n    :param subliminal: subliminal config\n    :param superliminal: superliminal config\n    :param  pipeline: to apply defaults on\n\n    :returns: enriched pipeline with superliminal.pipeline_defaults & subliminal.pipeline_defaults\n    \"\"\"\n    keyword = \"pipeline_defaults\"\n    superliminal_pipe_defaults = superliminal.get(keyword, {}).copy()\n    subliminal_pipe_defaults = subliminal.get(keyword, {}).copy()\n    superliminal_before_tasks = superliminal_pipe_defaults.pop(__BEFORE_TASKS, [])\n    superliminal_after_tasks = superliminal_pipe_defaults.pop(__AFTER_TASKS, [])\n    merged_pipeline_defaults = merge_dicts(subliminal_pipe_defaults, superliminal_pipe_defaults, True)\n    pipeline = merge_dicts(pipeline, merged_pipeline_defaults, True)\n\n    return apply_task_defaults(\n        subliminal,\n        superliminal,\n        pipeline,\n        superliminal_before_tasks=superliminal_before_tasks,\n        superliminal_after_tasks=superliminal_after_tasks,\n    )\n\n\ndef apply_task_defaults(subliminal, 
superliminal, pipeline, superliminal_before_tasks, superliminal_after_tasks):\n    \"\"\"Apply defaults task values on given pipeline\n    :param subliminal: subliminal config\n    :param superliminal: superliminal config\n    :param  pipeline: where 'tasks' list can be found it\n    :param  superliminal_before_tasks: superliminal before subtasks list\n    :param  superliminal_after_tasks: superliminal after subtasks list\n\n    :returns: pipeline after enrich with superliminal.tasks and superliminal.task_defaults\n     and subliminal.task_defaults\n    \"\"\"\n    pipeline[__TASKS] = __apply_task_defaults(\n        subliminal, superliminal, pipeline.get(__TASKS, []), superliminal_before_tasks, superliminal_after_tasks\n    )\n\n    return pipeline\n\n\ndef __apply_task_defaults(\n    subliminal, superliminal, subliminal_tasks, superliminal_before_tasks, superliminal_after_tasks\n):\n    keyword = \"task_defaults\"\n    subliminal_tasks_defaults = subliminal.get(keyword, {})\n    superliminal_tasks_defaults = superliminal.get(keyword, {})\n    merged_task_defaults = merge_dicts(subliminal_tasks_defaults, superliminal_tasks_defaults, recursive=True)\n\n    return __enrich_tasks(subliminal_tasks, superliminal_before_tasks, superliminal_after_tasks, merged_task_defaults)\n\n\ndef __enrich_tasks(sub_tasks, super_before_tasks, super_after_tasks, task_defaults):\n    merged_tasks = super_before_tasks + sub_tasks + super_after_tasks\n\n    return [merge_dicts(task, task_defaults.get(task.get('type', ''), {}), recursive=True) for task in merged_tasks]\n"
  },
  {
    "path": "liminal/core/environment.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\nimport subprocess\n\nDEFAULT_DAGS_ZIP_NAME = 'liminal.zip'\nDEFAULT_LIMINAL_HOME = os.path.expanduser('~/liminal_home')\nDEFAULT_PIPELINES_SUBDIR = \"pipelines\"\nLIMINAL_HOME_PARAM_NAME = \"LIMINAL_HOME\"\nLIMINAL_HOME_CLOUD_PARAM_NAME = \"AIRFLOW__CORE__LIMINAL_HOME\"\nLIMINAL_VERSION_PARAM_NAME = 'LIMINAL_VERSION'\n\n\ndef get_liminal_home():\n    if LIMINAL_HOME_CLOUD_PARAM_NAME in os.environ:\n        return os.environ.get(LIMINAL_HOME_CLOUD_PARAM_NAME)\n    if not os.environ.get(LIMINAL_HOME_PARAM_NAME):\n        logging.info(\"no environment parameter called LIMINAL_HOME detected\")\n        logging.info(f\"registering {DEFAULT_LIMINAL_HOME} as the LIMINAL_HOME directory\")\n        os.environ[LIMINAL_HOME_PARAM_NAME] = DEFAULT_LIMINAL_HOME\n    return os.environ.get(LIMINAL_HOME_PARAM_NAME, DEFAULT_LIMINAL_HOME)\n\n\ndef get_dags_dir():\n    # if we are inside airflow, we will take it from the configured dags folder\n    base_dir = os.environ.get(\"AIRFLOW__CORE__DAGS_FOLDER\", get_liminal_home())\n    return os.path.join(base_dir, DEFAULT_PIPELINES_SUBDIR)\n\n\ndef get_liminal_version():\n    result = 
os.environ.get(LIMINAL_VERSION_PARAM_NAME, None)\n    if not result:\n        output = subprocess.run(\n            ['pip freeze | grep \\'apache-liminal\\''], capture_output=True, env=os.environ, shell=True\n        )\n        pip_res = output.stdout.decode('UTF-8').strip()\n        liminal_home = get_liminal_home()\n        whl_files = [file for file in os.listdir(liminal_home) if file.endswith(\".whl\")]\n        if whl_files:\n            value = 'file://' + os.path.join(liminal_home, whl_files[0])\n        elif ' @ ' in pip_res:\n            value = pip_res[pip_res.index(' @ ') + 3 :]\n        else:\n            value = pip_res\n        logging.info(f'LIMINAL_VERSION not set. Setting it to currently installed version: {value}')\n        os.environ[LIMINAL_VERSION_PARAM_NAME] = value\n    return os.environ.get(LIMINAL_VERSION_PARAM_NAME, 'apache-liminal')\n\n\ndef get_airflow_home_dir():\n    # if we are inside airflow, we will take it from the configured dags folder\n    base_dir = os.environ.get(\"AIRFLOW__CORE__DAGS_FOLDER\", get_liminal_home())\n    return os.path.join(base_dir)\n"
  },
  {
    "path": "liminal/core/util/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/core/util/class_util.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport importlib.util\nimport pkgutil\n\n\ndef find_subclasses_in_packages(packages, parent_class):\n    \"\"\"\n    Finds all subclasses of given parent class within given packages\n    :return: map of module ref -> class\n    \"\"\"\n    module_content = {}\n    for p in packages:\n        module_content.update(import_module(p))\n\n    subclasses = set()\n    work = [parent_class]\n    while work:\n        parent = work.pop()\n        for child in parent.__subclasses__():\n            if child not in subclasses:\n                work.append(child)\n                # verify that the found class is in the relevant module\n                for p in packages:\n                    if p in child.__module__:\n                        subclasses.add(child)\n                        break\n\n    return {sc.__module__.split(\".\")[-1]: sc for sc in subclasses}\n\n\ndef import_module(package, recursive=True):\n    \"\"\"\n    Import all submodules of a module\n\n    :param package: package (name or actual module)\n    :type package: str | module\n    :rtype: dict[str, types.ModuleType]\n\n    :param recursive: search recursively (default: True)\n    :type recursive: 
bool\n    \"\"\"\n    if isinstance(package, str):\n        package = importlib.import_module(package)\n    results = {}\n    for loader, name, is_pkg in pkgutil.walk_packages(package.__path__):\n        if not name == 'liminal_python_server':\n            full_name = package.__name__ + '.' + name\n            results[full_name] = importlib.import_module(full_name)\n            if recursive and is_pkg:\n                results.update(import_module(full_name))\n    return results\n\n\ndef __get_class(the_module, the_class):\n    m = __import__(the_module)\n    for comp in the_module.split('.')[1:] + [the_class]:\n        m = getattr(m, comp)\n    return m\n"
  },
  {
    "path": "liminal/core/util/dict_util.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport re\nfrom collections import OrderedDict\n\nfrom sqlalchemy.util import OrderedSet\n\nfrom liminal.runners.airflow.config import standalone_variable_backend\n\n\ndef merge_dicts(dict1, dict2, recursive=False):\n    \"\"\"\n    :returns dict1 enriched by dict2\n    \"\"\"\n    if not recursive:\n        return {**dict1, **dict2}\n\n    return __merge_dicts(dict1, dict2)\n\n\ndef __merge_dicts(dict1, dict2):\n    # recursive merge\n    merged_dicts = OrderedDict()\n    dict_1_keys = dict1.keys()\n    dict_2_keys = dict2.keys()\n    for k in OrderedSet(dict_1_keys).union(dict_2_keys):\n        if k in dict1 and k in dict2:\n            if isinstance(dict1[k], dict) and isinstance(dict2[k], dict):\n                merged_dicts[k] = dict(__merge_dicts(dict1[k], dict2[k]))\n            else:\n                merged_dicts[k] = dict1[k]\n        elif k in dict_1_keys:\n            merged_dicts[k] = dict1[k]\n        else:\n            merged_dicts[k] = dict2[k]\n    return merged_dicts\n\n\n__PLACE_HOLDER_PATTERN = r\"{{\\s*([a-zA-Z0-9._-]+)\\s*}}\"\n\n\ndef replace_placeholders(dct, variables):\n    \"\"\"\n    Replace all {{variable.key}} in dct with 
variable.value variable in variables\n    \"\"\"\n    return dict(__replace_placeholders(dct, variables))\n\n\ndef replace_placholders_in_string(string_value, variables, pattern=__PLACE_HOLDER_PATTERN):\n    return re.sub(pattern, lambda m: __repl(m, variables), string_value, flags=re.IGNORECASE)\n\n\ndef __replace_placeholders(dct, variables):\n    dct_items = dct.items()\n    for k, v in dct_items:\n        if isinstance(v, str):\n            yield k, replace_placholders_in_string(v, variables)\n        elif isinstance(v, dict):\n            yield k, dict(__replace_placeholders(v, variables))\n        elif isinstance(v, list):\n            yield k, list(__replace_placeholder_in_list(v, variables))\n        else:\n            yield k, v\n\n\ndef __replace_placeholder_in_list(lst, variables):\n    for v in lst:\n        if isinstance(v, str):\n            yield replace_placholders_in_string(v, variables)\n        elif isinstance(v, dict):\n            yield dict(__replace_placeholders(v, variables))\n        elif isinstance(v, list):\n            yield __replace_placeholder_in_list(v, variables)\n        else:\n            yield v\n\n\ndef __repl(matched, variables):\n    origin = matched.group(0)\n    key = matched.group(1)\n    return variables.get(key, __try_backend_variables(key, default=origin))\n\n\ndef __try_backend_variables(key, default):\n    return standalone_variable_backend.get_variable(key, default)\n"
  },
  {
    "path": "liminal/core/util/env_util.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\n\n\ndef is_running_on_jenkins():\n    jenkins_url = os.getenv('JENKINS_URL')\n    return jenkins_url is not None and jenkins_url\n"
  },
  {
    "path": "liminal/core/util/extensible.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport sys\n\nfrom liminal.build.image_builder import ImageBuilder\nfrom liminal.core.util import class_util\nfrom liminal.runners.airflow.model import executor\nfrom liminal.runners.airflow.model.task import Task\n\n__PLUGINS = 'plugins'\n\n\ndef __generate_extra_paths(plugin_type, extra_paths):\n    return (extra_paths or []) + ([f'{__PLUGINS}.{plugin_type}'] if f'{__PLUGINS}.{plugin_type}' in sys.path else [])\n\n\ndef load_executors(extra_paths=None):\n    \"\"\"\n    Load all Executor extensions\n    \"\"\"\n    package_paths = ['liminal.runners.airflow.executors'] + __generate_extra_paths('executors', extra_paths)\n    return class_util.find_subclasses_in_packages(package_paths, executor.Executor)\n\n\ndef load_tasks(extra_paths=None):\n    \"\"\"\n    Load all Task extensions\n    \"\"\"\n    package_paths = ['liminal.runners.airflow.tasks'] + __generate_extra_paths('tasks', extra_paths)\n    return class_util.find_subclasses_in_packages(package_paths, Task)\n\n\ndef load_image_builders(extra_paths=None):\n    \"\"\"\n    Load all ImageBuilder extensions\n    \"\"\"\n    package_paths = ['liminal.build.image'] + __generate_extra_paths('images', 
extra_paths)\n    return class_util.find_subclasses_in_packages(package_paths, ImageBuilder)\n"
  },
  {
    "path": "liminal/core/util/files_util.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\n\nimport yaml\n\n# { path -> { name -> config_file } }\ncached_files = dict()\ncached_source_files = dict()\n\n\ndef resolve_pipeline_source_file(config_name):\n    \"\"\"\n    Return the source (file) path of the given config name\n    \"\"\"\n    return cached_source_files[config_name]\n\n\ndef load(path):\n    \"\"\"\n    :param path: config path\n    :returns dict of {name -> loaded config file} from all liminal.y[a]ml files under given path\n    \"\"\"\n\n    if cached_files.get(path):\n        return cached_files[path]\n\n    config_entities = {}\n\n    for file_data in find_config_files(path):\n        with open(file_data) as data:\n            config_file = yaml.safe_load(data)\n            config_entities[config_file['name']] = config_file\n            cached_source_files[config_file['name']] = file_data\n\n    cached_files[path] = config_entities\n    return config_entities\n\n\ndef find_config_files(path):\n    \"\"\"\n    :param path: config path\n    :returns list of all liminal.y[a]ml files under config path\n    \"\"\"\n    files = []\n    logging.info(path)\n    for r, d, f in os.walk(path):\n        for file in 
f:\n            if os.path.basename(file) in ['liminal.yml', 'liminal.yaml']:\n                logging.info(os.path.join(r, file))\n                files.append(os.path.join(r, file))\n    return files\n\n\ndef dump_liminal_configs(liminal_configs, path):\n    if not (os.path.exists(path)):\n        os.mkdir(path)\n\n    logging.info(f\"Starting to dump liminal configs into {path}\")\n\n    for liminal_config in liminal_configs:\n        dump_liminal_config(liminal_config, f'{path}/{liminal_config[\"name\"]}.yml')\n\n\ndef dump_liminal_config(liminal_config, file_path):\n    with open(file_path, 'w') as config_file:\n        logging.info(f\"Dumping {liminal_config['name']} into {file_path}\")\n        yaml.dump(liminal_config, config_file, default_flow_style=False)\n"
  },
  {
    "path": "liminal/docker/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/kubernetes/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/kubernetes/secret_util.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport base64\nimport logging\nimport os\nimport sys\nfrom pathlib import Path\nfrom time import sleep\n\nfrom kubernetes import client, config\nfrom kubernetes.client import V1Secret\n\n# noinspection PyBroadException\ntry:\n    config.load_kube_config()\nexcept Exception:\n    msg = \"Kubernetes is not running\\n\"\n    sys.stdout.write(f\"INFO: {msg}\")\n\n_LOG = logging.getLogger('volume_util')\n_LOCAL_VOLUMES = set()\n_kubernetes = client.CoreV1Api()\n\n\ndef get_secret_configs(liminal_config):\n    secrets_config = liminal_config.get('secrets', [])\n\n    for volume_config in secrets_config:\n        if 'secret' in volume_config and 'local_path_file' not in volume_config:\n            secret_path = f\"{os.getcwd()}/credentials-{volume_config['secret']}.txt\"\n            open(secret_path, 'a').close()\n            volume_config['local_path_file'] = secret_path\n    return secrets_config\n\n\ndef create_local_secrets(liminal_config):\n    secrets_config = get_secret_configs(liminal_config)\n\n    for secret_config in secrets_config:\n        logging.info(f'Creating local kubernetes secret if needed: {secret_config}')\n        
create_secret(secret_config)\n\n\ndef create_secret(conf, namespace='default') -> None:\n    name = conf['secret']\n\n    _LOG.info(f'Requested secret {name}')\n\n    if name not in _LOCAL_VOLUMES:\n        matching_secrets = _kubernetes.list_namespaced_secret(\n            namespace, field_selector=f'metadata.name={name}'\n        ).to_dict()['items']\n\n    if len(matching_secrets) == 0:\n        _create_secret(namespace, conf, name)\n        sleep(5)\n\n        _LOCAL_VOLUMES.add(name)\n\n\ndef _create_secret(namespace, conf, name):\n    _LOG.info(f'Creating kubernetes secret {name} with spec {conf}')\n\n    _kubernetes.create_namespaced_secret(\n        namespace,\n        V1Secret(\n            api_version='v1',\n            kind='Secret',\n            metadata={\n                'name': name,\n                'labels': {\"apache/incubator-liminal\": \"liminal.apache.org\"},\n            },\n            data={\n                'credentials': base64.b64encode(\n                    Path(os.path.expanduser(conf['local_path_file'])).read_text().encode('ascii')\n                ).decode('ascii')\n            },\n        ),\n    )\n\n\ndef delete_local_secrets(liminal_config, base_dir):\n    secrets_config = get_secret_configs(liminal_config, base_dir)\n\n    for secret_config in secrets_config:\n        logging.info(f'Delete local secret if needed: {secret_config}')\n        delete_local_secret(secret_config)\n\n\ndef delete_local_secret(name, namespace='default'):\n    matching_secrets = _kubernetes.list_namespaced_secret(namespace, field_selector=f'metadata.name={name}').to_dict()[\n        'items'\n    ]\n\n    if len(matching_secrets) > 0:\n        _LOG.info(f'Deleting secret {name}')\n        _kubernetes.delete_namespaced_secret(name, namespace)\n\n    if name in _LOCAL_VOLUMES:\n        _LOCAL_VOLUMES.remove(name)\n"
  },
  {
    "path": "liminal/kubernetes/volume_util.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\nimport sys\nfrom time import sleep\n\nfrom kubernetes import client, config\nfrom kubernetes.client import V1PersistentVolume, V1PersistentVolumeClaim\n\n# noinspection PyBroadException\ntry:\n    config.load_kube_config()\nexcept Exception:\n    msg = \"Kubernetes is not running\\n\"\n    sys.stdout.write(f\"INFO: {msg}\")\n\n_LOG = logging.getLogger('volume_util')\n_LOCAL_VOLUMES = set()\n_kubernetes = client.CoreV1Api()\n\n\ndef get_volume_configs(liminal_config, base_dir):\n    volumes_config = liminal_config.get('volumes', [])\n\n    for volume_config in volumes_config:\n        if 'local' in volume_config:\n            path = volume_config['local']['path']\n            if path.startswith(\"..\"):\n                path = os.path.join(base_dir, path)\n            if path.startswith(\".\"):\n                path = os.path.join(base_dir, path[1:])\n            volume_config['local']['path'] = path\n    return volumes_config\n\n\ndef create_local_volumes(liminal_config, base_dir):\n    volumes_config = get_volume_configs(liminal_config, base_dir)\n\n    for volume_config in volumes_config:\n        logging.info(f'Creating local 
kubernetes volume if needed: {volume_config}')\n        create_local_volume(volume_config)\n\n\ndef create_local_volume(conf, namespace='default') -> None:\n    name = conf['volume']\n\n    _LOG.info(f'Requested volume {name}')\n\n    if name not in _LOCAL_VOLUMES:\n        matching_volumes = _kubernetes.list_persistent_volume(field_selector=f'metadata.name={name}').to_dict()[\n            'items'\n        ]\n\n        while len(matching_volumes) == 0:\n            _create_local_volume(conf, name)\n            sleep(5)\n            matching_volumes = _kubernetes.list_persistent_volume(field_selector=f'metadata.name={name}').to_dict()[\n                'items'\n            ]\n\n        pvc_name = conf.get('claim_name', f'{name}-pvc')\n\n        matching_claims = _kubernetes.list_persistent_volume_claim_for_all_namespaces(\n            field_selector=f'metadata.name={pvc_name}'\n        ).to_dict()['items']\n\n        while len(matching_claims) == 0:\n            _create_persistent_volume_claim(pvc_name, name, namespace)\n            sleep(5)\n            matching_claims = _kubernetes.list_persistent_volume_claim_for_all_namespaces(\n                field_selector=f'metadata.name={pvc_name}'\n            ).to_dict()['items']\n\n        _LOCAL_VOLUMES.add(name)\n\n\ndef delete_local_volumes(liminal_config, base_dir):\n    volumes_config = get_volume_configs(liminal_config, base_dir)\n\n    for volume_config in volumes_config:\n        logging.info(f'Delete local kubernetes volume if needed: {volume_config}')\n        delete_local_volume(volume_config['volume'])\n\n\ndef delete_local_volume(name, namespace='default'):\n    pvc_name = f'{name}-pvc'\n\n    matching_claims = _list_persistent_volume_claims(pvc_name)\n\n    if len(matching_claims) > 0:\n        _LOG.info(f'Deleting persistent volume claim {pvc_name}')\n        _kubernetes.delete_namespaced_persistent_volume_claim(pvc_name, namespace)\n\n    while len(matching_claims) > 0:\n        matching_claims = 
_list_persistent_volume_claims(pvc_name)\n\n    matching_volumes = _list_persistent_volumes(name)\n\n    if len(matching_volumes) > 0:\n        _LOG.info(f'Deleting persistent volume {name}')\n        _kubernetes.delete_persistent_volume(name)\n\n    while len(matching_volumes) > 0:\n        matching_volumes = _list_persistent_volumes(name)\n\n    if name in _LOCAL_VOLUMES:\n        _LOCAL_VOLUMES.remove(name)\n\n\ndef _list_persistent_volume_claims(name):\n    return _kubernetes.list_persistent_volume_claim_for_all_namespaces(\n        field_selector=f'metadata.name={name}'\n    ).to_dict()['items']\n\n\ndef _list_persistent_volumes(name):\n    return _kubernetes.list_persistent_volume(field_selector=f'metadata.name={name}').to_dict()['items']\n\n\ndef _create_persistent_volume_claim(pvc_name, volume_name, namespace):\n    _LOG.info(f'Creating persistent volume claim {pvc_name} with volume {volume_name}')\n    spec = {\n        'volumeName': volume_name,\n        'volumeMode': 'Filesystem',\n        'storageClassName': 'local-storage',\n        'accessModes': ['ReadWriteOnce'],\n        'resources': {'requests': {'storage': '100Gi'}},\n    }\n\n    _kubernetes.create_namespaced_persistent_volume_claim(\n        namespace,\n        V1PersistentVolumeClaim(\n            api_version='v1', kind='PersistentVolumeClaim', metadata={'name': pvc_name}, spec=spec\n        ),\n    )\n\n\ndef _create_local_volume(conf, name):\n    _LOG.info(f'Creating persistent volume {name} with spec {conf}')\n    spec = {\n        'capacity': {'storage': '100Gi'},\n        'volumeMode': 'Filesystem',\n        'accessModes': ['ReadWriteOnce'],\n        'persistentVolumeReclaimPolicy': 'Retain',\n        'storageClassName': 'local-storage',\n        'nodeAffinity': {\n            'required': {\n                'nodeSelectorTerms': [\n                    {'matchExpressions': [{'key': 'kubernetes.io/hostname', 'operator': 'NotIn', 'values': ['']}]}\n                ]\n            }\n        
},\n    }\n\n    spec.update(conf)\n\n    _kubernetes.create_persistent_volume(\n        V1PersistentVolume(api_version='v1', kind='PersistentVolume', metadata={'name': name}, spec=spec)\n    )\n"
  },
  {
    "path": "liminal/logging/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/logging/logging_setup.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\nfrom logging.handlers import RotatingFileHandler\n\nfrom liminal.core import environment\n\nLOGS_DIR = 'logs'\nLOG_FILENAME = 'liminal.log'\nMAX_FILE_SIZE = 10485760  # 10 MB\n\n\ndef logging_initialization():\n    root_logger = logging.getLogger()\n\n    log_formatter = logging.Formatter(\n        '[%(asctime)s] [%(filename)s:%(lineno)d] %(levelname)s - %(message)s', '%m-%d %H:%M:%S'\n    )\n\n    logs_dir = os.path.join(environment.get_liminal_home(), LOGS_DIR)\n    os.makedirs(logs_dir, exist_ok=True)\n\n    file_handler = RotatingFileHandler(os.path.join(logs_dir, LOG_FILENAME), maxBytes=MAX_FILE_SIZE, backupCount=3)\n\n    root_logger.addHandler(file_handler)\n    root_logger.setLevel(logging.INFO)\n\n    [h.setFormatter(log_formatter) for h in root_logger.handlers]\n\n    logging.info('Logging initialization completed')\n"
  },
  {
    "path": "liminal/monitoring/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/runners/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/runners/airflow/__init__.py",
    "content": "##\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom datetime import datetime\nfrom unittest.mock import MagicMock\n\nimport pytz\n\n\nclass DummyDagRun:\n    def __init__(self) -> None:\n        self.start_date = pytz.utc.localize(datetime.utcnow())\n        self.conf = None\n        self.run_id = \"run_id\"\n\n    @staticmethod\n    def get_task_instances():\n        return []\n\n    def get_task_instance(self, _):\n        return self\n\n\nfrom datetime import datetime\n\nTASK_ID_SEPARATOR = '.'\n\n\nclass DummyDag:\n    def __init__(self, dag_id, task_id):\n        self.dag_id = dag_id\n        self.task_id = task_id\n        self.try_number = 0\n        self.is_subdag = False\n        self.execution_date = '2017-05-21T00:00:00'\n        self.dag_run_id = 'dag_run_id'\n        self.owner = ['owner1', 'owner2']\n        self.email = ['email1@test.com']\n        self.task = MagicMock(name='task', owner=self.owner, email=self.email)\n        self.context = {\n            'dag': self,\n            'task': self,\n            'ti': self,\n            'ts': datetime.now().timestamp(),\n            'dag_run': DummyDagRun(),\n        }\n\n    def get_dagrun(self):\n        return 
self.context['dag_run']\n\n    @staticmethod\n    def xcom_push(key, value):\n        return key, value\n"
  },
  {
    "path": "liminal/runners/airflow/config/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/runners/airflow/config/standalone_variable_backend.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\nfrom os import environ\nfrom sqlite3 import OperationalError\n\nfrom airflow.models import Variable\n\nLIMINAL_STAND_ALONE_MODE_KEY = \"LIMINAL_STAND_ALONE_MODE\"\n\n\n# noinspection PyBroadException\ndef get_variable(key, default_val):\n    if liminal_local_mode():\n        return os.environ.get(key, default_val)\n    else:\n        try:\n            return Variable.get(key, default_var=default_val)\n        except OperationalError as e:\n            logging.warning(\n                f'Failed to find variable {key} in Airflow variables table.'\n                f' Error: {e.__class__.__module__}.{e.__class__.__name__}'\n            )\n        except Exception as e:\n            logging.warning(f'Failed to find variable {key} in Airflow variables table. Error: {e}')\n            return default_val\n\n\ndef liminal_local_mode():\n    stand_alone = environ.get(LIMINAL_STAND_ALONE_MODE_KEY, \"False\")\n    return stand_alone.strip().lower() == \"true\"\n"
  },
  {
    "path": "liminal/runners/airflow/dag/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport os\n\nfrom liminal.core import environment as env\nfrom liminal.runners.airflow.dag import liminal_register_dags\n\nBASE_PATH = os.path.join(env.get_airflow_home_dir(), env.DEFAULT_PIPELINES_SUBDIR)\n\n\ndef register_dags():\n    return liminal_register_dags.register_dags(BASE_PATH)\n"
  },
  {
    "path": "liminal/runners/airflow/dag/liminal_dags.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport traceback\n\nfrom airflow import DAG\n\nimport liminal.runners.airflow.dag as liminal\n\npipelines = liminal.register_dags()\n\nfor pipeline, dag in pipelines:\n    try:\n        globals()[pipeline] = dag\n    except Exception:\n        traceback.print_exc()\n"
  },
  {
    "path": "liminal/runners/airflow/dag/liminal_register_dags.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport logging\nimport os\nimport traceback\nfrom datetime import datetime, timedelta\n\nfrom airflow import DAG\n\nfrom liminal.core.config.config import ConfigUtil\nfrom liminal.core.util import extensible\nfrom liminal.runners.airflow.executors import airflow\n\n__DEPENDS_ON_PAST = 'depends_on_past'\n\n\n# noinspection PyBroadException\ndef register_dags(configs_path):\n    \"\"\"\n    Registers pipelines in liminal yml files found in given path (recursively) as airflow DAGs.\n    \"\"\"\n    logging.info(f'Registering DAGs from path: {configs_path}')\n    config_util = ConfigUtil(configs_path)\n    configs = config_util.safe_load(is_render_variables=False)\n\n    if os.getenv('POD_NAMESPACE') != \"jenkins\":\n        config_util.snapshot_final_liminal_configs()\n\n    dags = []\n    logging.info(f'found {len(configs)} liminal configs in path: {configs_path}')\n    for config in configs:\n        name = config['name'] if 'name' in config else None\n        try:\n            if not name:\n                raise ValueError('liminal.yml missing field `name`')\n\n            logging.info(f\"Registering DAGs for {name}\")\n\n            owner = 
config.get('owner')\n\n            trigger_rule = 'all_success'\n            if 'always_run' in config and config['always_run']:\n                trigger_rule = 'all_done'\n\n            executors = __initialize_executors(config)\n\n            default_executor = airflow.AirflowExecutor(\"default_executor\", liminal_config=config, executor_config={})\n\n            for pipeline in config['pipelines']:\n                default_args = __default_args(pipeline)\n                dag = __initialize_dag(default_args, pipeline, owner)\n\n                parent = None\n\n                for task in pipeline['tasks']:\n                    task_type = task['type']\n                    task_instance = get_task_class(task_type)(\n                        task_id=task['task'],\n                        dag=dag,\n                        parent=parent,\n                        trigger_rule=trigger_rule,\n                        liminal_config=config,\n                        pipeline_config=pipeline,\n                        task_config=task,\n                        variables=config.get('variables', {}),\n                    )\n\n                    executor_id = task.get('executor')\n                    if executor_id:\n                        executor = executors[executor_id]\n                    else:\n                        logging.info(\n                            f\"Did not find `executor` in ${task['task']} config.\"\n                            f\" Using the default executor (${type(default_executor)})\"\n                            f\" instead.\"\n                        )\n                        executor = default_executor\n\n                    parent = executor.apply_task_to_dag(task=task_instance)\n\n                logging.info(f'registered DAG {dag.dag_id}: {dag.tasks}')\n\n                dags.append((pipeline['pipeline'], dag))\n\n        except Exception:\n            logging.error(f'Failed to register DAGs for {name}')\n            traceback.print_exc()\n\n    
return dags\n\n\ndef __initialize_executors(liminal_config):\n    executors = {}\n    for executor_config in liminal_config.get('executors', {}):\n        executors[executor_config['executor']] = get_executor_class(executor_config['type'])(\n            executor_config['executor'], liminal_config, executor_config\n        )\n    return executors\n\n\ndef __initialize_dag(default_args, pipeline, owner):\n    pipeline_name = pipeline['pipeline']\n\n    schedule_interval = default_args.get('schedule_interval', None)\n    if not schedule_interval:\n        schedule_interval = default_args.get('schedule', None)\n\n    if owner and 'owner' not in default_args:\n        default_args['owner'] = owner\n\n    start_date = pipeline.get('start_date', datetime.min.time())\n    if not isinstance(start_date, datetime):\n        start_date = datetime.combine(start_date, datetime.min.time())\n\n    default_args.pop('tasks', None)\n    default_args.pop('schedule', None)\n    default_args.pop('monitoring', None)\n    default_args.pop('schedule_interval', None)\n\n    dag = DAG(\n        dag_id=pipeline_name,\n        default_args=default_args,\n        dagrun_timeout=timedelta(minutes=pipeline['timeout_minutes']),\n        start_date=start_date,\n        schedule_interval=schedule_interval,\n        catchup=False,\n    )\n\n    return dag\n\n\ndef __default_args(pipeline):\n    default_args = {k: v for k, v in pipeline.items()}\n    override_args = {\n        'start_date': datetime.combine(pipeline['start_date'], datetime.min.time()),\n        __DEPENDS_ON_PAST: default_args[__DEPENDS_ON_PAST] if __DEPENDS_ON_PAST in default_args else False,\n    }\n    default_args.update(override_args)\n    return default_args\n\n\nlogging.info(f'Loading task implementations..')\n\ntasks_by_liminal_name = extensible.load_tasks()\n\nlogging.info(f'Finished loading task implementations: {tasks_by_liminal_name.keys()}')\n\nexecutors_by_liminal_name = 
extensible.load_executors()\n\nlogging.info(f'Finished loading executor implementations: {executors_by_liminal_name.keys()}')\n\n\ndef get_task_class(task_type):\n    return tasks_by_liminal_name[task_type]\n\n\ndef get_executor_class(executor_type):\n    return executors_by_liminal_name[executor_type]\n"
  },
  {
    "path": "liminal/runners/airflow/executors/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/runners/airflow/executors/airflow.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom liminal.runners.airflow.model import executor\nfrom liminal.runners.airflow.tasks import airflow\n\n\nclass AirflowExecutor(executor.Executor):\n    \"\"\"\n    Execute the task steps\n    \"\"\"\n\n    supported_task_types = [airflow.AirflowTask]\n\n    def _apply_executor_task_to_dag(self, **kwargs):\n        return kwargs['task'].apply_task_to_dag()\n"
  },
  {
    "path": "liminal/runners/airflow/executors/emr.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom airflow.providers.amazon.aws.operators.emr_add_steps import EmrAddStepsOperator\nfrom airflow.providers.amazon.aws.sensors.emr_step import EmrStepSensor\n\nfrom liminal.runners.airflow.model import executor\nfrom liminal.runners.airflow.tasks import hadoop\n\n\nclass EMRExecutor(executor.Executor):\n    \"\"\"\n    Executes a EMR steps\n    \"\"\"\n\n    supported_task_types = [hadoop.HadoopTask]\n\n    def __init__(self, executor_id, liminal_config, executor_config):\n        super().__init__(executor_id, liminal_config, executor_config)\n        self.aws_conn_id = self.executor_config.get('aws_conn_id', 'aws_default')\n        self.cluster_states = self.executor_config.get('cluster_state', ['RUNNING', 'WAITING'])\n        self.job_flow_id = self.executor_config.get('cluster_id', None)\n        self.job_flow_name = self.executor_config.get('cluster_name', None)\n\n    def _apply_executor_task_to_dag(self, **kwargs):\n        task = kwargs['task']\n        parent = task.parent\n\n        self._validate_task_type(task)\n\n        # assuming emr already exists\n        add_step = executor.add_variables_to_operator(\n            EmrAddStepsOperator(\n   
             task_id=f'{task.task_id}_add_step',\n                job_flow_id=self.job_flow_id,\n                job_flow_name=self.job_flow_name,\n                aws_conn_id=self.aws_conn_id,\n                steps=self.__generate_emr_step(task.task_id, [str(x) for x in task.get_runnable_command()]),\n                cluster_states=self.cluster_states,\n            ),\n            task,\n        )\n\n        if task.parent:\n            parent.set_downstream(add_step)\n\n        emr_sensor_step = executor.add_variables_to_operator(\n            EmrStepSensor(\n                task_id=f'{task.task_id}_watch_step',\n                job_flow_id=\"{{ task_instance.xcom_pull('\" + add_step.task_id + \"', key='job_flow_id') }}\",\n                step_id=\"{{ task_instance.xcom_pull('\" + add_step.task_id + \"', key='return_value')[0] }}\",\n                aws_conn_id=self.aws_conn_id,\n            ),\n            task,\n        )\n\n        add_step.set_downstream(emr_sensor_step)\n\n        return emr_sensor_step\n\n    def __generate_emr_step(self, task_id, args):\n        return [\n            {\n                'Name': task_id,\n                **self.executor_config.get('properties', {}),\n                'HadoopJarStep': {'Jar': 'command-runner.jar', 'Args': args},\n            }\n        ]\n"
  },
  {
    "path": "liminal/runners/airflow/executors/kubernetes.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport copy\nimport datetime\nimport logging\nimport os\n\nfrom airflow.providers.cncf.kubernetes.operators.kubernetes_pod import (\n    KubernetesPodOperator,\n)\nfrom kubernetes.client import V1Volume, V1VolumeMount\nfrom kubernetes.client import models as k8s\n\nfrom liminal.core.util import env_util\nfrom liminal.runners.airflow.config.standalone_variable_backend import get_variable\nfrom liminal.runners.airflow.model import executor\nfrom liminal.runners.airflow.tasks import containerable\nfrom liminal.runners.airflow.tasks.containerable import ContainerTask\n\n_LOG = logging.getLogger(__name__)\n\n\nclass KubernetesPodExecutor(executor.Executor):\n    \"\"\"\n    Kubernetes pod executor\n    \"\"\"\n\n    supported_task_types = [containerable.ContainerTask]\n\n    def __init__(self, task_id, liminal_config, executor_config):\n        super().__init__(task_id, liminal_config, executor_config)\n\n        self.task_name = self.executor_config['executor']\n        self.volumes = self._volumes()\n\n    def _apply_executor_task_to_dag(self, **kwargs):\n        task = kwargs['task']\n        parent = task.parent\n\n        
self._validate_task_type(task)\n\n        pod_task = executor.add_variables_to_operator(\n            KubernetesPodOperator(trigger_rule=task.trigger_rule, **self.__kubernetes_kwargs(task)), task\n        )\n\n        if parent:\n            parent.set_downstream(pod_task)\n\n        return pod_task\n\n    def _volumes(self):\n        volumes_config = self.liminal_config.get('volumes', [])\n        secrets_config = self.liminal_config.get('secrets', [])\n        volumes = []\n        for volume_config in volumes_config:\n            name = volume_config['volume']\n            claim_name = volume_config.get('claim_name')\n            if not claim_name and 'local' in volume_config:\n                claim_name = f'{name}-pvc'\n            volume = V1Volume(name=name, persistent_volume_claim={'claimName': claim_name})\n            volumes.append(volume)\n\n        for secret_config in secrets_config:\n            name = secret_config['secret']\n            secret = V1Volume(name=name, secret={'secretName': name})\n            volumes.append(secret)\n\n        return volumes\n\n    def __kubernetes_kwargs(self, task: ContainerTask):\n        config = copy.deepcopy(self.executor_config)\n\n        kubernetes_kwargs = {\n            'task_id': task.task_id,\n            'image': task.image,\n            'arguments': task.arguments,\n            'namespace': os.environ.get('AIRFLOW__KUBERNETES__NAMESPACE', 'default'),\n            'name': task.task_id.replace('_', '-'),\n            'in_cluster': os.environ.get('AIRFLOW__KUBERNETES__IN_CLUSTER', False),\n            'image_pull_policy': get_variable('image_pull_policy', default_val='IfNotPresent'),\n            'get_logs': config.pop('get_logs', True),\n            'is_delete_operator_pod': config.pop('is_delete_operator_pod', True),\n            'startup_timeout_seconds': config.pop('startup_timeout_seconds', 1200),\n            'env_vars': [k8s.V1EnvVar(name=x, value=v) for x, v in task.env_vars.items()],\n            
'do_xcom_push': task.task_config.get('do_xcom_push', False),\n            'image_pull_secrets': config.pop('image_pull_secrets', 'regcred'),\n            'volumes': self.volumes,\n            'config_file': os.environ.get('AIRFLOW__KUBERNETES__CONFIG_FILE'),\n            'cluster_context': os.environ.get('AIRFLOW__KUBERNETES__CLUSTER_CONTEXT', None),\n            'cmds': task.cmds,\n            'volume_mounts': [\n                V1VolumeMount(\n                    name=mount['volume'],\n                    mount_path=mount['path'],\n                    sub_path=mount.get('sub_path'),\n                    read_only=mount.get('read_only', False),\n                )\n                for mount in task.mounts\n            ]\n            + [\n                V1VolumeMount(\n                    name=secret['secret'],\n                    mount_path=secret['remote_path'],\n                    sub_path=secret.get('sub_path'),\n                    read_only=secret.get('read_only', False),\n                )\n                for secret in task.secrets\n            ],\n        }\n\n        config.pop('in_cluster', None)\n        config.pop('volumes', None)\n        config.pop('volume_mounts', None)\n        config.pop('executor', None)\n        config.pop('type', None)\n\n        kubernetes_kwargs.update(config)\n\n        if env_util.is_running_on_jenkins():\n            kubernetes_kwargs['affinity'] = self.__jenkins_kubernetes_affinity()\n            kubernetes_kwargs['namespace'] = 'jenkins'\n\n        if not task.dag:\n            kubernetes_kwargs.update(\n                {\n                    'start_date': datetime.datetime(1970, 1, 1),\n                }\n            )\n\n        return kubernetes_kwargs\n\n    @staticmethod\n    def __jenkins_kubernetes_affinity():\n        return {\n            \"podAffinity\": {\n                \"requiredDuringSchedulingIgnoredDuringExecution\": [\n                    {\n                        \"labelSelector\": {\n               
             \"matchExpressions\": [{\"key\": \"liminal\", \"operator\": \"In\", \"values\": [\"unittest\"]}]\n                        },\n                        \"namespaces\": [\"jenkins\"],\n                        \"topologyKey\": \"kubernetes.io/hostname\",\n                    }\n                ]\n            }\n        }\n"
  },
  {
    "path": "liminal/runners/airflow/model/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/runners/airflow/model/executor.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom abc import ABC, abstractmethod\n\nfrom airflow.models import BaseOperator\n\nfrom liminal.runners.airflow.operators.operator_with_variable_resolving import (\n    OperatorWithVariableResolving,\n)\n\n\ndef add_variables_to_operator(operator, task) -> BaseOperator:\n    \"\"\"\n    :param operator: Airflow operator\n    :type operator: BaseOperator\n    :param task: Task instance\n    :returns: OperatorWithVariableResolving wrapping given operator\n    \"\"\"\n    return OperatorWithVariableResolving(\n        dag=task.dag,\n        task_config=task.task_config,\n        variables=task.variables,\n        liminal_task_instance=task,\n        operator=operator,\n    )\n\n\nclass Executor(ABC):\n    \"\"\"\n    Executor Task.\n    \"\"\"\n\n    # list of task types supported by the executor\n    supported_task_types = []\n\n    def __init__(self, executor_id, liminal_config, executor_config):\n        self.liminal_config = liminal_config\n        self.executor_id = executor_id\n        self.executor_config = executor_config\n\n    def apply_task_to_dag(self, **kwargs):\n        task = kwargs['task']\n\n        self._validate_task_type(task)\n\n        
return self._apply_executor_task_to_dag(task=task)\n\n    @abstractmethod\n    def _apply_executor_task_to_dag(self, **kwargs):\n        pass\n\n    def _validate_task_type(self, task):\n        assert any(\n            [isinstance(task, tYp) for tYp in self.supported_task_types]\n        ), f'supported task types: {self.supported_task_types}'\n"
  },
  {
    "path": "liminal/runners/airflow/model/task.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\"\"\"\nBase task.\n\"\"\"\nimport json\nfrom abc import ABC\n\nfrom airflow.models import BaseOperator\n\nfrom liminal.runners.airflow.model import executor\n\n\nclass Task(ABC):\n    \"\"\"\n    Task.\n    \"\"\"\n\n    def __init__(\n        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None\n    ):\n        self.liminal_config = liminal_config\n        self.dag = dag\n        self.pipeline_config = pipeline_config\n        self.task_id = task_id\n        self.parent = parent\n        self.trigger_rule = trigger_rule\n        self.task_config = task_config\n        self.variables = variables\n\n    def serialize(self) -> str:\n        \"\"\"\n        :returns: JSON string representation of this task\n        \"\"\"\n        data = {\n            'task_id': self.task_id,\n            'dag': None,\n            'parent': self.parent,\n            'trigger_rule': self.trigger_rule,\n            'liminal_config': self.liminal_config,\n            'pipeline_config': self.pipeline_config,\n            'task_config': self.task_config,\n            'variables': self.variables,\n        }\n\n        return 
json.dumps(data, default=str)\n\n    def _add_variables_to_operator(self, operator) -> BaseOperator:\n        \"\"\"\n        :param operator: Airflow operator\n        :type operator: BaseOperator\n        :returns: OperatorWithVariableResolving wrapping given operator\n        \"\"\"\n        return executor.add_variables_to_operator(operator, self)\n"
  },
  {
    "path": "liminal/runners/airflow/operators/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/runners/airflow/operators/cloudformation.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\"\"\"\nThis module contains CloudFormation create/delete stack operators.\nCan be removed when Airflow 2.0.0 is released.\n\"\"\"\nfrom typing import List\n\nfrom airflow.models import BaseOperator\nfrom airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook\nfrom airflow.sensors.base import BaseSensorOperator\nfrom botocore.exceptions import ClientError\n\n\n# noinspection PyAbstractClass\nclass CloudFormationHook(AwsBaseHook):\n    \"\"\"\n    Interact with AWS CloudFormation.\n    \"\"\"\n\n    def __init__(self, region_name=None, *args, **kwargs):\n        self.region_name = region_name\n        self.conn = None\n        super().__init__(*args, client_type='cloudformation', **kwargs)\n\n    def get_conn(self):\n        self.conn = self.get_client_type('cloudformation', self.region_name)\n        return self.conn\n\n\nclass BaseCloudFormationOperator(BaseOperator):\n    \"\"\"\n    Base operator for CloudFormation operations.\n\n    :param params: parameters to be passed to CloudFormation.\n    :type dict\n    :param aws_conn_id: aws connection to uses\n    :type aws_conn_id: str\n    \"\"\"\n\n    template_fields: List[str] = []\n    
template_ext = ()\n    ui_color = '#1d472b'\n    ui_fgcolor = '#FFF'\n\n    def __init__(self, params, aws_conn_id='aws_default', *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.params = params\n        self.aws_conn_id = aws_conn_id\n\n    def execute(self, context):\n        self.log.info('Parameters: %s', self.params)\n\n        self.cloudformation_op(CloudFormationHook(aws_conn_id=self.aws_conn_id).get_conn())\n\n    def cloudformation_op(self, cloudformation):\n        \"\"\"\n        This is the main method to run CloudFormation operation.\n        \"\"\"\n        raise NotImplementedError()\n\n\nclass CloudFormationCreateStackOperator(BaseCloudFormationOperator):\n    \"\"\"\n    An operator that creates a CloudFormation stack.\n\n    :param params: parameters to be passed to CloudFormation. For possible arguments see:\n            https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudformation.html#CloudFormation.Client.create_stack\n    :type params: dict\n    :param aws_conn_id: aws connection to use\n    :type aws_conn_id: str\n    \"\"\"\n\n    template_fields: List[str] = []\n    template_ext = ()\n    ui_color = '#6b9659'\n\n    def __init__(self, params, aws_conn_id='aws_default', *args, **kwargs):\n        super().__init__(params=params, aws_conn_id=aws_conn_id, *args, **kwargs)\n\n    def cloudformation_op(self, cloudformation):\n        cloudformation.create_stack(**self.params)\n\n\nclass CloudFormationDeleteStackOperator(BaseCloudFormationOperator):\n    \"\"\"\n    An operator that deletes a CloudFormation stack.\n\n    :param params: parameters to be passed to CloudFormation. 
For possible arguments see:\n            https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudformation.html#CloudFormation.Client.delete_stack\n    :type params: dict\n    :param aws_conn_id: aws connection to use\n    :type aws_conn_id: str\n    \"\"\"\n\n    template_fields: List[str] = []\n    template_ext = ()\n    ui_color = '#1d472b'\n    ui_fgcolor = '#FFF'\n\n    def __init__(self, params, aws_conn_id='aws_default', *args, **kwargs):\n        super().__init__(params=params, aws_conn_id=aws_conn_id, *args, **kwargs)\n\n    def cloudformation_op(self, cloudformation):\n        cloudformation.delete_stack(**self.params)\n\n\nclass BaseCloudFormationSensor(BaseSensorOperator):\n    \"\"\"\n    Waits for a stack operation to complete on AWS CloudFormation.\n\n    :param stack_name: The name of the stack to wait for (templated)\n    :type stack_name: str\n    :param aws_conn_id: ID of the Airflow connection where credentials and extra configuration are\n        stored\n    :type aws_conn_id: str\n    :param poke_interval: Time in seconds that the job should wait between each try\n    :type poke_interval: int\n    \"\"\"\n\n    def __init__(\n        self,\n        stack_name,\n        complete_status,\n        in_progress_status,\n        aws_conn_id='aws_default',\n        poke_interval=30,\n        *args,\n        **kwargs,\n    ):\n        super().__init__(poke_interval=poke_interval, *args, **kwargs)\n        self.aws_conn_id = aws_conn_id\n        self.stack_name = stack_name\n        self.complete_status = complete_status\n        self.in_progress_status = in_progress_status\n        self.hook = None\n\n    def poke(self, context):\n        \"\"\"\n        Checks for existence of the stack in AWS CloudFormation.\n        \"\"\"\n        cloudformation = self.get_hook().get_conn()\n\n        self.log.info('Poking for stack %s', self.stack_name)\n\n        try:\n            stacks = 
cloudformation.describe_stacks(StackName=self.stack_name)['Stacks']\n            stack_status = stacks[0]['StackStatus']\n            if stack_status == self.complete_status:\n                return True\n            elif stack_status == self.in_progress_status:\n                return False\n            else:\n                raise ValueError(f'Stack {self.stack_name} in bad state: {stack_status}')\n        except ClientError as e:\n            if 'does not exist' in str(e):\n                if not self.allow_non_existing_stack_status():\n                    raise ValueError(f'Stack {self.stack_name} does not exist')\n                else:\n                    return True\n            else:\n                raise e\n\n    def get_hook(self):\n        \"\"\"\n        Gets the CloudFormationHook\n        \"\"\"\n        if not self.hook:\n            self.hook = CloudFormationHook(aws_conn_id=self.aws_conn_id)\n\n        return self.hook\n\n    def allow_non_existing_stack_status(self):\n        \"\"\"\n        Boolean value whether or not sensor should allow non existing stack responses.\n        \"\"\"\n        return False\n\n\nclass CloudFormationCreateStackSensor(BaseCloudFormationSensor):\n    \"\"\"\n    Waits for a stack to be created successfully on AWS CloudFormation.\n\n    :param stack_name: The name of the stack to wait for (templated)\n    :type stack_name: str\n    :param aws_conn_id: ID of the Airflow connection where credentials and extra configuration are\n        stored\n    :type aws_conn_id: str\n    :param poke_interval: Time in seconds that the job should wait between each try\n    :type poke_interval: int\n    \"\"\"\n\n    template_fields = ['stack_name']\n    ui_color = '#C5CAE9'\n\n    def __init__(self, stack_name, aws_conn_id='aws_default', poke_interval=30, *args, **kwargs):\n        super().__init__(\n            stack_name=stack_name,\n            complete_status='CREATE_COMPLETE',\n            
in_progress_status='CREATE_IN_PROGRESS',\n            aws_conn_id=aws_conn_id,\n            poke_interval=poke_interval,\n            *args,\n            **kwargs,\n        )\n\n\nclass CloudFormationDeleteStackSensor(BaseCloudFormationSensor):\n    \"\"\"\n    Waits for a stack to be deleted successfully on AWS CloudFormation.\n\n    :param stack_name: The name of the stack to wait for (templated)\n    :type stack_name: str\n    :param aws_conn_id: ID of the Airflow connection where credentials and extra configuration are\n        stored\n    :type aws_conn_id: str\n    :param poke_interval: Time in seconds that the job should wait between each try\n    :type poke_interval: int\n    \"\"\"\n\n    template_fields = ['stack_name']\n    ui_color = '#C5CAE9'\n\n    def __init__(self, stack_name, aws_conn_id='aws_default', poke_interval=30, *args, **kwargs):\n        super().__init__(\n            stack_name=stack_name,\n            complete_status='DELETE_COMPLETE',\n            in_progress_status='DELETE_IN_PROGRESS',\n            aws_conn_id=aws_conn_id,\n            poke_interval=poke_interval,\n            *args,\n            **kwargs,\n        )\n\n    def allow_non_existing_stack_status(self):\n        return True\n"
  },
  {
    "path": "liminal/runners/airflow/operators/job_status_operator.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom datetime import datetime\nfrom typing import Any\n\nimport pytz\nfrom airflow.contrib.hooks.aws_hook import AwsHook\nfrom airflow.exceptions import AirflowException\nfrom airflow.lineage import apply_lineage\nfrom airflow.models import BaseOperator\nfrom airflow.utils.db import provide_session\nfrom airflow.utils.decorators import apply_defaults\nfrom airflow.utils.state import State\n\n\nclass JobStatusOperator(BaseOperator):\n    \"\"\"\n    Base operator for job status operators.\n    \"\"\"\n\n    template_ext = ()\n\n    @apply_defaults\n    def __init__(self, backends, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.backends = backends\n        self.cloudwatch = None\n\n    def execute(self, context):\n        for backend in self.backends:\n            if backend in self.report_functions:\n                for metric in self.metrics(context):\n                    self.report_functions[backend](self, metric)\n            else:\n                raise AirflowException(f'No such metrics backend: {backend}')\n\n    def metrics(self, context):\n        raise NotImplementedError\n\n    def send_metric_to_cloudwatch(self, 
metric):\n        self.get_cloudwatch().put_metric_data(metric)\n\n    report_functions = {'cloudwatch': send_metric_to_cloudwatch}\n\n    def get_cloudwatch(self):\n        if not self.cloudwatch:\n            self.cloudwatch = CloudWatchHook()\n        return self.cloudwatch\n\n\nclass JobStartOperator(JobStatusOperator):\n    ui_color = '#c5e5e8'\n\n    def __init__(self, namespace, application_name, backends, *args, **kwargs):\n        super().__init__(backends=backends, *args, **kwargs)\n        self.namespace = namespace\n        self.application_name = application_name\n\n    def metrics(self, context):\n        return [Metric(self.namespace, 'JobStarted', 1, [Tag('ApplicationName', self.application_name)])]\n\n\nclass JobEndOperator(JobStatusOperator):\n    ui_color = '#6d8fad'\n\n    def __init__(self, namespace, application_name, backends, *args, **kwargs):\n        super().__init__(backends=backends, *args, **kwargs)\n        self.namespace = namespace\n        self.application_name = application_name\n        self.__job_result = 0\n\n    def execute(self, context):\n        self.__calculate_job_result(context)\n        super().execute(context)\n\n    def metrics(self, context):\n        duration = round(\n            (pytz.utc.localize(datetime.utcnow()) - context['ti'].get_dagrun().start_date).total_seconds()\n        )\n\n        self.log.info('Elapsed time: %s' % duration)\n\n        self.log.info(f'dag final job result: {self.__job_result}')\n\n        return [\n            Metric(self.namespace, 'JobResult', self.__job_result, [Tag('ApplicationName', self.application_name)]),\n            Metric(self.namespace, 'JobDuration', duration, [Tag('ApplicationName', self.application_name)]),\n        ]\n\n    def __log_and_get_state(self, task_instance):\n        state = task_instance.state\n\n        self.log.info(f'Task {task_instance.task_id} finished with state = {state}')\n\n        return state\n\n    def __calculate_job_result(self, context):\n     
   self.log.info('scanning task instances states.. ')\n        task_instances = context['dag_run'].get_task_instances()\n        task_states = [\n            self.__log_and_get_state(task_instance)\n            for task_instance in task_instances\n            if task_instance.task_id != context['task_instance'].task_id\n        ]\n\n        self.__job_result = 0\n        if all((state == State.SUCCESS or state == State.SKIPPED) for state in task_states):\n            self.__job_result = 1\n\n    @apply_lineage\n    @provide_session\n    def post_execute(self, context: Any, result: Any = None, session=None):\n        if self.__job_result == 0:\n            self.log.info(\"Failing this DAG run due to task failure.\")\n\n            dag_run = context['ti'].get_dagrun()\n            dag_run.end_date = datetime.utcnow()\n            dag_run.state = State.FAILED\n\n            session.merge(dag_run)\n\n\n# noinspection PyAbstractClass\nclass CloudWatchHook(AwsHook):\n    \"\"\"\n    Interact with AWS CloudWatch.\n    \"\"\"\n\n    def __init__(self, region_name=None, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        self.region_name = region_name\n        self.conn = self.get_client_type('cloudwatch', self.region_name)\n\n    def get_conn(self):\n        return self.conn\n\n    def put_metric_data(self, metric):\n        value = metric.value\n\n        cloudwatch = self.get_conn()\n\n        dimensions = [{'Name': tag.name, 'Value': tag.value} for tag in metric.tags]\n\n        cloudwatch.put_metric_data(\n            Namespace=metric.namespace,\n            MetricData=[\n                {\n                    'MetricName': metric.name,\n                    'Dimensions': dimensions,\n                    'Timestamp': datetime.utcnow(),\n                    'Value': value,\n                    'Unit': 'None',\n                }\n            ],\n        )\n\n        self.log.info(f'Published metric: {metric.name} with value: {value}')\n\n\nclass 
Metric:\n    \"\"\"\n    Metric.\n    :param namespace: namespace.\n    :type namespace: str\n    :param name: name.\n    :type name: str\n    :param value: value.\n    :type value: float\n    :param tags: list of tags.\n    :type tags: List[Tag]\n    \"\"\"\n\n    def __init__(self, namespace, name, value, tags):\n        self.namespace = namespace\n        self.name = name\n        self.value = value\n        self.tags = tags\n\n\nclass Tag:\n    def __init__(self, name, value):\n        self.name = name\n        self.value = value\n"
  },
  {
    "path": "liminal/runners/airflow/operators/operator_with_variable_resolving.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport inspect\nimport logging\nimport re\nfrom datetime import datetime\nfrom typing import Any, Dict, Optional, Set\n\nimport jinja2\nfrom airflow.models import BaseOperator\nfrom airflow.settings import Session\nfrom jinja2 import Environment\n\nfrom liminal.runners.airflow.config import standalone_variable_backend\n\n_VAR_REGEX = '(.*){{([^}]*)}}(.*)'\n\n_BASE_OPERATOR_ATTRIBUTES = list(inspect.signature(BaseOperator.__init__).parameters.keys())\n\n\nclass OperatorWithVariableResolving(BaseOperator):\n    \"\"\"\n    Operator delegator that handles liminal variable substitution at run time\n    \"\"\"\n\n    def __init__(self, dag, task_config: dict, variables: dict = None, liminal_task_instance=None, **kwargs):\n        self.operator_delegate: BaseOperator = kwargs.pop('operator')\n        self.liminal_task_instance = liminal_task_instance.serialize() if liminal_task_instance else None\n        if variables:\n            self.variables = variables.copy()\n        else:\n            self.variables = {}\n        self.task_config = task_config\n        super().__init__(task_id=self.operator_delegate.task_id, dag=dag)\n        self._LOG = 
logging.getLogger(self.__class__.__name__)\n\n    def execute(self, context):\n        attributes = self._get_operator_delegate_attributes()\n        self._LOG.info(f'task_config: {self.task_config}')\n        self._LOG.info(f'variables: {self.variables}')\n        self.operator_delegate.template_fields = set(list(self.operator_delegate.template_fields) + attributes)\n        self.operator_delegate.render_template_fields(context, LiminalEnvironment(self.variables, self.task_config))\n        self.operator_delegate.render_template_fields(context)\n\n        if 'ti' in context:\n            context['ti'].xcom_push(key=\"liminal_task_instance\", value=self.liminal_task_instance)\n\n        return self.operator_delegate.execute(context)\n\n    def post_execute(self, context, result=None):\n        self.operator_delegate.post_execute(context, result)\n\n    def _get_operator_delegate_attributes(self):\n        return [\n            attr\n            for attr in dir(self.operator_delegate)\n            if attr not in _BASE_OPERATOR_ATTRIBUTES\n            and attr not in dir(BaseOperator)\n            and not attr.startswith('_')\n            and attr not in ('args', 'kwargs', 'lineage_data', 'subdag', 'template_fields')\n        ]\n\n    def pre_execute(self, context: Any):\n        return self.operator_delegate.pre_execute(context)\n\n    def on_kill(self) -> None:\n        self.operator_delegate.on_kill()\n\n    def render_template_fields(self, context: Dict, jinja_env: Optional[jinja2.Environment] = None) -> None:\n        pass\n\n    def render_template(\n        self,\n        content: Any,\n        context: Dict,\n        jinja_env: Optional[jinja2.Environment] = None,\n        seen_oids: Optional[Set] = None,\n    ) -> Any:\n        value = self.operator_delegate.render_template(\n            content, context, LiminalEnvironment(self.variables, self.task_config)\n        )\n        return self.operator_delegate.render_template(value, context, jinja_env, 
seen_oids)\n\n    def get_template_env(self) -> jinja2.Environment:\n        return self.operator_delegate.get_template_env()\n\n    def prepare_template(self) -> None:\n        self.operator_delegate.prepare_template()\n\n    def resolve_template_files(self) -> None:\n        self.operator_delegate.resolve_template_files()\n\n    def clear(\n        self,\n        start_date: Optional[datetime] = None,\n        end_date: Optional[datetime] = None,\n        upstream: bool = False,\n        downstream: bool = False,\n        session: Session = None,\n    ):\n        return self.operator_delegate.clear(start_date, end_date, upstream, downstream, session)\n\n    def run(\n        self,\n        start_date: Optional[datetime] = None,\n        end_date: Optional[datetime] = None,\n        ignore_first_depends_on_past: bool = True,\n        ignore_ti_state: bool = False,\n        mark_success: bool = False,\n    ) -> None:\n        self.operator_delegate.run(start_date, end_date, ignore_first_depends_on_past, ignore_ti_state, mark_success)\n\n\nclass LiminalEnvironment(Environment):\n    def __init__(self, variables, task_config=None):\n        super().__init__()\n        self.val = None\n        self.variables = variables.copy()\n        logging.info(f'variables: {variables}')\n        if task_config and 'variables' in task_config:\n            task_variables = task_config['variables']\n            if isinstance(task_variables, dict):\n                self.variables.update(task_variables)\n            elif isinstance(task_variables, str):\n                variables_key = self.from_string(task_variables).render()\n                if variables_key in variables:\n                    self.variables.update(variables[variables_key])\n\n    def from_string(self, val, **kwargs):\n        self.val = val\n        return self\n\n    def render(self, *_, **kwargs):\n        \"\"\"\n        Implements jinja2.environment.Template.render\n        \"\"\"\n        conf = 
kwargs['dag_run'].conf if 'dag_run' in kwargs else {}\n        return self.__render(self.val, conf, set())\n\n    def __render(self, val: str, dag_run_conf: dict, unresolved_tags: set):\n        token = re.match(_VAR_REGEX, val)\n        if token and token[2].strip() not in unresolved_tags:\n            tag_name = token[2].strip()\n            prefix = self.__render(token[1], dag_run_conf, unresolved_tags)\n            suffix = self.__render(token[3], dag_run_conf, unresolved_tags)\n            if dag_run_conf and tag_name in dag_run_conf:\n                return self.__render(prefix + str(dag_run_conf[tag_name]) + suffix, dag_run_conf, unresolved_tags)\n            elif tag_name in self.variables:\n                return self.__render(prefix + str(self.variables[tag_name]) + suffix, dag_run_conf, unresolved_tags)\n            else:\n                backend_value = standalone_variable_backend.get_variable(tag_name, None)\n                if backend_value:\n                    return self.__render(prefix + backend_value + suffix, dag_run_conf, unresolved_tags)\n                else:\n                    unresolved_tags.add(tag_name)\n                    return self.__render(prefix + '{{' + token[2] + '}}' + suffix, dag_run_conf, unresolved_tags)\n        else:\n            return val\n\n\ndef add_variables_to_operator(operator, task) -> BaseOperator:\n    \"\"\"\n    :param operator: Airflow operator\n    :type operator: BaseOperator\n    :param task: Task instance\n    :type task: Task\n    :returns: OperatorWithVariableResolving wrapping given operator\n    \"\"\"\n    return OperatorWithVariableResolving(\n        dag=task.dag,\n        task_config=task.task_config,\n        variables=task.variables,\n        liminal_task_instance=task,\n        operator=operator,\n    )\n"
  },
  {
    "path": "liminal/runners/airflow/tasks/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal/runners/airflow/tasks/airflow.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom abc import ABC, abstractmethod\n\nfrom liminal.runners.airflow.model import task\n\n\nclass AirflowTask(task.Task, ABC):\n    \"\"\"\n    Airflow task\n    \"\"\"\n\n    @abstractmethod\n    def apply_task_to_dag(self):\n        pass\n"
  },
  {
    "path": "liminal/runners/airflow/tasks/containerable.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport json\nimport logging\nimport os\nfrom abc import ABC\nfrom typing import Dict\n\nfrom liminal.core import environment\nfrom liminal.runners.airflow.config import standalone_variable_backend\nfrom liminal.runners.airflow.config.standalone_variable_backend import get_variable\nfrom liminal.runners.airflow.model import task\n\n_LOG = logging.getLogger(__name__)\nENV = 'env'\nDEFAULT = 'default'\n\n\nclass ContainerTask(task.Task, ABC):\n    \"\"\"\n    K8S Containerable task\n    \"\"\"\n\n    def __init__(\n        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None\n    ):\n        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)\n        env = standalone_variable_backend.get_variable(ENV, DEFAULT)\n        self.env_vars = self.__env_vars(env)\n        self.image = self.task_config['image']\n        self.mounts = self.task_config.get('mounts', [])\n        self.secrets = self.task_config.get('secrets', [])\n        self.cmds, self.arguments = self._kubernetes_cmds_and_arguments()\n\n    def _kubernetes_cmds_and_arguments(self):\n        
cmds = ['/bin/sh', '-c']\n\n        arguments = [self.task_config['cmd']]\n\n        return cmds, arguments\n\n    @staticmethod\n    def __get_local_env_params_from_env_file():\n        env_file = f'{environment.get_liminal_home()}/env'\n        if os.path.isfile(env_file):\n            _LOG.info(f'found env file at {env_file}')\n            result = {}\n            with open(env_file) as f:\n                lines = f.readlines()\n                for line in lines:\n                    if line and line.strip() and line.strip()[0:1] != '#':\n                        parts = line.strip().split('=')\n                        if len(parts) == 2:\n                            result[parts[0]] = parts[1]\n            return result\n        else:\n            return {}\n\n    def __env_vars(self, env) -> Dict:\n        env_vars = dict(self.task_config['env_vars']) if 'env_vars' in self.task_config else {}\n        env_vars.update(self.__get_local_env_params_from_env_file())\n        airflow_configuration_variable = get_variable(\n            f'''{self.pipeline_config['pipeline']}_dag_configuration''', default_val=None\n        )\n\n        if airflow_configuration_variable:\n            airflow_configs = json.loads(airflow_configuration_variable)\n            environment_variables_key = f\"{self.pipeline_config['pipeline']}_environment_variables\"\n            if environment_variables_key in airflow_configs:\n                env_vars = airflow_configs[environment_variables_key]\n\n        if ENV not in env_vars:\n            env_vars[ENV] = env\n\n        return {k: str(v) for k, v in env_vars.items()}\n"
  },
  {
    "path": "liminal/runners/airflow/tasks/create_cloudformation_stack.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom airflow.operators.dummy import DummyOperator\nfrom airflow.operators.python import BranchPythonOperator\nfrom flatdict import FlatDict\n\nfrom liminal.runners.airflow.operators.cloudformation import (\n    CloudFormationCreateStackOperator,\n    CloudFormationCreateStackSensor,\n    CloudFormationHook,\n)\nfrom liminal.runners.airflow.operators.operator_with_variable_resolving import (\n    OperatorWithVariableResolving,\n)\nfrom liminal.runners.airflow.tasks import airflow\n\n\nclass CreateCloudFormationStackTask(airflow.AirflowTask):\n    \"\"\"\n    Creates cloud_formation stack.\n    \"\"\"\n\n    def __init__(\n        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None\n    ):\n        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)\n        self.stack_name = task_config['stack_name']\n\n    def apply_task_to_dag(self):\n        check_cloudformation_stack_exists_task = self._add_variables_to_operator(\n            BranchPythonOperator(\n                templates_dict={'stack_name': self.stack_name},\n                
task_id=f'is-cloudformation-{self.task_id}-running',\n                python_callable=self.__cloudformation_stack_running_branch,\n                provide_context=True,\n            )\n        )\n\n        create_cloudformation_stack_task = self._add_variables_to_operator(\n            CloudFormationCreateStackOperator(\n                task_id=f'create-cloudformation-{self.task_id}', params={**self.__reformatted_params()}\n            )\n        )\n\n        create_stack_sensor_task = self._add_variables_to_operator(\n            CloudFormationCreateStackSensor(\n                task_id=f'cloudformation-watch-{self.task_id}-create',\n                stack_name=self.stack_name,\n            )\n        )\n\n        stack_creation_end_task = DummyOperator(\n            task_id=f'creation-end-{self.task_id}', trigger_rule='all_done', dag=self.dag\n        )\n\n        if self.parent:\n            self.parent.set_downstream(check_cloudformation_stack_exists_task)\n\n        create_stack_sensor_task.set_downstream(stack_creation_end_task)\n        create_cloudformation_stack_task.set_downstream(create_stack_sensor_task)\n        check_cloudformation_stack_exists_task.set_downstream(create_cloudformation_stack_task)\n        check_cloudformation_stack_exists_task.set_downstream(stack_creation_end_task)\n\n        return stack_creation_end_task\n\n    def __cloudformation_stack_running_branch(self, stack_name):\n        cloudformation = CloudFormationHook().get_conn()\n        try:\n            stack_status = cloudformation.describe_stacks(StackName=stack_name)['Stacks'][0]['StackStatus']\n            if stack_status in ['CREATE_COMPLETE', 'DELETE_FAILED']:\n                print(f'Stack {stack_name} is running')\n                return f'creation-end-{self.task_id}'\n            else:\n                print(f'Stack {stack_name} is not running')\n        except Exception as e:\n            if 'does not exist' in str(e):\n                print(f'Stack {stack_name} does not 
exist')\n                return f'create-cloudformation-{self.task_id}'\n            else:\n                raise e\n\n        return f'create-cloudformation-{self.task_id}'\n\n    # noinspection PyUnusedLocal\n    def __reformatted_params(self, **kwargs):\n        return {\n            'StackName': self.stack_name,\n            **self.task_config['properties'],\n            'Parameters': [\n                {'ParameterKey': x, 'ParameterValue': str(y)}\n                for (x, y) in FlatDict(self.task_config['properties']['Parameters']).items()\n            ],\n        }\n"
  },
  {
    "path": "liminal/runners/airflow/tasks/delete_cloudformation_stack.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom airflow.operators.dummy_operator import DummyOperator\nfrom airflow.operators.python_operator import BranchPythonOperator\nfrom airflow.utils.trigger_rule import TriggerRule\n\nfrom liminal.runners.airflow.operators.cloudformation import (\n    CloudFormationDeleteStackOperator,\n    CloudFormationDeleteStackSensor,\n)\nfrom liminal.runners.airflow.tasks import airflow\n\n\nclass DeleteCloudFormationStackTask(airflow.AirflowTask):\n    \"\"\"\n    Deletes cloud_formation stack.\n    \"\"\"\n\n    def __init__(\n        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None\n    ):\n        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)\n        self.stack_name = task_config['stack_name']\n\n    def apply_task_to_dag(self):\n        check_dags_queued_task = BranchPythonOperator(\n            task_id=f'{self.task_id}-is-dag-queue-empty',\n            python_callable=self.__queued_dag_runs_exists,\n            provide_context=True,\n            trigger_rule=TriggerRule.ALL_DONE,\n            dag=self.dag,\n        )\n\n        delete_stack_task 
= self._add_variables_to_operator(\n            CloudFormationDeleteStackOperator(\n                task_id=f'delete-cloudformation-{self.task_id}',\n                params={'StackName': self.stack_name},\n            )\n        )\n\n        delete_stack_sensor = self._add_variables_to_operator(\n            CloudFormationDeleteStackSensor(\n                task_id=f'cloudformation-watch-{self.task_id}-delete',\n                stack_name=self.stack_name,\n            )\n        )\n\n        stack_delete_end_task = DummyOperator(task_id=f'delete-end-{self.task_id}', dag=self.dag)\n\n        if self.parent:\n            self.parent.set_downstream(check_dags_queued_task)\n\n        check_dags_queued_task.set_downstream(stack_delete_end_task)\n        check_dags_queued_task.set_downstream(delete_stack_task)\n        delete_stack_task.set_downstream(delete_stack_sensor)\n        delete_stack_sensor.set_downstream(stack_delete_end_task)\n\n        return stack_delete_end_task\n\n    # noinspection PyUnusedLocal\n    def __queued_dag_runs_exists(self, **kwargs):\n        if self.dag.get_num_active_runs() > 1:\n            return f'delete-end-{self.task_id}'\n        else:\n            return f'delete-cloudformation-{self.task_id}'\n"
  },
  {
    "path": "liminal/runners/airflow/tasks/hadoop.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom abc import ABC, abstractmethod\n\nfrom liminal.runners.airflow.model import task\n\n\nclass HadoopTask(task.Task, ABC):\n    \"\"\"\n    Hadoop task\n    \"\"\"\n\n    @abstractmethod\n    def get_runnable_command(self):\n        pass\n"
  },
  {
    "path": "liminal/runners/airflow/tasks/job_end.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom liminal.runners.airflow.operators.job_status_operator import JobEndOperator\nfrom liminal.runners.airflow.tasks import airflow\n\n\nclass JobEndTask(airflow.AirflowTask):\n    \"\"\"\n    Job end task. Reports job end metrics.\n    \"\"\"\n\n    def __init__(\n        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None\n    ):\n        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)\n        metrics = self.liminal_config.get('metrics', {})\n        self.metrics_namespace = metrics.get('namespace', '')\n        self.metrics_backends = metrics.get('backends', [])\n\n    def apply_task_to_dag(self):\n        job_end_task = JobEndOperator(\n            task_id='end',\n            namespace=self.metrics_namespace,\n            application_name=self.pipeline_config['pipeline'],\n            backends=self.metrics_backends,\n            dag=self.dag,\n            trigger_rule=self.trigger_rule,\n        )\n\n        if self.parent:\n            self.parent.set_downstream(job_end_task)\n\n        return job_end_task\n"
  },
  {
    "path": "liminal/runners/airflow/tasks/job_start.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom liminal.runners.airflow.operators.job_status_operator import JobStartOperator\nfrom liminal.runners.airflow.tasks import airflow\n\n\nclass JobStartTask(airflow.AirflowTask):\n    \"\"\"\n    Job start task. Reports job start metrics.\n    \"\"\"\n\n    def __init__(\n        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None\n    ):\n        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)\n        metrics = self.liminal_config.get('metrics', {})\n        self.metrics_namespace = metrics.get('namespace', '')\n        self.metrics_backends = metrics.get('backends', [])\n\n    def apply_task_to_dag(self):\n        job_start_task = JobStartOperator(\n            task_id='start',\n            namespace=self.metrics_namespace,\n            application_name=self.pipeline_config['pipeline'],\n            backends=self.metrics_backends,\n            dag=self.dag,\n            trigger_rule=self.trigger_rule,\n        )\n\n        if self.parent:\n            self.parent.set_downstream(job_start_task)\n\n        return job_start_task\n"
  },
  {
    "path": "liminal/runners/airflow/tasks/python.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom liminal.runners.airflow.tasks import containerable\n\n\nclass PythonTask(containerable.ContainerTask):\n    \"\"\"\n    Python task.\n    \"\"\"\n\n    def _kubernetes_cmds_and_arguments(self):\n        cmds = ['/bin/bash', '-c']\n        arguments = [self.__cmd()]\n\n        return cmds, arguments\n\n    def __cmd(self):\n        return self.task_config['cmd']\n"
  },
  {
    "path": "liminal/runners/airflow/tasks/spark.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom itertools import chain\n\nfrom flatdict import FlatDict\n\nfrom liminal.runners.airflow.tasks import containerable, hadoop\n\n\nclass SparkTask(hadoop.HadoopTask, containerable.ContainerTask):\n    \"\"\"\n    Executes a Spark application.\n    \"\"\"\n\n    def __init__(\n        self, task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables=None\n    ):\n        task_config['image'] = task_config.get('image', '')\n        task_config['cmd'] = task_config.get('cmd', [])\n        task_config['env_vars'] = {'SPARK_LOCAL_HOSTNAME': 'localhost'}\n        super().__init__(task_id, dag, parent, trigger_rule, liminal_config, pipeline_config, task_config, variables)\n\n    def get_runnable_command(self):\n        \"\"\"\n        Return spark-submit runnable command\n        \"\"\"\n        return self.__generate_spark_submit()\n\n    def __generate_spark_submit(self):\n        spark_submit = ['spark-submit']\n\n        spark_arguments = self.__spark_args()\n        application_arguments = self.__additional_arguments()\n\n        spark_submit.extend(spark_arguments)\n        spark_submit.extend(application_arguments)\n\n    
    return [str(x) for x in spark_submit]\n\n    def __spark_args(self):\n        # reformat spark conf\n        flat_conf_args = list()\n\n        spark_arguments = {\n            'master': self.task_config.get('master', None),\n            'class': self.task_config.get('class', None),\n        }\n\n        source_code = self.task_config.get(\"application_source\")\n\n        for conf_arg in [f'{k}={v}' for (k, v) in FlatDict(self.task_config.get('conf', {})).items()]:\n            flat_conf_args.append('--conf')\n            flat_conf_args.append(conf_arg)\n\n        spark_conf = self.__parse_spark_arguments(spark_arguments)\n        spark_conf.extend(flat_conf_args)\n        spark_conf.extend([source_code])\n        return spark_conf\n\n    def __additional_arguments(self):\n        application_arguments = self.task_config.get('application_arguments', {})\n        if type(application_arguments) == list:\n            return application_arguments\n        return self.__interleaving(application_arguments.keys(), application_arguments.values())\n\n    def __parse_spark_arguments(self, spark_arguments):\n        spark_arguments = {x[0]: x[1] for x in spark_arguments.items() if x[1]}\n\n        return self.__interleaving(\n            [f'--{k}' for k in spark_arguments.keys() if spark_arguments[k]], spark_arguments.values()\n        )\n\n    @staticmethod\n    def __interleaving(keys, values):\n        return list(chain.from_iterable(zip(keys, values)))\n\n    def _kubernetes_cmds_and_arguments(self):\n        return self.__generate_spark_submit(), []\n"
  },
  {
    "path": "liminal/runners/airflow/tasks/sql.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom liminal.runners.airflow.model import task\n\n\nclass SqlTask(task.Task):\n    \"\"\"\n    Executes an SQL application.\n    \"\"\"\n"
  },
  {
    "path": "liminal/settings.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport os\nimport sys\n\nfrom liminal.core.environment import get_liminal_home\n\nLIMINAL_HOME = get_liminal_home()\n\n\ndef prepare_syspath():\n    plugins = os.path.join(LIMINAL_HOME, 'liminal')\n    sys.path.append(plugins)\n\n\ndef initialize():\n    # making sure all extensible modules are in root path\n    prepare_syspath()\n"
  },
  {
    "path": "liminal/sql/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "liminal-arch.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n# Liminal\n\nLiminal is an end-to-end platform for data engineers & scientists, allowing them to build, train and deploy machine learning models in a robust and agile way. The platform provides the abstractions and declarative capabilities for data extraction & feature engineering followed by model training and serving. 
Apache Liminal's goal is to operationalise the machine learning process, allowing data scientists to quickly transition from a successful experiment to an automated pipeline of model training, validation, deployment and inference in production, freeing them from engineering and non-functional tasks, and allowing them to focus on machine learning code and artifacts.\n\n## Motivation\n\nThe challenges involved in operationalizing machine learning models are one of the main reasons why many machine learning projects never make it to production.\nThe process involves automating and orchestrating multiple steps which run on heterogeneous infrastructure - different compute environments, data processing platforms, ML frameworks, notebooks, containers and monitoring tools.\nThere are no mature standards for this workflow, and most organizations do not have the experience to build it in-house. In the best case, dev-ds-devops teams form in order to accomplish this task together; in many cases, it's the data scientists who try to deal with this themselves without the knowledge or the inclination to become infrastructure experts.\nAs a result, many projects never make it through the cycle. Those that do suffer from a very long lead time from a successful experiment to an operational, refreshable, deployed and monitored model in production.\nThe goal of Apache Liminal is to simplify the creation and management of machine learning pipelines by data engineers & scientists. The platform provides declarative building blocks which define the workflow, orchestrate the underlying infrastructure, and take care of non-functional concerns, enabling focus on business logic / algorithm code.\nSome commercial E2E solutions have started to emerge in the last few years; however, they are limited to specific parts of the workflow, such as Databricks MLflow. Other solutions are tied to specific environments (e.g. 
SageMaker on AWS).\n\n## High Level Architecture\nThe platform aims to provide data engineers & scientists with a solution for end-to-end flows from model training to real-time inference in production. Its architecture enables and promotes adoption of specific components in existing (non-Liminal) frameworks, as well as seamless integration with other open source projects. Liminal was created to enable scalability in ML efforts, following a thorough review of available solutions and frameworks, which did not meet our main KPIs:\n\n- Provide an opinionated but customizable end-to-end workflow\n- Abstract away the complexity of underlying infrastructure\n- Support major open source tools and cloud-native infrastructure to carry out many of the steps\n- Allow teams to leverage their existing investments or bring in their tools of choice into the workflow\n\nWe have found that other tech companies in the Israeli Hi-Tech ecosystem also have an interest in such a platform, hence we decided to share our work with the community.\nThe following diagram depicts these main components and where Apache Liminal comes in:\n\n![raibow-arch1](https://raw.githubusercontent.com/apache/incubator-liminal/master/images/liminal_001.png)\n\nA classical data scientist workflow includes some base phases:\n_Train, Deploy and Consume._\n\n**The Train phase includes the following tasks:**\n\n1. Fetch - get the data needed to build a model - usually using SQL\n1. Clean - make sure the data is useful for building the model\n1. Prepare - split data and encode features from the data according to model needs\n1. Train - Build the model and tune it\n1. Evaluate - make sure the model is correct - run it on a test set, etc…\n1. Validate - make sure the model is up to the standards you need\n\n**The Deploy phase includes these tasks:**\n1. Deploy - make it available for usage in production\n1. Inference - Batch or Real-time - use the model to evaluate data, offline or online, from your applications\n1. 
Consume - The actual use of the created models by applications and ETLs, usually through APIs to the batch or real-time inference, which usually relies on Model and Feature stores.\n\nLiminal provides its users with declarative composition capabilities to materialize these steps in a robust way, while exploiting existing frameworks and tools, e.g. data science frameworks such as scikit-learn, TensorFlow, Keras and the like for running core data science algorithms, as well as numerous core mechanisms such as data stores, processing engines, parallelism, schedulers, code deployment, and batch and real-time inference.\nLiminal allows the creation and wiring of these kinds of functional and non-functional tasks while making the underlying infrastructure used by these tasks very easy to use and even abstracted away entirely. It also handles non-functional aspects such as monitoring (in a standard fashion), deployment, scheduling, resource management and execution.\n\n![raibow-arch2](https://raw.githubusercontent.com/apache/incubator-liminal/master/images/liminal_002.png)\n"
  },
  {
    "path": "pyproject.toml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n[tool.black]\nline-length = 119  # TODO: Decide if 119 or 110?\ntarget-version = ['py36', 'py37', 'py38', 'py39']\nskip-string-normalization = true\ninclude = '\\.pyi?$'\nexclude = '''\n(\n  /(\n      \\.eggs\n    | \\.git\n    | \\.mypy_cache\n    | \\.tox\n    | _build\n    | build\n    | dist\n  )/\n  | src/buildstream/_protos\n)\n'''\n"
  },
  {
    "path": "requirements.txt",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\ndocker==4.2.0\napache-airflow==2.1.2\nclick==7.1.1\nFlask==1.1.1\npyyaml==5.4.1\nboto3==1.17.112\nbotocore==1.20.112\nwheel==0.36.2\ntermcolor~=1.1.0\ndocker-pycreds==0.4.0\ntyping==3.7.4.1\nGitPython==3.1.11\nmoto==1.3.14\ndiskcache==3.1.1\ncroniter==0.3.31\npytz==2020.5\npytzdata==2020.1\nfreezegun==1.1.0\nstatsd>=3.3.0, <4.0\nsqlalchemy~=1.3.15\nflatdict==3.4.0\njinja2>=2.10.1, <2.11.0\npython-json-logger==2.0.1\nrequests==2.26.0\napache-airflow-providers-amazon==1.4.0\napache-airflow-providers-cncf-kubernetes==1.0.2\n#apache-airflow-providers-google==4.0.0\n#apache-airflow[google,amazon,apache.spark]==2.0.0\ncfn-lint==0.53.0\npre-commit==2.16.0\nWerkzeug==1.0.1\nitsdangerous==1.1.0\nMarkupSafe==1.1.1\n"
  },
  {
    "path": "run_tests.sh",
    "content": "#!/bin/sh\n#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\npython -m unittest\n"
  },
  {
    "path": "scripts/Dockerfile-airflow",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nFROM apache/airflow:2.1.2-python3.8\n\nARG LIMINAL_VERSION='apache-liminal'\nARG DAG_FOLDER='/opt/airflow/dags/'\nADD scripts/* ${DAG_FOLDER}\nADD *.whl ${DAG_FOLDER}\n\nRUN ls -ls ${DAG_FOLDER}\nADD liminal/runners/airflow/dag/liminal_dags.py ${DAG_FOLDER}\nADD scripts/webserver_config.py /opt/airflow/\n\nUSER airflow\n\nRUN pip --disable-pip-version-check install -r /opt/airflow/dags/requirements-airflow.txt --user\nRUN pip --disable-pip-version-check install --user ${LIMINAL_VERSION}\n"
  },
  {
    "path": "scripts/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "scripts/docker-compose.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nversion: '3'\nx-airflow-common: &airflow-common\n  image: liminal-airflow\n  build:\n    context: .\n    dockerfile: scripts/Dockerfile-airflow\n    args:\n      LIMINAL_VERSION: ${LIMINAL_VERSION}\n  environment: &airflow-common-env\n    LOAD_EX: n\n    AIRFLOW__CORE__EXECUTOR: LocalExecutor\n    KUBECONFIG: /home/airflow/kube/config\n    AWS_DEFAULT_REGION: \"${AWS_DEFAULT_REGION:-us-east-1}\"\n    AIRFLOW__CORE__LOAD_EXAMPLES: 'False'\n    AIRFLOW__WEBSERVER__WORKERS: '1'\n    AIRFLOW__WEBSERVER__EXPOSE_CONFIG: 'True'\n    _AIRFLOW_DB_UPGRADE: 'true'\n    LIMINAL_HOME: /opt/airflow/dags\n    AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS: 'False'\n    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres:5432/airflow\n    AIRFLOW_CONN_METADATA_DB: postgresql+psycopg2://airflow:airflow@postgres:5432/airflow\n    AIRFLOW_VAR__METADATA_DB_SCHEMA: airflow\n  volumes:\n    - ${LIMINAL_HOME}/liminal:/opt/airflow/dags/liminal/\n    - ${LIMINAL_HOME}:/opt/airflow/dags\n    - ${LIMINAL_HOME}/logs:/opt/airflow/logs\n    - ${HOME}/.kube:/home/airflow/kube\n  depends_on:\n    postgres:\n      condition: service_healthy\n  healthcheck:\n    
test: [CMD-SHELL, '[ -f /usr/local/airflow/airflow-webserver.pid ]']\n    interval: 30s\n    timeout: 30s\n    retries: 3\n  logging:\n    options:\n      max-size: 10m\n      max-file: '3'\n  restart: always\n\nservices:\n  postgres:\n    image: postgres:13\n    container_name: liminal-postgress\n    environment:\n      POSTGRES_USER: airflow\n      POSTGRES_PASSWORD: airflow\n      POSTGRES_DB: ''\n    ports:\n      - 5432:5432\n    volumes:\n      - ${LIMINAL_HOME}/db:/var/lib/postgresql/data\n    logging:\n      options:\n        max-size: 10m\n        max-file: '3'\n    healthcheck:\n      test: [CMD, pg_isready, -U, airflow]\n      interval: 30s\n      timeout: 30s\n      retries: 3\n    restart: always\n\n  webserver:\n    <<: *airflow-common\n    container_name: liminal-webserver\n    environment:\n      <<: *airflow-common-env\n    ports:\n      - 8080:8080\n    command: webserver\n\n  scheduler:\n    <<: *airflow-common\n    container_name: liminal-scheduler\n    environment:\n      <<: *airflow-common-env\n    ports:\n      - 8793:8793\n    command: scheduler\n"
  },
  {
    "path": "scripts/liminal",
    "content": "#!/usr/bin/env python3\n#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\nimport pathlib\nimport shutil\nimport subprocess\nimport sys\nimport yaml\n\nimport click\n\nimport scripts\nfrom liminal import settings\nfrom liminal.build import liminal_apps_builder\nfrom liminal.core import environment\nfrom liminal.core.config.config import ConfigUtil\nfrom liminal.core.util import files_util\nfrom liminal.kubernetes import volume_util, secret_util\nfrom liminal.logging.logging_setup import logging_initialization\nfrom liminal.runners.airflow import dag\n\nlogging_initialization()\nsettings.initialize()\n\ntry:\n    import importlib.resources as pkg_resources\nexcept ImportError:\n    # Try backported to PY<37 `importlib_resources`.\n    import importlib_resources as pkg_resources\n\n\n@click.group()\ndef cli():\n    pass\n\n\ndef docker_is_running():\n    try:\n        return not subprocess.check_output(\"docker info >/dev/null 2>&1\", shell=True)\n    except subprocess.CalledProcessError:\n        msg = \"Docker is not running. 
Please start docker service on your machine\\n\"\n        sys.stderr.write(f\"ERROR: {msg}\")\n        raise RuntimeError(msg)\n\n\n@cli.command(\"build\", short_help=\"builds dockers from your business logic\")\n@click.option('--path', default=os.getcwd(), help='Build within this path.')\ndef build(path):\n    click.echo(f'Building liminal apps in {path}')\n    if docker_is_running():\n        liminal_apps_builder.build_liminal_apps(path)\n    configs = ConfigUtil(path).safe_load(is_render_variables=True,\n                                         soft_merge=True)\n    for config in configs:\n        if config.get('volumes'):\n            logging.info(f'local volume is being created')\n            volume_util.create_local_volumes(config, os.path.dirname(\n                files_util.resolve_pipeline_source_file(config['name'])))\n\n\ndef deploy_liminal_core_internal(liminal_home, clean):\n    # noinspection PyTypeChecker\n    with pkg_resources.path(dag, 'liminal_dags.py') as p:\n        dags_path = p\n    os.makedirs(environment.get_dags_dir(), exist_ok=True)\n    dag_target_file = os.path.join(environment.get_liminal_home(), 'liminal_dags.py')\n    shutil.copyfile(dags_path, dag_target_file)\n    # initialize the env. 
variable which indicates to the docke compose which\n    # liminal to install in airflow docker\n    liminal_version = environment.get_liminal_version()\n    logging.info(f'Deploying liminal version: {liminal_version}')\n    # if liminal is installed from local file - the developer needs to put it in the /scripts folder\n    # in which case it will end up inside the container during build\n    if liminal_version.find(\"file://\") > -1:\n        local_file_name = os.path.basename(liminal_version)\n        full_path = os.path.join('/opt/airflow/dags', local_file_name)\n        logging.info(f'Liminal was installed locally, setting LIMINAL_VERSION to {full_path}')\n        os.environ[environment.LIMINAL_VERSION_PARAM_NAME] = full_path\n        _, project_dir = get_docker_compose_paths()\n        shutil.copyfile(os.path.join(liminal_home, local_file_name),\n                        os.path.join(project_dir, local_file_name))\n    if clean:\n        docker_compose_command('down', ['--remove-orphans', '--rmi', 'local'])\n        docker_compose_command('build', [])\n        docker_compose_command('run', ['webserver', 'db reset', '-y'])\n        docker_compose_command('run', ['webserver', 'db init'])\n        docker_compose_command('run', ['webserver', 'users create '\n                                                    '--role Admin '\n                                                    '--username admin '\n                                                    '--email admin '\n                                                    '--firstname admin '\n                                                    '--lastname admin '\n                                                    '--password admin'])\n        docker_compose_command('down', ['--remove-orphans'])\n\n\ndef docker_compose_command(command_name, args):\n    detached_mode = False\n    docker_compose_path, project_dir = get_docker_compose_paths()\n    concated_args = ' '.join(args)\n    run_command = [\n        
'docker-compose '\n        f'-f \"{docker_compose_path}\" '\n        '-p liminal --project-directory '\n        f'{project_dir} {command_name} {concated_args}'\n    ]\n    logging.info(run_command[0])\n    if 'tail' in str(args) or '-d' in str(args) or command_name in ['ps', 'down']:\n        detached_mode = True\n\n    if detached_mode:\n        output = subprocess.run(run_command, env=os.environ, shell=True, stdout=subprocess.PIPE,\n                                stderr=subprocess.PIPE, universal_newlines=True)\n        return output.stdout, output.stderr\n    else:\n        subprocess.call(run_command, env=os.environ, shell=True)\n        return '', ''\n\n\n@cli.command(\"deploy\", short_help=\"deploys your liminal.yaml files to $LIMINAL_HOME folder\")\n@click.option('--path', default=os.getcwd(), help=\"folder containing liminal.yaml files\")\n@click.option('--clean', is_flag=True, default=False,\n              help=\"re-deploy liminal airflow components from scratch\")\ndef deploy_liminal_apps(path, clean):\n    click.echo(\"deploying liminal yaml files\")\n    liminal_home = environment.get_liminal_home()\n    if (clean):\n        click.echo(\"cleaning liminal home directory\")\n        shutil.rmtree(liminal_home)\n    os.makedirs(liminal_home, exist_ok=True)\n    os.makedirs(environment.get_dags_dir(), exist_ok=True)\n\n    deploy_liminal_core_internal(liminal_home, clean)\n    config_files = files_util.find_config_files(path)\n    for config_file in config_files:\n        if (is_file_empty(config_file)):\n            continue\n        click.echo(f\"deploying liminal file: {config_file}\")\n        relative_path = os.path.relpath(config_file, os.path.dirname(path))\n        target_yml_name = os.path.join(environment.get_dags_dir(), relative_path)\n        pathlib.Path(target_yml_name).parent.mkdir(parents=True, exist_ok=True)\n        shutil.copyfile(config_file, target_yml_name)\n        configs_path = 
ConfigUtil(os.path.dirname(config_file)).safe_load(is_render_variables=True,\n                                                                          soft_merge=True)\n        for config in configs_path:\n            if config.get('secrets'):\n                logging.info(f'Secret volume is being created')\n                secret_util.create_local_secrets(config)\n\n\ndef is_file_empty(file):\n    try:\n        with open(file, \"r\") as stream:\n            data_loaded = yaml.safe_load(stream)\n    except yaml.YAMLError as exc:\n        print(exc)\n    return data_loaded is None\n\n\ndef liminal_is_running():\n    stdout, stderr = docker_compose_command('ps', [])\n    return \"liminal\" in stdout\n\n\n@cli.command(\"stop\", short_help=\"stops the docker compose\")\ndef stop():\n    if docker_is_running() and liminal_is_running():\n        # initialize liminal home by default\n        environment.get_liminal_home()\n        docker_compose_command('down', [])\n\n\n@cli.command(\"logs\", short_help=\"display the docker compose logs\")\n@click.option('--follow', '-f', is_flag=True, default=False, help=\"Follow log output.\")\n@click.option('--tail', '-t', default=0, type=int,\n              help=\"Number of lines to show from the end of the log\")\ndef logs(follow, tail):\n    if docker_is_running() and liminal_is_running():\n        # initialize liminal home by default\n        environment.get_liminal_home()\n        if follow:\n            docker_compose_command('logs', ['--follow'])\n        if tail > 0:\n            stdout, stderr = docker_compose_command('logs', [f'--tail={tail}'])\n            logging.info(stdout)\n\n\n@cli.command(\"start\",\n             short_help=\"starts a local airflow in docker compose. should be run after deploy. 
\" +\n                        \"Make sure docker is running on your machine\")\n@click.option('--detached-mode', '-d', is_flag=True, default=False,\n              help=\"Start liminal in detached mode.\")\ndef start(detached_mode):\n    liminal_version = environment.get_liminal_version()\n    logging.info(f'starting liminal version: {liminal_version}')\n    if docker_is_running():\n        # initialize liminal home by default\n        environment.get_liminal_home()\n        if detached_mode:\n            docker_compose_command('up', ['-d'])\n        else:\n            docker_compose_command('up', [])\n\n\n@cli.command(\"delete\", short_help=\"delete liminal resource registration\")\n@click.option('--path', default=os.getcwd(), help='Delete within this path.')\n@click.option('--clean', is_flag=True, default=False,\n              help=\"Delete all resource: DAGs, Volumes\")\ndef delete(path, clean):\n    configs = ConfigUtil(path).safe_load(is_render_variables=True,\n                                         soft_merge=True)\n    for config in configs:\n        if config.get('volumes'):\n            logging.info(f'local volume is being deleted')\n            volume_util.delete_local_volumes(config, os.path.dirname(\n                files_util.resolve_pipeline_source_file(config['name'])))\n\n    if clean:\n        dir_path = os.path.abspath(environment.get_liminal_home())\n        shutil.rmtree(dir_path)\n        docker_compose_command('down', ['--remove-orphans', '--rmi', 'local'])\n\n\ndef get_docker_compose_paths():\n    # noinspection PyTypeChecker\n    with pkg_resources.path(scripts, 'docker-compose.yml') as p:\n        docker_compose_path = p\n    project_dir = pathlib.Path(docker_compose_path).parent.parent.absolute()\n    return docker_compose_path, project_dir\n\n\nif __name__ == '__main__':\n    cli()\n"
  },
  {
    "path": "scripts/requirements-airflow.txt",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nclick==7.1.1\npyyaml\nboto3==1.12.10\nbotocore==1.15.21\nkubernetes\ndiskcache==3.1.1\nflatdict==4.0.1\nkafka-python==2.0.2\ninfluxdb-client\n"
  },
  {
    "path": "scripts/webserver_config.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\"\"\"Default configuration for the Airflow webserver\"\"\"\nimport os\n\nfrom flask_appbuilder.security.manager import AUTH_DB\n\n# from flask_appbuilder.security.manager import AUTH_LDAP\n# from flask_appbuilder.security.manager import AUTH_OAUTH\n# from flask_appbuilder.security.manager import AUTH_OID\n# from flask_appbuilder.security.manager import AUTH_REMOTE_USER\n\n\nbasedir = os.path.abspath(os.path.dirname(__file__))\n\n# Flask-WTF flag for CSRF\nWTF_CSRF_ENABLED = True\n\n# ----------------------------------------------------\n# AUTHENTICATION CONFIG\n# ----------------------------------------------------\n# For details on how to set up each of the following authentication, see\n# http://flask-appbuilder.readthedocs.io/en/latest/security.html# authentication-methods\n# for details.\n\n# The authentication type\n# AUTH_OID : Is for OpenID\n# AUTH_DB : Is for database\n# AUTH_LDAP : Is for LDAP\n# AUTH_REMOTE_USER : Is for using REMOTE_USER from web server\n# AUTH_OAUTH : Is for OAuth\nAUTH_TYPE = AUTH_DB\n\n# Uncomment to setup Full admin role name\n# AUTH_ROLE_ADMIN = 'Admin'\n\n# Uncomment to setup Public role name, no authentication 
needed\nAUTH_ROLE_PUBLIC = 'Admin'\n\n# Will allow user self registration\n# AUTH_USER_REGISTRATION = True\n\n# The recaptcha it's automatically enabled for user self registration is active and the keys are necessary\n# RECAPTCHA_PRIVATE_KEY = PRIVATE_KEY\n# RECAPTCHA_PUBLIC_KEY = PUBLIC_KEY\n\n# Config for Flask-Mail necessary for user self registration\n# MAIL_SERVER = 'smtp.gmail.com'\n# MAIL_USE_TLS = True\n# MAIL_USERNAME = 'yourappemail@gmail.com'\n# MAIL_PASSWORD = 'passwordformail'\n# MAIL_DEFAULT_SENDER = 'sender@gmail.com'\n\n# The default user self registration role\n# AUTH_USER_REGISTRATION_ROLE = \"Public\"\n\n# When using OAuth Auth, uncomment to setup provider(s) info\n# Google OAuth example:\n# OAUTH_PROVIDERS = [{\n#   'name':'google',\n#     'token_key':'access_token',\n#     'icon':'fa-google',\n#         'remote_app': {\n#             'api_base_url':'https://www.googleapis.com/oauth2/v2/',\n#             'client_kwargs':{\n#                 'scope': 'email profile'\n#             },\n#             'access_token_url':'https://accounts.google.com/o/oauth2/token',\n#             'authorize_url':'https://accounts.google.com/o/oauth2/auth',\n#             'request_token_url': None,\n#             'client_id': GOOGLE_KEY,\n#             'client_secret': GOOGLE_SECRET_KEY,\n#         }\n# }]\n\n# When using LDAP Auth, setup the ldap server\n# AUTH_LDAP_SERVER = \"ldap://ldapserver.new\"\n\n# When using OpenID Auth, uncomment to setup OpenID providers.\n# example for OpenID authentication\n# OPENID_PROVIDERS = [\n#    { 'name': 'Yahoo', 'url': 'https://me.yahoo.com' },\n#    { 'name': 'AOL', 'url': 'http://openid.aol.com/<username>' },\n#    { 'name': 'Flickr', 'url': 'http://www.flickr.com/<username>' },\n#    { 'name': 'MyOpenID', 'url': 'https://www.myopenid.com' }]\n\n# ----------------------------------------------------\n# Theme CONFIG\n# ----------------------------------------------------\n# Flask App Builder comes up with a number of predefined 
themes\n# that you can use for Apache Airflow.\n# http://flask-appbuilder.readthedocs.io/en/latest/customizing.html#changing-themes\n# Please make sure to remove \"navbar_color\" configuration from airflow.cfg\n# in order to fully utilize the theme. (or use that property in conjunction with theme)\n# APP_THEME = \"bootstrap-theme.css\"  # default bootstrap\n# APP_THEME = \"amelia.css\"\n# APP_THEME = \"cerulean.css\"\n# APP_THEME = \"cosmo.css\"\n# APP_THEME = \"cyborg.css\"\n# APP_THEME = \"darkly.css\"\n# APP_THEME = \"flatly.css\"\n# APP_THEME = \"journal.css\"\n# APP_THEME = \"lumen.css\"\n# APP_THEME = \"paper.css\"\n# APP_THEME = \"readable.css\"\n# APP_THEME = \"sandstone.css\"\n# APP_THEME = \"simplex.css\"\n# APP_THEME = \"slate.css\"\n# APP_THEME = \"solar.css\"\n# APP_THEME = \"spacelab.css\"\n# APP_THEME = \"superhero.css\"\n# APP_THEME = \"united.css\"\n# APP_THEME = \"yeti.css\"\n"
  },
  {
    "path": "setup.py",
    "content": "#!/usr/bin/env python3\n#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\n\nimport setuptools\n\nwith open(\"README.md\") as fh:\n    long_description = fh.read()\n\nwith open('requirements.txt') as f:\n    requirements = f.read().splitlines()\n    logging.info(requirements)\n\nsetuptools.setup(\n    name=\"apache-liminal\",\n    version=os.environ.get(\"LIMINAL_BUILD_VERSION\", os.environ.get('LIMINAL_VERSION', None)),\n    author=\"dev@liminal.apache.org\",\n    author_email='dev@liminal.apache.org',\n    description=\"A package for authoring and deploying machine learning workflows\",\n    long_description=long_description,\n    long_description_content_type=\"text/markdown\",\n    url=\"https://github.com/apache/incubator-liminal\",\n    packages=setuptools.find_packages(),\n    classifiers=[\n        \"Programming Language :: Python :: 3\",\n        \"License :: OSI Approved :: Apache Software License\",\n        \"Operating System :: OS Independent\",\n    ],\n    license='Apache License, Version 2.0',\n    python_requires='>=3.6',\n    install_requires=requirements,\n    scripts=['scripts/liminal'],\n    include_package_data=True,\n    package_data={\n        'liminal': 
[\n            '../DISCLAIMER',\n            '../LICENSE',\n            '../NOTICE',\n            '../README.md',\n            '../liminal/core/config/defaults/base/liminal.yml',\n        ],\n    },\n)\n"
  },
  {
    "path": "tests/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/liminal/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/liminal/core/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/liminal/core/config/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/liminal/core/config/defaults/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/liminal/core/config/defaults/test_apply_variables_substitution.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom unittest import TestCase\n\nfrom liminal.core.config.defaults import default_configs\n\n\nclass TestApplyVariablesSubstitution(TestCase):\n    def test_apply(self):\n        subliminal = {'variables': {'one': 'one_value', 'two': 'two_value'}}\n\n        superliminal = {'variables': {'three': 'three_value', 'two': 'two_super_value'}}\n\n        expected = {'variables': {'one': 'one_value', 'two': 'two_value', 'three': 'three_value'}}\n        self.assertEqual(expected, default_configs.apply_variable_substitution(subliminal, superliminal))\n\n    def test_apply_superliminal_is_missing_variables(self):\n        subliminal = {'variables': {'one': 'one_value', 'two': 'two_value'}}\n\n        superliminal = {}\n\n        expected = {'variables': {'one': 'one_value', 'two': 'two_value'}}\n        self.assertEqual(expected, default_configs.apply_variable_substitution(subliminal, superliminal))\n\n    def test_apply_subliminal_is_missing_variables(self):\n        subliminal = {}\n\n        superliminal = {'variables': {'one': 'one_value', 'two': 'two_value'}}\n\n        expected = {'variables': {'one': 'one_value', 'two': 'two_value'}}\n        
self.assertEqual(expected, default_configs.apply_variable_substitution(subliminal, superliminal))\n"
  },
  {
    "path": "tests/liminal/core/config/defaults/test_defaults_pipeline_config.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom unittest import TestCase\n\nfrom liminal.core.config.defaults import default_configs\n\n\nclass TestDefaultsPipelineConfig(TestCase):\n    def test_apply(self):\n        pipeline = {\"name\": \"mypipe\", \"param\": \"constant\", \"tasks\": [{\"task\": \"middle_task\", \"type\": \"python\"}]}\n\n        subliminal = {\n            \"name\": \"my_subliminal_test\",\n            \"pipelines\": [\n                {\n                    \"name\": \"mypipe\",\n                    \"param\": \"constant\",\n                    \"tasks\": [{\"task\": \"middle_task\", \"type\": \"python\", \"env_vars\": {\"env1\": \"env1\"}}],\n                }\n            ],\n            \"pipeline_defaults\": {\"param1\": \"param1_value\"},\n        }\n        superliminal = {\n            \"pipeline_defaults\": {\n                \"param2\": \"param2super_value\",\n                \"param3\": \"param3super_value\",\n                \"before_tasks\": [{\"task\": \"first_task\", \"type\": \"python\"}],\n                \"after_tasks\": [{\"task\": \"end_task\", \"type\": \"python\"}],\n            },\n            \"task_defaults\": {\n                \"python\": {\n         
           \"default_task_super\": \"default_task_super_value\",\n                    \"env_vars\": {\"env1\": \"env1super\", \"env2\": \"env2super\"},\n                }\n            },\n        }\n        expected = {\n            \"param\": \"constant\",\n            \"param3\": \"param3super_value\",\n            \"param2\": \"param2super_value\",\n            \"tasks\": [\n                {\n                    \"type\": \"python\",\n                    \"env_vars\": {\"env1\": \"env1super\", \"env2\": \"env2super\"},\n                    \"task\": \"first_task\",\n                    \"default_task_super\": \"default_task_super_value\",\n                },\n                {\n                    \"type\": \"python\",\n                    \"env_vars\": {\"env1\": \"env1super\", \"env2\": \"env2super\"},\n                    \"task\": \"middle_task\",\n                    \"default_task_super\": \"default_task_super_value\",\n                },\n                {\n                    \"type\": \"python\",\n                    \"env_vars\": {\"env1\": \"env1super\", \"env2\": \"env2super\"},\n                    \"task\": \"end_task\",\n                    \"default_task_super\": \"default_task_super_value\",\n                },\n            ],\n            \"name\": \"mypipe\",\n            \"param1\": \"param1_value\",\n        }\n\n        self.assertEqual(expected, default_configs.apply_pipeline_defaults(subliminal, superliminal, pipeline))\n"
  },
  {
    "path": "tests/liminal/core/config/defaults/test_defaults_service_config.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom unittest import TestCase\n\nfrom liminal.core.config.defaults import default_configs\n\n\nclass TestDefaultsServiceConfig(TestCase):\n    def test_apply(self):\n        subliminal = {\n            \"services\": [\n                {\"name\": \"my_python_server\", \"type\": \"python_server\", \"image\": \"default_image\"},\n                {\"name\": \"my_python_server_for_stg\", \"type\": \"python_server\", \"image\": \"test_image\"},\n            ]\n        }\n\n        superliminal = {\"service_defaults\": {\"param\": \"param1\", \"param2\": \"param2\"}}\n\n        expected = [\n            {\n                'image': 'default_image',\n                'name': 'my_python_server',\n                'param': 'param1',\n                'param2': 'param2',\n                'type': 'python_server',\n            },\n            {\n                'image': 'test_image',\n                'name': 'my_python_server_for_stg',\n                'param': 'param1',\n                'param2': 'param2',\n                'type': 'python_server',\n            },\n        ]\n\n        self.assertEqual(expected, default_configs.apply_service_defaults(subliminal, 
superliminal))\n"
  },
  {
    "path": "tests/liminal/core/config/defaults/test_defaults_tasks_config.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom unittest import TestCase\n\nfrom liminal.core.config.defaults import default_configs\n\n\nclass TestDefaultsTaskConfig(TestCase):\n    def test_apply(self):\n        pipeline = {\n            'pipeline': 'mypipe',\n            \"tasks\": [\n                {\"task\": \"middle\", \"type\": \"spark\", \"task_param\": \"task_middle_param\"},\n                {\"task\": \"end\", \"type\": \"python\", \"task_param\": \"task_end_param\"},\n            ],\n        }\n\n        subliminal = {'pipelines': [pipeline]}\n\n        superliminal = {\n            \"task_defaults\": {\n                \"python\": {\"env_vars\": {\"env2\": \"env2value\"}},\n                \"spark\": {\"task_param\": \"task_spark_param\", \"executor\": \"emr\"},\n            }\n        }\n\n        expected = {\n            'pipeline': 'mypipe',\n            'tasks': [\n                {'env_vars': {'env1': 'env1value', 'env2': 'env2value'}, 'task': 'start', 'type': 'python'},\n                {'executor': 'emr', 'task': 'middle', 'task_param': 'task_middle_param', 'type': 'spark'},\n                {'env_vars': {'env2': 'env2value'}, 'task': 'end', 'task_param': 'task_end_param', 
'type': 'python'},\n            ],\n        }\n        self.assertEqual(\n            expected,\n            default_configs.apply_task_defaults(\n                subliminal,\n                superliminal,\n                pipeline=pipeline,\n                superliminal_before_tasks=[{\"task\": \"start\", \"type\": \"python\", \"env_vars\": {\"env1\": \"env1value\"}}],\n                superliminal_after_tasks=[],\n            ),\n        )\n\n    def test_missing_tasks_from_supr(self):\n        pipeline = {\n            'pipeline': 'mypipe',\n            \"tasks\": [\n                {\"task\": \"middle\", \"type\": \"spark\", \"task_param\": \"task_middle_param\"},\n                {\"task\": \"end\", \"type\": \"python\", \"task_param\": \"task_end_param\"},\n            ],\n        }\n\n        subliminal = {'pipelines': [pipeline]}\n\n        superliminal = {\n            \"task_defaults\": {\n                \"python\": {\"env_vars\": {\"env2\": \"env2value\"}},\n                \"spark\": {\"task_param\": \"task_spark_param\"},\n            }\n        }\n\n        expected = {\n            'pipeline': 'mypipe',\n            'tasks': [\n                {'task': 'middle', 'task_param': 'task_middle_param', 'type': 'spark'},\n                {'env_vars': {'env2': 'env2value'}, 'task': 'end', 'task_param': 'task_end_param', 'type': 'python'},\n            ],\n        }\n\n        self.assertEqual(\n            expected,\n            default_configs.apply_task_defaults(\n                subliminal, superliminal, pipeline=pipeline, superliminal_before_tasks=[], superliminal_after_tasks=[]\n            ),\n        )\n\n    def test_missing_tasks_from_sub(self):\n        pipeline = {'pipeline': 'mypipe'}\n\n        subliminal = {'pipelines': [pipeline]}\n\n        superliminal = {\n            \"task_defaults\": {\n                \"python\": {\"env_vars\": {\"env2\": \"env2value\"}},\n                \"spark\": {\n                    \"task_param\": 
\"task_start_param\",\n                },\n            }\n        }\n\n        expected = {\n            'pipeline': 'mypipe',\n            'tasks': [\n                {'task': 'start', 'task_param': 'task_middle_param', 'type': 'spark'},\n                {'env_vars': {'env2': 'env2value'}, 'task': 'end', 'task_param': 'task_end_param', 'type': 'end'},\n            ],\n        }\n\n        self.assertEqual(\n            expected,\n            default_configs.apply_task_defaults(\n                subliminal,\n                superliminal,\n                pipeline=pipeline,\n                superliminal_before_tasks=[{\"task\": \"start\", \"task_param\": \"task_middle_param\", \"type\": \"spark\"}],\n                superliminal_after_tasks=[\n                    {\"env_vars\": {\"env2\": \"env2value\"}, \"task\": \"end\", \"task_param\": \"task_end_param\", \"type\": \"end\"}\n                ],\n            ),\n        )\n"
  },
  {
    "path": "tests/liminal/core/config/test_config.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\nfrom unittest import TestCase, mock\n\nfrom liminal.core.config.config import ConfigUtil\n\n\n# noinspection PyUnresolvedReferences,DuplicatedCode\nclass TestConfigUtil(TestCase):\n    @mock.patch('liminal.core.util.files_util.load')\n    def test_safe_load(self, find_config_files_mock):\n        subliminal = {\n            'name': 'my_subliminal_test',\n            'type': 'sub',\n            'super': 'my_superliminal_test',\n            'images': [{'image': 'my_image'}],\n            'pipelines': [{'name': 'mypipe1', 'param': 'constant'}, {'name': 'mypipe2', 'param': 'constant'}],\n            'pipeline_defaults': {'param1': 'param1_value'},\n            'task_defaults': {'job_start': {'task_sub_def': 'task_sub_def_value'}},\n        }\n        superliminal = {\n            'name': 'my_superliminal_test',\n            'type': 'super',\n            'super': 'super_superliminal',\n            'images': [{'image': 'my_image', 'source': '.'}],\n            'pipeline_defaults': {'param2': 'param2super_value', 'param3': 'param3super_value'},\n            'task_defaults': {\n                'job_start': {\n                    'task_def1': 
'task_def1_value',\n                    'task_def2': {\n                        'task_def2_1': 'task_def2_1_value',\n                    },\n                }\n            },\n        }\n        super_superliminal = {\n            'name': 'super_superliminal',\n            'type': 'super',\n            'pipeline_defaults': {\n                'param2': 'param2super_value',\n                'param3': 'param3hyper_value',\n                'param4': 'param4hyper_value',\n            },\n        }\n\n        expected = [\n            {\n                'executors': [{'executor': 'default_k8s', 'type': 'kubernetes'}],\n                'name': 'my_subliminal_test',\n                'pipeline_defaults': {'param1': 'param1_value'},\n                'pipelines': [\n                    {\n                        'description': 'add defaults parameters for all pipelines',\n                        'name': 'mypipe1',\n                        'param': 'constant',\n                        'param1': 'param1_value',\n                        'param2': 'param2super_value',\n                        'param3': 'param3super_value',\n                        'param4': 'param4hyper_value',\n                        'tasks': [\n                            {\n                                'task': 'start',\n                                'task_def1': 'task_def1_value',\n                                'task_def2': {'task_def2_1': 'task_def2_1_value'},\n                                'task_sub_def': 'task_sub_def_value',\n                                'type': 'job_start',\n                            },\n                            {'task': 'end', 'type': 'job_end'},\n                        ],\n                    },\n                    {\n                        'description': 'add defaults parameters for all pipelines',\n                        'name': 'mypipe2',\n                        'param': 'constant',\n                        'param1': 'param1_value',\n                        
'param2': 'param2super_value',\n                        'param3': 'param3super_value',\n                        'param4': 'param4hyper_value',\n                        'tasks': [\n                            {\n                                'task': 'start',\n                                'task_def1': 'task_def1_value',\n                                'task_def2': {'task_def2_1': 'task_def2_1_value'},\n                                'task_sub_def': 'task_sub_def_value',\n                                'type': 'job_start',\n                            },\n                            {'task': 'end', 'type': 'job_end'},\n                        ],\n                    },\n                ],\n                'service_defaults': {'description': 'add defaults parameters for all ' 'services'},\n                'images': [{'image': 'my_image', 'source': '.'}],\n                'services': [],\n                'super': 'my_superliminal_test',\n                'task_defaults': {'job_start': {'task_sub_def': 'task_sub_def_value'}},\n                'type': 'sub',\n            }\n        ]\n\n        expected = [\n            {\n                'executors': [\n                    {'executor': 'default_k8s', 'type': 'kubernetes'},\n                    {'executor': 'airflow_executor', 'type': 'airflow'},\n                ],\n                'images': [{'image': 'my_image', 'source': '.'}],\n                'name': 'my_subliminal_test',\n                'pipeline_defaults': {'param1': 'param1_value'},\n                'pipelines': [\n                    {\n                        'description': 'add defaults parameters for all pipelines',\n                        'name': 'mypipe1',\n                        'param': 'constant',\n                        'param1': 'param1_value',\n                        'param2': 'param2super_value',\n                        'param3': 'param3super_value',\n                        'param4': 'param4hyper_value',\n                        
'tasks': [\n                            {\n                                'executor': 'airflow_executor',\n                                'task': 'start',\n                                'task_def1': 'task_def1_value',\n                                'task_def2': {'task_def2_1': 'task_def2_1_value'},\n                                'task_sub_def': 'task_sub_def_value',\n                                'type': 'job_start',\n                            },\n                            {'executor': 'airflow_executor', 'task': 'end', 'type': 'job_end'},\n                        ],\n                    },\n                    {\n                        'description': 'add defaults parameters for all pipelines',\n                        'name': 'mypipe2',\n                        'param': 'constant',\n                        'param1': 'param1_value',\n                        'param2': 'param2super_value',\n                        'param3': 'param3super_value',\n                        'param4': 'param4hyper_value',\n                        'tasks': [\n                            {\n                                'executor': 'airflow_executor',\n                                'task': 'start',\n                                'task_def1': 'task_def1_value',\n                                'task_def2': {'task_def2_1': 'task_def2_1_value'},\n                                'task_sub_def': 'task_sub_def_value',\n                                'type': 'job_start',\n                            },\n                            {'executor': 'airflow_executor', 'task': 'end', 'type': 'job_end'},\n                        ],\n                    },\n                ],\n                'service_defaults': {'description': 'add defaults parameters for all ' 'services'},\n                'services': [],\n                'super': 'my_superliminal_test',\n                'task_defaults': {'job_start': {'task_sub_def': 'task_sub_def_value'}},\n                'type': 'sub',\n        
    }\n        ]\n\n        find_config_files_mock.return_value = {\n            'my_subliminal_test': subliminal,\n            'my_superliminal_test': superliminal,\n            'super_superliminal': super_superliminal,\n        }\n\n        config_util = ConfigUtil('')\n\n        self.assertEqual(expected, config_util.safe_load(is_render_variables=True))\n\n        # validate cache\n        self.assertEqual(expected, config_util.loaded_subliminals)\n\n    @mock.patch('liminal.core.util.files_util.load')\n    def test_get_config(self, find_config_files_mock):\n        find_config_files_mock.return_value = {\n            'my_subliminal_test': {'type': 'sub'},\n            'my_superliminal_test': {'type': 'super'},\n        }\n\n        config_util = ConfigUtil('')\n\n        self.assertEqual({'type': 'sub'}, config_util._ConfigUtil__get_config('my_subliminal_test'))\n\n        self.assertEqual({'type': 'super'}, config_util._ConfigUtil__get_config('my_superliminal_test'))\n\n    @mock.patch('liminal.core.util.files_util.load')\n    def test_get_superliminal(self, find_config_files_mock):\n        base = {\n            'executors': [\n                {'executor': 'default_k8s', 'type': 'kubernetes'},\n                {'executor': 'airflow_executor', 'type': 'airflow'},\n            ],\n            'name': 'base',\n            'pipeline_defaults': {\n                'after_tasks': [{'task': 'end', 'type': 'job_end'}],\n                'before_tasks': [{'task': 'start', 'type': 'job_start'}],\n                'description': 'add defaults parameters for all ' 'pipelines',\n            },\n            'service_defaults': {'description': 'add defaults parameters for all ' 'services'},\n            'task_defaults': {\n                'description': 'add defaults parameters for all tasks ' 'separate by task type',\n                'job_end': {'executor': 'airflow_executor'},\n                'job_start': {'executor': 'airflow_executor'},\n                'python': 
{'executor': 'default_k8s'},\n                'spark': {'executor': 'default_k8s'},\n            },\n            'type': 'super',\n        }\n        subliminal = {'name': 'subliminal_test', 'type': 'sub'}\n\n        find_config_files_mock.return_value = {'subliminal_test': subliminal}\n\n        config_util = ConfigUtil('')\n\n        self.assertEqual(base, config_util._ConfigUtil__get_superliminal(subliminal, False))\n\n        self.assertEqual({}, config_util._ConfigUtil__get_superliminal(base, False))\n\n        liminal = {'name': 'subliminal_test', 'type': 'sub', 'super': 'my_superliminal'}\n\n        with self.assertRaises(FileNotFoundError):\n            config_util._ConfigUtil__get_superliminal(liminal, False)\n\n    @mock.patch('liminal.core.util.files_util.load')\n    def test_merge_superliminals(self, find_config_files_mock):\n        superliminal = {\n            'name': 'my_superliminal_test',\n            'type': 'super',\n            'super': 'super_superliminal',\n            'pipeline_defaults': {\n                'before_tasks': [\n                    {'task': 'start-2', 'type': 'spark'},\n                ],\n                'after_tasks': [{'task': 'end-1', 'type': 'spark'}],\n            },\n            'task_defaults': {'task_def1': 'task_def1_value'},\n        }\n\n        super_superliminal = {\n            'name': 'super_superliminal',\n            'type': 'super',\n            'pipeline_defaults': {\n                'before_tasks': [{'task': 'start-1', 'type': 'spark'}],\n                'after_tasks': [{'task': 'end-2', 'type': 'spark'}],\n            },\n        }\n\n        config_util = ConfigUtil('')\n\n        find_config_files_mock.return_value = {'super_superliminal': super_superliminal, 'superliminal': superliminal}\n\n        expected = {\n            'name': 'my_superliminal_test',\n            'pipeline_defaults': {\n                'after_tasks': [{'task': 'end-1', 'type': 'spark'}, {'task': 'end-2', 'type': 'spark'}],\n        
        'before_tasks': [{'task': 'start-1', 'type': 'spark'}, {'task': 'start-2', 'type': 'spark'}],\n            },\n            'super': 'super_superliminal',\n            'task_defaults': {'task_def1': 'task_def1_value'},\n            'type': 'super',\n        }\n\n        self.assertEqual(\n            expected, dict(config_util._ConfigUtil__merge_superliminals(superliminal, super_superliminal))\n        )\n\n    @mock.patch('liminal.core.util.files_util.load')\n    @mock.patch.dict(os.environ, {'env': 'myenv', 'LIMINAL_STAND_ALONE_MODE': 'True'})\n    def test_safe_load_with_variables(self, find_config_files_mock):\n        subliminal = {\n            'name': 'my_subliminal_test',\n            'type': 'sub',\n            'super': 'my_superliminal_test',\n            'variables': {\n                'var': 'simple case',\n                'var-2': '-case',\n                'var_2': '_case',\n                'image': 'prod image',\n                'a': '{{env}}1',\n                'b': '{{a}}2',\n                'c': '{{a}}{{b}}2',\n            },\n            'pipelines': [\n                {\n                    'name': 'mypipe1',\n                    'param': '{{var}}',\n                    'tasks': [\n                        {'task': 'sub_tasks', 'type': 'dummy'},\n                    ],\n                },\n                {\n                    'name': 'mypipe2',\n                    'param': '{{var-2   }}',\n                    'tasks': [\n                        {'task': 'sub_tasks', 'type': 'dummy'},\n                    ],\n                },\n            ],\n            'pipeline_defaults': {'param1': '{{var-2}}'},\n            'task_defaults': {'job_start': {'task_def1:': 'task_sub_def_value'}},\n            'services': [\n                {'name': 'my_python_server', 'type': 'python_server', 'image': '{{image}}'},\n                {'name': 'my_python_server_for_stg', 'type': 'python_server', 'image': '{{default_image}}'},\n            ],\n        
}\n\n        superliminal = {\n            'name': 'my_superliminal_test',\n            'type': 'super',\n            'variables': {\n                'var-2': 'override',\n                'var3': 'super_var',\n                'default_image': 'default_image_value',\n                'image': 'default_image_value',\n            },\n            'super': 'super_superliminal',\n            'pipeline_defaults': {\n                'param2': '{{pipe-var}}',\n                'param3': 'param3super_value',\n                'before_tasks': [\n                    {'task': 'second_task', 'type': 'dummy'},\n                ],\n            },\n            'task_defaults': {\n                'pipeline': {\n                    'path': '{{var-2}}',\n                    'task_def1': 'task_def1_value',\n                    'task_def2': {\n                        'task_def2_1': 'task_def2_1_value',\n                    },\n                }\n            },\n        }\n        super_superliminal = {\n            'name': 'super_superliminal',\n            'type': 'super',\n            'variables': {'default_image': 'def_default_image_value'},\n            'pipeline_defaults': {\n                'global_conf': '{{var3}}',\n                'param2': 'param2super_value',\n                'param3': 'param3hyper_value',\n                'param4': 'param4hyper_value',\n                'after_tasks': [\n                    {'task': 'before_last_task', 'type': 'dummy'},\n                ],\n            },\n        }\n\n        expected = [\n            {\n                'executors': [\n                    {'executor': 'default_k8s', 'type': 'kubernetes'},\n                    {'executor': 'airflow_executor', 'type': 'airflow'},\n                ],\n                'name': 'my_subliminal_test',\n                'pipeline_defaults': {'param1': '-case'},\n                'pipelines': [\n                    {\n                        'description': 'add defaults parameters for all pipelines',\n     
                   'global_conf': 'super_var',\n                        'name': 'mypipe1',\n                        'param': 'simple case',\n                        'param1': '-case',\n                        'param2': '{{pipe-var}}',\n                        'param3': 'param3super_value',\n                        'param4': 'param4hyper_value',\n                        'tasks': [\n                            {\n                                'executor': 'airflow_executor',\n                                'task': 'start',\n                                'task_def1:': 'task_sub_def_value',\n                                'type': 'job_start',\n                            },\n                            {'task': 'second_task', 'type': 'dummy'},\n                            {'task': 'sub_tasks', 'type': 'dummy'},\n                            {'task': 'before_last_task', 'type': 'dummy'},\n                            {'executor': 'airflow_executor', 'task': 'end', 'type': 'job_end'},\n                        ],\n                    },\n                    {\n                        'description': 'add defaults parameters for all pipelines',\n                        'global_conf': 'super_var',\n                        'name': 'mypipe2',\n                        'param': '-case',\n                        'param1': '-case',\n                        'param2': '{{pipe-var}}',\n                        'param3': 'param3super_value',\n                        'param4': 'param4hyper_value',\n                        'tasks': [\n                            {\n                                'executor': 'airflow_executor',\n                                'task': 'start',\n                                'task_def1:': 'task_sub_def_value',\n                                'type': 'job_start',\n                            },\n                            {'task': 'second_task', 'type': 'dummy'},\n                            {'task': 'sub_tasks', 'type': 'dummy'},\n                  
          {'task': 'before_last_task', 'type': 'dummy'},\n                            {'executor': 'airflow_executor', 'task': 'end', 'type': 'job_end'},\n                        ],\n                    },\n                ],\n                'service_defaults': {'description': 'add defaults parameters for all ' 'services'},\n                'images': [],\n                'services': [\n                    {\n                        'description': 'add defaults parameters for all services',\n                        'image': 'prod image',\n                        'name': 'my_python_server',\n                        'type': 'python_server',\n                    },\n                    {\n                        'description': 'add defaults parameters for all services',\n                        'image': 'default_image_value',\n                        'name': 'my_python_server_for_stg',\n                        'type': 'python_server',\n                    },\n                ],\n                'super': 'my_superliminal_test',\n                'task_defaults': {'job_start': {'task_def1:': 'task_sub_def_value'}},\n                'type': 'sub',\n                'variables': {\n                    'a': 'myenv1',\n                    'b': 'myenv12',\n                    'c': 'myenv1myenv122',\n                    'image': 'prod image',\n                    'var': 'simple case',\n                    'var-2': '-case',\n                    'var_2': '_case',\n                },\n            }\n        ]\n\n        find_config_files_mock.return_value = {\n            'my_subliminal_test': subliminal,\n            'my_superliminal_test': superliminal,\n            'super_superliminal': super_superliminal,\n        }\n\n        config_util = ConfigUtil('')\n\n        self.assertEqual(expected, config_util.safe_load(is_render_variables=True))\n\n        # validate cache\n        self.assertEqual(expected, config_util.loaded_subliminals)\n\n    @mock.patch('os.path.exists')\n    
@mock.patch('liminal.core.environment.get_airflow_home_dir')\n    @mock.patch('liminal.core.util.files_util.load')\n    @mock.patch.dict(os.environ, {'LIMINAL_STAND_ALONE_MODE': 'True', 'POD_NAMESPACE': 'my_pod_ns'})\n    def test_liminal_config_snapshot(self, find_config_files_mock, get_airflow_dir_mock, path_exists_mock):\n        subliminal = {\n            'name': 'my_subliminal_test',\n            'type': 'sub',\n            'variables': {'var': 1, 'var-2': True},\n            'pipelines': [{'name': 'mypipe1', 'param': '{{var}}'}, {'name': 'mypipe2', 'param': '{{var-2   }}'}],\n        }\n\n        expected = {\n            'name': 'my_subliminal_test',\n            'type': 'sub',\n            'executors': [\n                {'executor': 'default_k8s', 'type': 'kubernetes'},\n                {'executor': 'airflow_executor', 'type': 'airflow'},\n            ],\n            'service_defaults': {'description': 'add defaults parameters for all services'},\n            'task_defaults': {\n                'description': 'add defaults parameters for all tasks separate by task type',\n                'python': {'executor': 'default_k8s'},\n                'spark': {'executor': 'default_k8s'},\n                'job_end': {'executor': 'airflow_executor'},\n                'job_start': {'executor': 'airflow_executor'},\n            },\n            'pipeline_defaults': {\n                'description': 'add defaults parameters for all pipelines',\n                'before_tasks': [{'task': 'start', 'type': 'job_start'}],\n                'after_tasks': [{'task': 'end', 'type': 'job_end'}],\n            },\n            'variables': {'var': 1, 'var-2': True},\n            'pipelines': [\n                {\n                    'name': 'mypipe1',\n                    'param': '1',\n                    'description': 'add defaults parameters for all pipelines',\n                    'tasks': [\n                        {'task': 'start', 'type': 'job_start', 'executor': 
'airflow_executor'},\n                        {'task': 'end', 'type': 'job_end', 'executor': 'airflow_executor'},\n                    ],\n                },\n                {\n                    'name': 'mypipe2',\n                    'param': 'True',\n                    'description': 'add defaults parameters for all pipelines',\n                    'tasks': [\n                        {'task': 'start', 'type': 'job_start', 'executor': 'airflow_executor'},\n                        {'task': 'end', 'type': 'job_end', 'executor': 'airflow_executor'},\n                    ],\n                },\n            ],\n            'images': [],\n            'services': [],\n        }\n\n        find_config_files_mock.return_value = {'my_subliminal_test': subliminal}\n\n        get_airflow_dir_mock.return_value = '/tmp'\n        path_exists_mock.return_value = True\n\n        with mock.patch('builtins.open', mock.mock_open()) as m:\n            with mock.patch('yaml.dump') as ydm:\n                config_util = ConfigUtil('')\n                config_util.safe_load(is_render_variables=True)\n                config_util.snapshot_final_liminal_configs()\n\n                m.assert_called_once_with(os.path.join('/tmp', '../liminal_config_files/my_subliminal_test.yml'), 'w')\n                ydm.assert_called_once_with(expected, m.return_value, default_flow_style=False)\n\n    @mock.patch('liminal.core.util.files_util.load')\n    def test_soft_merge_load(self, find_config_files_mock):\n        subliminal = {'name': 'my_name', 'type': 'sub', 'super': 'my_super'}\n        find_config_files_mock.return_value = {'my_subliminal_test': subliminal}\n\n        config_util = ConfigUtil('')\n\n        self.assertEqual([subliminal], config_util.safe_load(is_render_variables=True, soft_merge=True))\n\n    def test_non_soft_merge_load(self):\n        subliminal = {'name': 'my_name', 'type': 'sub', 'super': 'my_super'}\n\n        config_util = ConfigUtil('')\n\n        
self.assertRaises(FileNotFoundError, config_util._ConfigUtil__get_superliminal, subliminal, False)\n"
  },
  {
    "path": "tests/liminal/core/util/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/liminal/core/util/test_dict_utils.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\nfrom unittest import TestCase, mock\n\nfrom liminal.core.util import dict_util\n\n\nclass TestDictUtils(TestCase):\n    def setUp(self) -> None:\n        self.dict1 = {\"env\": \"env1\", \"env_dict\": {\"env3\": \"env3\"}, \"env4\": \"env4\"}\n\n        self.dict2 = {\"env\": \"env1\", \"env_dict\": {\"env2\": \"env2\"}}\n\n    def test_merge_dicts(self):\n        expected = {\"env\": \"env1\", \"env4\": \"env4\", \"env_dict\": {\"env2\": \"env2\"}}\n\n        self.assertEqual(expected, dict_util.merge_dicts(self.dict1, self.dict2))\n\n    def test_recursive_merge_dicts(self):\n        expected = {\"env\": \"env1\", \"env4\": \"env4\", \"env_dict\": {\"env2\": \"env2\", \"env3\": \"env3\"}}\n        self.assertEqual(expected, dict_util.merge_dicts(self.dict1, self.dict2, True))\n\n    def test_merge_with_empty(self):\n        self.assertEqual(self.dict2, dict_util.merge_dicts({}, self.dict2, True))\n        self.assertEqual(self.dict2, dict_util.merge_dicts({}, self.dict2))\n\n        self.assertEqual(self.dict2, dict_util.merge_dicts(self.dict2, {}, True))\n        self.assertEqual(self.dict2, dict_util.merge_dicts(self.dict2, {}))\n\n    def 
test_replace_variables_simple_case(self):\n        dct = {\n            \"env\": \"{{env_var}}\",\n            \"env_dict\": {\"env3\": \"{{ var1 }}\"},\n            \"env4\": \"{{var2 }}\",\n            \"env5\": \"{{{var2}}\",\n            \"env6\": \"{{var3}}\",\n        }\n\n        variables = {\"env_var\": \"env value\", \"var1\": \"value1\", \"var2\": \"value2\"}\n\n        expected = {\n            \"env\": \"env value\",\n            \"env_dict\": {\"env3\": \"value1\"},\n            \"env4\": \"value2\",\n            \"env5\": \"{value2\",\n            \"env6\": \"{{var3}}\",\n        }\n\n        self.assertEqual(expected, dict_util.replace_placeholders(dct, variables))\n\n    def test_replace_variables_empty_var(self):\n        dct = {\n            \"env\": \"{{env_var}}\",\n            \"env_dict\": {\"env3\": \"{{ var1 }}\"},\n            \"env4\": \"{{var2 }}\",\n            \"env5\": \"{{{var2}}\",\n            \"env6\": \"{{var3}}\",\n        }\n        self.assertEqual(dct, dict_util.replace_placeholders(dct, {}))\n\n    @mock.patch.dict(os.environ, {\"LIMINAL_STAND_ALONE_MODE\": \"False\"})\n    @mock.patch('airflow.models.Variable.get')\n    def test_replace_variables_flat_replace(self, airflow_variable_mock):\n        def airflow_variable_values(key, default_var):\n            return 'liminal playground' if key == 'playground' else default_var\n\n        airflow_variable_mock.side_effect = airflow_variable_values\n\n        dct = {\n            \"query\": \"select * from my_table \" \"where event_type = {{event_type}} and region = {{region}}\",\n            \"env\": \"{{prod}}, {{stg}}, {{playground}}\",\n            \"optional\": \"{{optionals}}\",\n        }\n\n        variables = {\n            \"region\": \"us_east_1\",\n            \"event_type\": \"subscription\",\n            \"prod\": \"liminal production\",\n            \"stg\": \"liminal staging\",\n        }\n\n        expected = {\n            'query': 'select * from my_table ' 
'where event_type = subscription and region = us_east_1',\n            \"env\": \"liminal production, liminal staging, liminal playground\",\n            \"optional\": \"{{optionals}}\",\n        }\n\n        self.assertEqual(expected, dict_util.replace_placeholders(dct, variables))\n\n    @mock.patch.dict(os.environ, {\"LIMINAL_STAND_ALONE_MODE\": \"False\"})\n    @mock.patch('airflow.models.Variable.get')\n    def test_replace_variables_with_nested_list(self, airflow_variable_mock):\n        def airflow_variable_values(key, default_var):\n            return 'liminal playground' if key == 'playground' else default_var\n\n        airflow_variable_mock.side_effect = airflow_variable_values\n\n        dct = {\n            \"query\": \"select * from my_table \" \"where event_type = {{event_type}} and region = {{region}}\",\n            \"env\": ['{{prod}}', '{{stg}}', '{{playground}}'],\n            \"tasks\": [{'id': 'id1', 'image': '{{image}}'}],\n            \"optional\": \"{{optionals}}\",\n        }\n\n        variables = {\n            \"region\": \"us_east_1\",\n            \"event_type\": \"subscription\",\n            \"prod\": \"liminal production\",\n            \"stg\": \"liminal staging\",\n            \"image\": \"my_image_name\",\n        }\n\n        expected = {\n            'env': ['liminal production', 'liminal staging', 'liminal playground'],\n            'optional': '{{optionals}}',\n            'query': 'select * from my_table where event_type = subscription and region = ' 'us_east_1',\n            'tasks': [{'id': 'id1', 'image': 'my_image_name'}],\n        }\n\n        self.assertEqual(expected, dict_util.replace_placeholders(dct, variables))\n\n    @mock.patch.dict(os.environ, {\"table\": \"my_table\", \"LIMINAL_STAND_ALONE_MODE\": \"True\"})\n    def test_replace_variables_from_env(self):\n        dct = {\n            \"query\": \"select * from my_table \"\n            \"where event_type = {{event_type}} and region = {{region}} from 
{{table}}\"\n        }\n\n        variables = {}\n\n        expected = {\n            'query': 'select * from my_table '\n            'where event_type = {{event_type}} '\n            'and region = {{region}} from my_table'\n        }\n\n        self.assertEqual(expected, dict_util.replace_placeholders(dct, variables))\n\n    @mock.patch.dict(os.environ, {\"table\": \"my_table\", \"LIMINAL_STAND_ALONE_MODE\": \"True\"})\n    def test_replace_variables_from_variable_and_not_env(self):\n        dct = {\n            \"query\": \"select * from my_table \"\n            \"where event_type = {{event_type}} and region = {{region}} from {{table}}\"\n        }\n\n        variables = {\"table\": \"my_variable_table\"}\n\n        expected = {\n            'query': 'select * from my_table '\n            'where event_type = {{event_type}} '\n            'and region = {{region}} from my_variable_table'\n        }\n\n        self.assertEqual(expected, dict_util.replace_placeholders(dct, variables))\n\n    @mock.patch.dict(os.environ, {\"table\": \"my_table\", \"LIMINAL_STAND_ALONE_MODE\": \"False\"})\n    @mock.patch('airflow.models.Variable.get')\n    def test_replace_variables_from_airflow_and_not_enc(self, airflow_variable_mock):\n        def airflow_variable_values(key, default_var):\n            return 'my_airflow_table' if key == 'table' else default_var\n\n        airflow_variable_mock.side_effect = airflow_variable_values\n\n        dct = {\n            \"query\": \"select * from my_table \"\n            \"where event_type = {{event_type}} and region = {{region}} from {{table}}\"\n        }\n\n        variables = {}\n\n        expected = {\n            'query': 'select * from my_table '\n            'where event_type = {{event_type}} '\n            'and region = {{region}} from my_airflow_table'\n        }\n\n        self.assertEqual(expected, dict_util.replace_placeholders(dct, variables))\n"
  },
  {
    "path": "tests/liminal/kubernetes/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/liminal/kubernetes/test_volume_util.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport sys\nimport unittest\n\nfrom kubernetes import config\n\nfrom liminal.kubernetes import volume_util\n\ntry:\n    config.load_kube_config()\nexcept Exception:\n    msg = \"Kubernetes is not running\\n\"\n    sys.stdout.write(f\"INFO: {msg}\")\n\n\nclass TestKubernetesVolume(unittest.TestCase):\n    def setUp(self) -> None:\n        self.config = {\n            'volumes': [\n                {\n                    'volume': 'gettingstartedvol-test',\n                    'claim_name': 'gettingstartedvol-test-pvc',\n                    'local': {'path': '.'},\n                }\n            ]\n        }\n\n    def test_volume_config(self):\n        volumes_config = volume_util.get_volume_configs(self.config, \".\")\n        self.assertEqual(str(self.config['volumes'][0]), str(volumes_config[0]))\n\n    def test_create_volume(self):\n        self._delete_volumes()\n        self._create_volumes()\n        matching_volumes = volume_util._list_persistent_volumes(self.config['volumes'][0]['volume'])\n        self.assertTrue(\n            self.config['volumes'][0]['volume'] in matching_volumes[0]['metadata']['name'],\n            
self.config['volumes'][0]['claim_name'] in matching_volumes[0]['spec']['claim_ref']['name'],\n        )\n\n    def test_delete_volume(self):\n        self._create_volumes()\n        self._delete_volumes()\n        matching_volumes = volume_util._list_persistent_volumes(self.config['volumes'][0]['volume'])\n        self.assertEqual([], matching_volumes)\n\n    def _create_volumes(self):\n        volume_util.create_local_volumes(self.config, \".\")\n\n    def _delete_volumes(self):\n        volume_util.delete_local_volumes(self.config, \".\")\n"
  },
  {
    "path": "tests/runners/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/build/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/build/http/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/build/http/python/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/build/http/python/test_python_server_image_builder.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport json\nimport logging\nimport os\nimport threading\nimport time\nimport unittest\nimport urllib.request\nfrom unittest import TestCase\n\nimport docker\n\nfrom liminal.build.image.python_server.python_server import PythonServerImageBuilder\nfrom liminal.build.python import PythonImageVersions\n\nIMAGE_NAME = 'liminal_server_image'\n\n\nclass TestPythonServer(TestCase):\n    def setUp(self) -> None:\n        super().setUp()\n        self.docker_client = docker.from_env()\n        self.config = self.__create_conf()\n        self.__remove_containers()\n\n    def tearDown(self) -> None:\n        self.__remove_containers()\n        self.docker_client.close()\n\n    def test_build_python_server(self):\n        versions = list(PythonImageVersions().supported_versions)\n        for version in versions:\n            build_out = self.__test_build_python_server(python_version=version)\n            self.assertTrue('RUN pip install -r requirements.txt' in build_out, 'Incorrect pip command')\n\n    def test_build_python_server_with_pip_conf(self):\n        build_out = self.__test_build_python_server(use_pip_conf=True)\n\n        self.assertTrue(\n            'RUN 
--mount=type=secret,id=pip_config,dst=/etc/pip.conf  pip install' in build_out,\n            'Incorrect pip command',\n        )\n\n    def __test_build_python_server(self, use_pip_conf=False, python_version=None):\n        base_path = os.path.join(os.path.dirname(__file__), '../../../liminal')\n\n        config = self.__create_conf()\n\n        if use_pip_conf:\n            config['pip_conf'] = os.path.join(base_path, 'pip.conf')\n\n        if python_version:\n            config['python_version'] = python_version\n\n        builder = PythonServerImageBuilder(\n            config=config, base_path=base_path, relative_source_path='myserver', tag=IMAGE_NAME\n        )\n\n        build_out = str(builder.build())\n\n        thread = threading.Thread(target=self.__run_container)\n        thread.start()\n\n        time.sleep(20)\n\n        logging.info('Sending request to server')\n\n        json_string = '{\"key1\": \"val1\", \"key2\": \"val2\"}'\n\n        encoding = 'ascii'\n\n        server_response = str(\n            urllib.request.urlopen('http://localhost:9294/myendpoint1', data=json_string.encode(encoding))\n            .read()\n            .decode(encoding)\n        )\n\n        logging.info(f'Response from server: {server_response}')\n\n        self.assertEqual(f'Input was: {json.loads(json_string)}', server_response)\n\n        server_favicon_response = urllib.request.urlopen('http://localhost:9294/favicon.ico')\n\n        self.assertEqual(204, server_favicon_response.status)\n\n        self.__remove_containers()\n\n        return build_out\n\n    def __remove_containers(self):\n        logging.info(f'Stopping containers with image: {IMAGE_NAME}')\n\n        matching_containers = self.__get_docker_containers()\n\n        for container in matching_containers:\n            container_id = container.id\n            logging.info(f'Stopping container {container_id}')\n            self.docker_client.api.stop(container_id)\n            logging.info(f'Removing 
container {container_id}')\n            self.docker_client.api.remove_container(container_id)\n\n        while len(matching_containers) > 0:\n            matching_containers = self.__get_docker_containers()\n\n    def __get_docker_containers(self):\n        return self.docker_client.containers.list(filters={'ancestor': IMAGE_NAME})\n\n    def __run_container(self):\n        try:\n            logging.info(f'Running container for image: {IMAGE_NAME}')\n            self.docker_client.containers.run(IMAGE_NAME, ports={'80/tcp': 9294}, detach=True)\n        except Exception as err:\n            logging.exception(err)\n            pass\n\n    @staticmethod\n    def __create_conf():\n        return {\n            'image': IMAGE_NAME,\n            'cmd': 'foo bar',\n            'source': 'baz',\n            'input_type': 'my_input_type',\n            'input_path': 'my_input',\n            'output_path': '/my_output.json',\n            'no_cache': True,\n            'endpoints': [{'endpoint': '/myendpoint1', 'module': 'my_server', 'function': 'myendpoint1func'}],\n        }\n\n\nif __name__ == '__main__':\n    unittest.main()\n"
  },
  {
    "path": "tests/runners/airflow/build/python/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/build/python/test_python_image_builder.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\nimport shutil\nimport tempfile\nfrom unittest import TestCase\n\nimport docker\n\nfrom liminal.build.image.python.python import PythonImageBuilder\nfrom liminal.build.python import PythonImageVersions\n\n\nclass TestPythonImageBuilder(TestCase):\n    __IMAGE_NAME = 'my_python_task_img'\n\n    def setUp(self) -> None:\n        super().setUp()\n        os.environ['TMPDIR'] = '/tmp'\n        self.temp_dir = self.__temp_dir()\n        self.temp_airflow_dir = self.__temp_dir()\n\n    def tearDown(self) -> None:\n        super().tearDown()\n        self.__remove_dir(self.temp_dir)\n        self.__remove_dir(self.temp_airflow_dir)\n\n    def test_build(self):\n        for python_version in [None, PythonImageVersions().supported_versions[0]]:\n            build_out = self.__test_build(python_version=python_version)\n        self.assertTrue('RUN pip install -r requirements.txt' in build_out, 'Incorrect pip command')\n\n        self.__test_image()\n\n    def test_build_with_pip_conf(self):\n        build_out = self.__test_build(use_pip_conf=True)\n\n        self.assertTrue(\n            'RUN 
--mount=type=secret,id=pip_config,dst=/etc/pip.conf  pip install' in build_out,\n            'Incorrect pip command',\n        )\n\n        self.__test_image()\n\n    def test_with_unsupported_python_version(self):\n        with self.assertRaises(ValueError):\n            self.__test_build(python_version='3.5.2')\n\n    def __test_build(self, use_pip_conf=False, python_version=None):\n        config = self.__create_conf('my_task')\n\n        base_path = os.path.join(os.path.dirname(__file__), '../../liminal')\n\n        if use_pip_conf:\n            config['pip_conf'] = os.path.join(base_path, 'pip.conf')\n\n        if python_version:\n            config['python_version'] = python_version\n\n        builder = PythonImageBuilder(\n            config=config, base_path=base_path, relative_source_path='write_inputs', tag=self.__IMAGE_NAME\n        )\n\n        build_out = str(builder.build())\n\n        return build_out\n\n    def __test_image(self):\n        docker_client = docker.from_env()\n        docker_client.images.get(self.__IMAGE_NAME)\n\n        cmds = ['/bin/bash', '-c', 'python write_inputs.py']\n\n        container_log = docker_client.containers.run(\n            self.__IMAGE_NAME,\n            cmds,\n            volumes={self.temp_dir: {'bind': '/mnt/vol1', 'mode': 'rw'}},\n            environment={'NUM_FILES': 10, 'NUM_SPLITS': 3},\n        )\n\n        docker_client.close()\n\n        logging.info(container_log)\n\n        self.assertEqual(\n            \"b'\"\n            \"Writing input file /mnt/vol1/inputs/0/input0.json\\\\n\"\n            \"Writing input file /mnt/vol1/inputs/1/input1.json\\\\n\"\n            \"Writing input file /mnt/vol1/inputs/2/input2.json\\\\n\"\n            \"Writing input file /mnt/vol1/inputs/0/input3.json\\\\n\"\n            \"Writing input file /mnt/vol1/inputs/1/input4.json\\\\n\"\n            \"Writing input file /mnt/vol1/inputs/2/input5.json\\\\n\"\n            \"Writing input file 
/mnt/vol1/inputs/0/input6.json\\\\n\"\n            \"Writing input file /mnt/vol1/inputs/1/input7.json\\\\n\"\n            \"Writing input file /mnt/vol1/inputs/2/input8.json\\\\n\"\n            \"Writing input file /mnt/vol1/inputs/0/input9.json\\\\n\"\n            \"'\",\n            str(container_log),\n        )\n\n    def __create_conf(self, task_id):\n        return {\n            'task': task_id,\n            'cmd': 'foo bar',\n            'image': self.__IMAGE_NAME,\n            'source': 'baz',\n            'no_cache': True,\n        }\n\n    @staticmethod\n    def __temp_dir():\n        temp_dir = tempfile.mkdtemp()\n        return temp_dir\n\n    @staticmethod\n    def __remove_dir(temp_dir):\n        shutil.rmtree(temp_dir, ignore_errors=True)\n"
  },
  {
    "path": "tests/runners/airflow/build/spark/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/build/spark/test_spark_image_builder.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport logging\nimport os\nimport shutil\nimport tempfile\nfrom unittest import TestCase\n\nimport docker\n\nfrom liminal.build.image.spark.spark import SparkImageBuilder\n\n\nclass TestSparkImageBuilder(TestCase):\n    __IMAGE_NAME = 'my_spark_image'\n\n    def setUp(self) -> None:\n        super().setUp()\n        os.environ['TMPDIR'] = '/tmp'\n        self.temp_dir = self.__temp_dir()\n        self.temp_airflow_dir = self.__temp_dir()\n\n    def tearDown(self) -> None:\n        super().tearDown()\n        self.__remove_dir(self.temp_dir)\n        self.__remove_dir(self.temp_airflow_dir)\n\n    def test_build(self):\n        build_out = self.__test_build()\n        self.assertTrue('RUN pip install -r requirements.txt' in build_out, 'Incorrect pip command')\n\n        self.__test_image()\n\n    def __test_build(self):\n        config = self.__create_conf('my_task')\n\n        base_path = os.path.join(os.path.dirname(__file__), '../../../apps/test_spark_app')\n\n        builder = SparkImageBuilder(\n            config=config, base_path=base_path, relative_source_path='wordcount', tag=self.__IMAGE_NAME\n        )\n\n        build_out = 
str(builder.build())\n\n        return build_out\n\n    def __test_image(self):\n        docker_client = docker.from_env()\n        docker_client.images.get(self.__IMAGE_NAME)\n\n        cmds = ['spark-submit', 'wordcount.py', 'words.txt', '/mnt/vol1/outputs']\n\n        container_log = docker_client.containers.run(\n            self.__IMAGE_NAME, cmds, volumes={self.temp_dir: {'bind': '/mnt/vol1', 'mode': 'rw'}}\n        )\n\n        docker_client.close()\n\n        logging.info(container_log)\n\n        self.assertEqual(\n            \"b'\\\\n\"\n            \"my: 1\\\\n\"\n            \"first: 1\\\\n\"\n            \"liminal: 1\\\\n\"\n            \"spark: 1\\\\n\"\n            \"task: 1\\\\n\"\n            \"writing the results to /mnt/vol1/outputs\\\\n'\",\n            str(container_log),\n        )\n\n    def __create_conf(self, task_id):\n        return {\n            'task': task_id,\n            'cmd': 'foo bar',\n            'image': self.__IMAGE_NAME,\n            'source': 'baz',\n            'no_cache': True,\n        }\n\n    @staticmethod\n    def __temp_dir():\n        temp_dir = tempfile.mkdtemp()\n        return temp_dir\n\n    @staticmethod\n    def __remove_dir(temp_dir):\n        shutil.rmtree(temp_dir, ignore_errors=True)\n"
  },
  {
    "path": "tests/runners/airflow/build/test_liminal_apps_builder.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\nimport unittest\nfrom unittest import TestCase\n\nimport docker\n\nfrom liminal.build import liminal_apps_builder\n\n\nclass TestLiminalAppsBuilder(TestCase):\n    __image_names = ['my_python_task_img', 'my_parallelized_python_task_img']\n\n    def setUp(self) -> None:\n        self.docker_client = docker.client.from_env()\n        self.__remove_images()\n\n    def tearDown(self) -> None:\n        self.__remove_images()\n        self.docker_client.close()\n\n    def __remove_images(self):\n        for image_name in self.__image_names:\n            if len(self.docker_client.images.list(image_name)) > 0:\n                self.docker_client.images.remove(image=image_name, force=True)\n\n    def test_build_liminal(self):\n        liminal_apps_builder.build_liminal_apps(os.path.join(os.path.dirname(__file__), '../liminal'))\n\n        for image in self.__image_names:\n            self.docker_client.images.get(image)\n\n\nif __name__ == '__main__':\n    unittest.main()\n"
  },
  {
    "path": "tests/runners/airflow/compiler/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/dag/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/dag/test_liminal_dags.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\nimport unittest\nfrom unittest import TestCase, mock\n\nfrom liminal.core.config.config import ConfigUtil\nfrom liminal.runners.airflow.dag import liminal_register_dags\nfrom liminal.runners.airflow.operators.job_status_operator import (\n    JobEndOperator,\n    JobStartOperator,\n)\n\n\nclass Test(TestCase):\n    def test_register_dags(self):\n        dags = self.get_register_dags()\n\n        self.assertEqual(len(dags), 1)\n\n        test_pipeline = dags[0][1]\n\n        # TODO: elaborate tests to assert all dags have correct tasks\n        self.assertEqual(test_pipeline.dag_id, 'my_pipeline')\n\n    def test_default_start_task(self):\n        dags = self.get_register_dags()[0]\n\n        task_dict = dags[1].task_dict\n\n        self.assertIsInstance(task_dict['start'], JobStartOperator)\n\n    def test_default_end_task(self):\n        dags = self.get_register_dags()[0]\n\n        task_dict = dags[1].task_dict\n\n        self.assertIsInstance(task_dict['end'], JobEndOperator)\n\n    def test_default_args(self):\n        dag = self.get_register_dags()[0]\n        default_args = dag[1].default_args\n\n        keys = default_args.keys()\n       
 self.assertIn('default_arg_loaded', keys)\n        self.assertIn('default_array_loaded', keys)\n        self.assertIn('default_object_loaded', keys)\n\n    @staticmethod\n    @mock.patch.object(ConfigUtil, \"snapshot_final_liminal_configs\")\n    def get_register_dags(mock_snapshot_final_liminal_configs):\n        mock_snapshot_final_liminal_configs.side_effect = None\n        base_path = os.path.join(os.path.dirname(__file__), '../../apps/test_app')\n        return liminal_register_dags.register_dags(base_path)\n\n\nif __name__ == '__main__':\n    unittest.main()\n"
  },
  {
    "path": "tests/runners/airflow/executors/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/executors/test_airflow_executor.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom unittest import TestCase\nfrom unittest.mock import MagicMock\n\nfrom liminal.runners.airflow.executors import airflow\nfrom liminal.runners.airflow.tasks.airflow import AirflowTask\n\n\nclass TestAirflowExecutorTask(TestCase):\n    \"\"\"\n    Test AirflowExecutor task\n    \"\"\"\n\n    def test_apply_task_to_dag(self):\n        task0 = MagicMock(spec=AirflowTask)\n\n        task0.apply_task_to_dag = MagicMock()\n\n        airflow.AirflowExecutor(\"executor-task\", {}, {}).apply_task_to_dag(task=task0)\n\n        task0.apply_task_to_dag.assert_called_once()\n"
  },
  {
    "path": "tests/runners/airflow/executors/test_emr.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom copy import deepcopy\nfrom unittest import TestCase, mock\nfrom unittest.mock import MagicMock\n\nimport boto3\nfrom airflow.contrib.hooks.emr_hook import EmrHook\nfrom airflow.contrib.operators.emr_add_steps_operator import EmrAddStepsOperator\nfrom airflow.contrib.sensors.emr_step_sensor import EmrStepSensor\nfrom moto import mock_emr\n\nfrom liminal.runners.airflow import DummyDag\nfrom liminal.runners.airflow.executors.emr import EMRExecutor\nfrom liminal.runners.airflow.operators.operator_with_variable_resolving import (\n    OperatorWithVariableResolving,\n)\nfrom liminal.runners.airflow.tasks import hadoop\nfrom tests.util import dag_test_utils\n\n\n@mock_emr\nclass TestEMRExecutorTask(TestCase):\n    \"\"\"\n    Test EMRExecutor task\n    \"\"\"\n\n    def setUp(self) -> None:\n        self.run_job_flow_args = dict(\n            Instances={\n                \"InstanceCount\": 1,\n                \"KeepJobFlowAliveWhenNoSteps\": True,\n                \"MasterInstanceType\": \"c3.medium\",\n                \"Placement\": {\"AvailabilityZone\": \"us-east-1\"},\n                \"SlaveInstanceType\": \"c3.xlarge\",\n            },\n            
JobFlowRole=\"EMR_EC2_DefaultRole\",\n            LogUri=\"s3://liminal/log\",\n            Name=\"test-emr-cluster\",\n            ServiceRole=\"EMR_DefaultRole\",\n            VisibleToAllUsers=True,\n        )\n\n        self.client = boto3.client(\"emr\", region_name=\"us-east-1\")\n\n        args = deepcopy(self.run_job_flow_args)\n\n        self.cluster_id = self.client.run_job_flow(**args)[\"JobFlowId\"]\n\n        self.dag = dag_test_utils.create_dag()\n        self.dag.context = DummyDag(dag_id=self.dag.dag_id, task_id=\"\").context\n        self.executor_name = 'test-emr-cluster'\n        executor_config = {\n            'executor': self.executor_name,\n            'cluster_name': self.executor_name,\n            'aws_conn_id': 'us-east-1',\n            'type': 'emr',\n            'properties': {'ActionOnFailure': 'CONTINUE'},\n        }\n        self.hadoop_task = MagicMock(spec=hadoop.HadoopTask)\n        self.hadoop_task.get_runnable_command.return_value = ['spark-submit', 'test', 'params', '--param']\n        self.hadoop_task.task_id = 'spark-task'\n        self.hadoop_task.dag = self.dag\n        self.hadoop_task.trigger_rule = 'all_done'\n        self.hadoop_task.parent = None\n        self.hadoop_task.task_config = {}\n        self.hadoop_task.variables = {}\n\n        self.emr = EMRExecutor(self.executor_name, liminal_config={}, executor_config=executor_config)\n\n    def test_apply_task_to_dag(self):\n        self.emr.apply_task_to_dag(task=self.hadoop_task)\n\n        self.assertEqual(len(self.dag.tasks), 2)\n\n        self.assertIsInstance(self.dag.tasks[0], OperatorWithVariableResolving)\n        self.assertIsInstance(self.dag.tasks[0].operator_delegate, EmrAddStepsOperator)\n        self.assertIsInstance(self.dag.tasks[1], OperatorWithVariableResolving)\n        self.assertIsInstance(self.dag.tasks[1].operator_delegate, EmrStepSensor)\n\n    @mock.patch.object(EmrHook, 'get_conn')\n    def test_add_step(self, mock_emr_hook_get_conn):\n        
aws_region = 'us-east-1'\n        mock_emr_hook_get_conn.return_value = boto3.client('emr', region_name=aws_region)\n\n        self.emr.apply_task_to_dag(task=self.hadoop_task)\n\n        emr_add_step_task = self.dag.tasks[0]\n\n        self.assertIsInstance(emr_add_step_task, OperatorWithVariableResolving)\n        self.assertIsInstance(emr_add_step_task.operator_delegate, EmrAddStepsOperator)\n\n        emr_add_step_task.render_template_fields({})\n\n        step_id = emr_add_step_task.execute(self.dag.context)[0]\n\n        desc_step = self.client.describe_step(ClusterId=self.cluster_id, StepId=step_id)['Step']\n\n        self.assertEqual(\n            desc_step['Config'],\n            {'Jar': 'command-runner.jar', 'Properties': {}, 'Args': ['spark-submit', 'test', 'params', '--param']},\n        )\n\n        self.assertEqual(desc_step['Name'], 'spark-task')\n\n        self.assertEqual(desc_step['ActionOnFailure'], 'CONTINUE')\n\n    def test_watch_step(self):\n        self.emr.apply_task_to_dag(task=self.hadoop_task, parents=[])\n\n        emr_watch_step_task = self.dag.tasks[1]\n\n        self.assertIsInstance(emr_watch_step_task, OperatorWithVariableResolving)\n        self.assertIsInstance(emr_watch_step_task.operator_delegate, EmrStepSensor)\n\n        # todo - elaborate tests\n"
  },
  {
    "path": "tests/runners/airflow/liminal/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/liminal/liminal.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nname: MyPipeline\nvolumes:\n  - volume: myvol1\n    local:\n      path: /tmp/liminal_tests\nsecrets:\n  - secret: aws\n    local_path_file: \"~/.aws/credentials\"\nimages:\n  - image: my_python_task_img\n    type: python\n    source: write_inputs\n    no_cache: true\n  - image: my_parallelized_python_task_img\n    type: python\n    source: write_outputs\n    no_cache: true\n  - image: my_server_image\n    type: python_server\n    source: myserver\n    no_cache: true\n    endpoints:\n      - endpoint: /myendpoint1\n        module: my_server\n        function: myendpoint1func\npipelines:\n  - pipeline: my_pipeline\n    owner: Bosco Albert Baracus\n    start_date: 1970-01-01\n    timeout_minutes: 45\n    schedule: 0 * 1 * *\n    default_arg_loaded: check\n    default_array_loaded: [2, 3, 4]\n    default_object_loaded:\n      key1: val1\n      key2: val2\n    metrics:\n      namespace: TestNamespace\n      backends: []\n    tasks:\n      - task: my_python_task\n        type: python\n        description: static input task\n        image: my_python_task_img\n        env_vars:\n          NUM_FILES: 10\n          NUM_SPLITS: 3\n          AWS_CONFIG_FILE: 
\"/mnt/credentials\"\n          AWS_PROFILE: \"dev\"\n        secrets:\n          - secret: aws\n            remote_path: \"/mnt\"\n        mounts:\n          - mount: mymount\n            volume: myvol1\n            path: /mnt/vol1\n        cmd: python -u write_inputs.py\n      - task: my_parallelized_python_task\n        type: python\n        description: parallelized python task\n        image: my_parallelized_python_task_img\n        env_vars:\n          FOO: BAR\n        executors: 3\n        mounts:\n          - mount: mymount\n            volume: myvol1\n            path: /mnt/vol1\n        cmd: python -u write_inputs.py\nservices:\n  - service: my_python_server\n    description: my python server\n    image: my_server_image\n"
  },
  {
    "path": "tests/runners/airflow/liminal/myserver/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/liminal/myserver/my_server.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nimport json\n\n\ndef myendpoint1func(input_json):\n    input_dict = json.loads(input_json) if input_json else {}\n    return f'Input was: {input_dict}'\n"
  },
  {
    "path": "tests/runners/airflow/liminal/pip.conf",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/liminal/requirements.txt",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\npillow\n"
  },
  {
    "path": "tests/runners/airflow/liminal/write_inputs/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/liminal/write_inputs/write_inputs.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport json\nimport os\n\ninputs_dir = '/mnt/vol1/inputs/'\n\nnum_files = int(os.environ['NUM_FILES'])\nnum_splits = int(os.environ['NUM_SPLITS'])\n\n# create input files - split by round robin\nfor i in range(0, num_files):\n    split_id = i % num_splits\n    split_dir = os.path.join(inputs_dir, str(split_id))\n\n    if not os.path.exists(split_dir):\n        os.makedirs(split_dir)\n\n    filename = os.path.join(split_dir, f'input{i}.json')\n    with open(filename, 'w') as f:\n        print(f'Writing input file {filename}')\n        f.write(json.dumps({'mykey': f'myval{i}'}))\n"
  },
  {
    "path": "tests/runners/airflow/liminal/write_outputs/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/liminal/write_outputs/write_outputs.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport json\nimport logging\nimport os\n\n# Needs to be fixed in the following Jira:\n# https://issues.apache.org/jira/browse/LIMINAL-76\ninputs_dir = f'/mnt/vol1/inputs/'\noutputs_dir = '/mnt/vol1/outputs/'\n\nif not os.path.exists(outputs_dir):\n    os.makedirs(outputs_dir)\n\nfor directory in os.listdir(inputs_dir):\n    logging.info(f'Running write_outputs for split id {directory}')\n    inputs_dir_file = os.path.join(inputs_dir, directory)\n    for filename in os.listdir(inputs_dir_file):\n        with open(os.path.join(inputs_dir_file, filename)) as infile, open(\n            os.path.join(outputs_dir, filename.replace('input', 'output').replace('.json', '.txt')), 'w'\n        ) as outfile:\n            logging.info(f'Writing output file: {outfile.name}')\n            data = json.loads(infile.read())\n            outfile.write(data['mykey'])\n"
  },
  {
    "path": "tests/runners/airflow/operators/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/operators/test_operator_with_variable_resolving.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\nfrom unittest import TestCase\nfrom unittest.mock import MagicMock, patch\n\nfrom airflow.models import BaseOperator\n\nfrom liminal.runners.airflow.operators.operator_with_variable_resolving import (\n    OperatorWithVariableResolving,\n)\n\n\nclass TestKubernetesPodOperatorWithAutoImage(TestCase):\n    @patch.dict(os.environ, {'env': 'myenv', 'LIMINAL_STAND_ALONE_MODE': 'True', 'nested': 'value1'})\n    def test_value_from_environment_and_liminal_config(self):\n        variables = {'my-image': 'my-image', 'something-value1': 'stg'}\n        operator = TestOperator(task_id='abc', field='{{env}}', expected='{{my-image}}')\n        ret_val = OperatorWithVariableResolving(\n            dag=None, operator=operator, task_id='my_task', task_config={}, variables=variables\n        ).execute({})\n\n        self.assertEqual(operator.field, 'myenv')\n        self.assertEqual(ret_val, 'my-image')\n\n    @patch.dict(os.environ, {'env': 'myenv', 'LIMINAL_STAND_ALONE_MODE': 'True', 'nested': 'value1'})\n    def test_value_from_task_variables(self):\n        variables = {'my-image': 'my-image', 'my_dict_var': {'env': 'myenv2'}}\n        operator = 
TestOperator(task_id='abc', field='{{env}}', expected='{{my-image}}')\n        ret_val = OperatorWithVariableResolving(\n            dag=None,\n            operator=operator,\n            task_id='my_task',\n            task_config={'variables': 'my_dict_var'},\n            variables=variables,\n        ).execute({})\n\n        self.assertEqual(operator.field, 'myenv2')\n        self.assertEqual(ret_val, 'my-image')\n\n    @patch.dict(os.environ, {'env': 'myenv', 'LIMINAL_STAND_ALONE_MODE': 'True', 'nested': 'value1'})\n    def test_value_from_inline_task_variables(self):\n        variables = {'my-image': 'my-image'}\n        operator = TestOperator(task_id='abc', field='{{env}}', expected='{{my-image}}')\n        ret_val = OperatorWithVariableResolving(\n            dag=None,\n            operator=operator,\n            task_id='my_task',\n            task_config={'variables': {'env': 'myenv2'}},\n            variables=variables,\n        ).execute({})\n\n        self.assertEqual(operator.field, 'myenv2')\n        self.assertEqual(ret_val, 'my-image')\n\n    @patch.dict(os.environ, {'env': 'staging', 'LIMINAL_STAND_ALONE_MODE': 'True'})\n    def test_nested_variables(self):\n        variables = {\n            'nested-the-s3-location': 'good',\n            'my-image': 'my-image',\n            'something-value1': 'stg',\n            'env_val-staging': 'the-s3-location',\n            'nested': 'value1',\n        }\n        operator = TestOperator(\n            task_id='abc', field='something-{{nested-{{env_val-{{env}}}}}}', expected='{{my-image}}'\n        )\n\n        OperatorWithVariableResolving(\n            dag=None, operator=operator, task_id='my_task', task_config={}, variables=variables\n        ).execute({})\n\n        self.assertEqual(operator.field, 'something-good')\n\n    @patch.dict(os.environ, {'env': 'staging', 'LIMINAL_STAND_ALONE_MODE': 'True'})\n    def test_dag_run_conf_parameters(self):\n        dag_run = MagicMock()\n        dag_run.conf = {\n   
         'nested-the-s3-location': 'good',\n            'my-image': 'my-image',\n            'something-value1': 'stg',\n            'env_val-staging': 'the-s3-location',\n            'nested': 'value1',\n        }\n        operator = TestOperator(\n            task_id='abc', field='something-{{nested-{{env_val-{{env}}}}}}', expected='{{my-image}}'\n        )\n        OperatorWithVariableResolving(\n            dag=None, operator=operator, task_id='my_task', task_config={}, variables={}\n        ).execute({'dag_run': dag_run})\n\n        self.assertEqual(operator.field, 'something-good')\n\n    @patch.dict(os.environ, {'env': 'staging', 'LIMINAL_STAND_ALONE_MODE': 'True'})\n    def test_duplicated_params(self):\n        dag_run = MagicMock()\n        dag_run.conf = {\n            'nested-the-s3-location': 'good',\n            'my-image': 'my-image',\n            'something-value1': 'stg',\n            'env_val-staging': 'the-s3-location',\n            'nested': 'value1',\n        }\n        operator = TestOperator(\n            task_id='abc',\n            field='something-{{nested-{{env_val-{{env}}}}}}-' '{{nested-{{env_val-{{env}}}}}}',\n            expected='{{my-image}}',\n        )\n        OperatorWithVariableResolving(\n            dag=None, operator=operator, task_id='my_task', task_config={}, variables={}\n        ).execute({'dag_run': dag_run})\n\n        self.assertEqual(operator.field, 'something-good-good')\n\n        operator = TestOperator(task_id='abc', field='something-{{env}} {{env}}', expected='{{my-image}}')\n        OperatorWithVariableResolving(\n            dag=None, operator=operator, task_id='my_task', task_config={}, variables={}\n        ).execute({'dag_run': dag_run})\n\n        self.assertEqual(operator.field, 'something-staging staging')\n\n        dag_run.conf = {'var1': '{{var2}}', 'var2': 'bla', 'my-image': 'my-image'}\n        operator = TestOperator(task_id='abc', field='{{ var1 }}', expected='{{my-image}}')\n        
OperatorWithVariableResolving(\n            dag=None, operator=operator, task_id='my_task', task_config={}, variables={}\n        ).execute({'dag_run': dag_run})\n\n        self.assertEqual(operator.field, 'bla')\n\n    @patch.dict(os.environ, {'env': 'staging', 'LIMINAL_STAND_ALONE_MODE': 'True'})\n    def test_non_existing_param(self):\n        dag_run = MagicMock()\n        dag_run.conf = {'var1': '{{var2}}', 'var2': 'bla'}\n        operator = TestOperator(task_id='abc', field='{{myimage}}', expected='{{myimage}}')\n        OperatorWithVariableResolving(\n            dag=None, operator=operator, task_id='my_task', task_config={}, variables={}\n        ).execute({'dag_run': dag_run})\n\n        self.assertEqual(operator.field, '')\n\n\nclass BaseTestOperator(BaseOperator):\n    def execute(self, context):\n        raise NotImplementedError()\n\n    def __init__(self, field, expected, *args, **kwargs):\n        self.field = field\n        self.expected = expected\n        super().__init__(*args, **kwargs)\n\n\nclass TestOperator(BaseTestOperator):\n    def __init__(self, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n\n    def execute(self, context):\n        return self.expected\n"
  },
  {
    "path": "tests/runners/airflow/tasks/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/airflow/tasks/test_create_cloudformation_stack.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nfrom datetime import datetime\nfrom unittest import TestCase, mock\nfrom unittest.mock import MagicMock\n\nfrom airflow.operators.dummy import DummyOperator\nfrom airflow.operators.python import BranchPythonOperator\nfrom moto import mock_cloudformation\n\nfrom liminal.runners.airflow.executors import airflow\nfrom liminal.runners.airflow.operators.cloudformation import (\n    CloudFormationCreateStackOperator,\n    CloudFormationCreateStackSensor,\n    CloudFormationHook,\n)\nfrom liminal.runners.airflow.operators.operator_with_variable_resolving import (\n    OperatorWithVariableResolving,\n)\nfrom liminal.runners.airflow.tasks.create_cloudformation_stack import (\n    CreateCloudFormationStackTask,\n)\nfrom tests.util import dag_test_utils\n\n\n# noinspection DuplicatedCode\n@mock_cloudformation\nclass TestCreateCloudFormationStackTask(TestCase):\n    \"\"\"\n    Test CreateCloudFormationStackTask\n    \"\"\"\n\n    def setUp(self) -> None:\n        self.dag = dag_test_utils.create_dag()\n        self.dag.context = {\n            'ti': self.dag,\n            'ts': datetime.now().timestamp(),\n            'execution_date': datetime.now().timestamp(),\n    
    }\n        self.dag.get_dagrun = MagicMock()\n\n        self.cluster_name = \"liminal-cluster-for-tests\"\n        self.config = {\n            'task': 'create_emr',\n            'type': 'create_cloudformation_stack',\n            'description': 'create emr',\n            'stack_name': self.cluster_name,\n            'properties': {\n                'OnFailure': 'DO_NOTHING',\n                'TimeoutInMinutes': 25,\n                'Capabilities': ['CAPABILITY_NAMED_IAM'],\n                'TemplateURL': 'https://s3.amazonaws.com/liminal-tests/emr_cluster_creation.yml',\n                'Parameters': {\n                    'Environment': 'Staging',\n                    'OwnerTeam': 'liminal-team',\n                    'Tenancy': 'Ephemeral',\n                    'MasterServerCount': '1',\n                    'CoreServerCount': '1',\n                    'EmrApplicationRelease': '5.28',\n                    'InstanceTypeMaster': 'm5.xlarge',\n                    'InstanceTypeCore': 'm5.xlarge',\n                },\n            },\n        }\n\n        self.create_cloudformation_task = CreateCloudFormationStackTask(\n            self.config['task'],\n            self.dag,\n            [],\n            trigger_rule='all_success',\n            liminal_config={},\n            pipeline_config={},\n            task_config=self.config,\n        )\n\n        airflow.AirflowExecutor(\"airflow-executor\", {}, {}).apply_task_to_dag(task=self.create_cloudformation_task)\n\n    def test_apply_task_to_dag(self):\n        self.assertEqual(len(self.dag.tasks), 4)\n\n        self.assertIsInstance(self.dag.tasks[0], OperatorWithVariableResolving)\n        self.assertIsInstance(self.dag.tasks[0].operator_delegate, BranchPythonOperator)\n        self.assertIsInstance(self.dag.tasks[1], OperatorWithVariableResolving)\n        self.assertIsInstance(self.dag.tasks[1].operator_delegate, CloudFormationCreateStackOperator)\n        self.assertIsInstance(self.dag.tasks[2], 
OperatorWithVariableResolving)\n        self.assertIsInstance(self.dag.tasks[2].operator_delegate, CloudFormationCreateStackSensor)\n        self.assertIsInstance(self.dag.tasks[3], DummyOperator)\n\n    def test_cloudformation_does_not_exist(self):\n        with mock.patch.object(CloudFormationHook, 'get_conn') as mock_conn:\n            mock_cf_conn = MagicMock()\n            mock_cf_conn.describe_stacks.return_value.raiseError.side_effect = Exception()\n\n            mock_conn.return_value = mock_cf_conn\n            is_cloudformation_exists = self.dag.tasks[0].operator_delegate\n\n            print(is_cloudformation_exists)\n            self.assertEqual(\n                is_cloudformation_exists.python_callable(stack_name=self.cluster_name),\n                'create-cloudformation-create_emr',\n            )\n\n    def test_cloudformation_exist_and_running(self):\n        is_cloudformation_exists = self.dag.tasks[0].operator_delegate\n\n        for status in ['CREATE_COMPLETE', 'DELETE_FAILED']:\n            with mock.patch.object(CloudFormationHook, 'get_conn') as mock_conn:\n                mock_cloudformation_conn = MagicMock()\n                mock_cloudformation_conn.describe_stacks.return_value = {'Stacks': [{'StackStatus': status}]}\n\n                mock_conn.return_value = mock_cloudformation_conn\n\n                self.assertEqual(\n                    is_cloudformation_exists.python_callable(stack_name=self.cluster_name), 'creation-end-create_emr'\n                )\n\n    def test_cloudformation_exists_and_not_running(self):\n        is_cloudformation_exists = self.dag.tasks[0].operator_delegate\n\n        for status in ['DELETED']:\n            with mock.patch.object(CloudFormationHook, 'get_conn') as mock_conn:\n                mock_cloudformation_conn = MagicMock()\n                mock_cloudformation_conn.describe_stacks.return_value = {'Stacks': [{'StackStatus': status}]}\n\n                mock_conn.return_value = 
mock_cloudformation_conn\n\n                self.assertEqual(\n                    is_cloudformation_exists.python_callable(stack_name=self.cluster_name),\n                    'create-cloudformation-create_emr',\n                )\n\n    def test_cloudformation_create_stack_operator_task(self):\n        create_cloudformation_stack = self.dag.tasks[1]\n        self.assertEqual(create_cloudformation_stack.task_id, 'create-cloudformation-create_emr')\n"
  },
  {
    "path": "tests/runners/airflow/tasks/test_delete_cloudformation_stack.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom datetime import datetime\nfrom unittest import TestCase\nfrom unittest.mock import MagicMock\n\nfrom airflow.operators.dummy_operator import DummyOperator\nfrom airflow.operators.python_operator import BranchPythonOperator\nfrom airflow.utils.trigger_rule import TriggerRule\nfrom moto import mock_cloudformation\n\nfrom liminal.runners.airflow.operators.cloudformation import (\n    CloudFormationDeleteStackOperator,\n    CloudFormationDeleteStackSensor,\n)\nfrom liminal.runners.airflow.operators.operator_with_variable_resolving import (\n    OperatorWithVariableResolving,\n)\nfrom liminal.runners.airflow.tasks.delete_cloudformation_stack import (\n    DeleteCloudFormationStackTask,\n)\nfrom tests.util import dag_test_utils\n\n\n# noinspection DuplicatedCode\n@mock_cloudformation\nclass TestDeleteCloudFormationStackTask(TestCase):\n    \"\"\"\n    Test DeleteCloudFormationStackTask\n    \"\"\"\n\n    def setUp(self) -> None:\n        self.dag = dag_test_utils.create_dag()\n        self.dag.context = {\n            'ti': self.dag,\n            'ts': datetime.now().timestamp(),\n            'execution_date': datetime.now().timestamp(),\n        }\n       
 self.dag.get_dagrun = MagicMock()\n\n        self.cluster_name = \"liminal-cluster-for-tests\"\n\n        self.delete_cloudformation_task = DeleteCloudFormationStackTask(\n            'delete-emr',\n            self.dag,\n            [],\n            trigger_rule='all_success',\n            liminal_config={},\n            pipeline_config={},\n            task_config={'stack_name': self.cluster_name},\n        )\n\n        self.delete_cloudformation_task.apply_task_to_dag()\n\n    def test_apply_task_to_dag(self):\n        self.assertEqual(len(self.dag.tasks), 4)\n\n        self.assertIsInstance(self.dag.tasks[0], BranchPythonOperator)\n        self.assertIsInstance(self.dag.tasks[1], OperatorWithVariableResolving)\n        self.assertIsInstance(self.dag.tasks[1].operator_delegate, CloudFormationDeleteStackOperator)\n        self.assertIsInstance(self.dag.tasks[2], OperatorWithVariableResolving)\n        self.assertIsInstance(self.dag.tasks[2].operator_delegate, CloudFormationDeleteStackSensor)\n        self.assertIsInstance(self.dag.tasks[3], DummyOperator)\n\n    def test_check_dags_queued_task(self):\n        self.dag.get_num_active_runs = MagicMock()\n\n        self.dag.get_num_active_runs.return_value = 2\n        check_dags_queued_task = self.dag.tasks[0]\n        self.assertEqual(check_dags_queued_task.trigger_rule, TriggerRule.ALL_DONE)\n\n        self.assertEqual(check_dags_queued_task.python_callable(), f'delete-end-delete-emr')\n\n        self.dag.get_num_active_runs.return_value = 1\n\n        self.assertEqual(check_dags_queued_task.python_callable(), f'delete-cloudformation-delete-emr')\n"
  },
  {
    "path": "tests/runners/airflow/tasks/test_job_end.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport unittest\nfrom unittest import TestCase\n\nfrom liminal.runners.airflow.executors import airflow\nfrom liminal.runners.airflow.tasks import job_end\nfrom tests.util import dag_test_utils\n\n\n# noinspection DuplicatedCode\nclass TestJobEndTask(TestCase):\n    def test_apply_task_to_dag(self):\n        dag = dag_test_utils.create_dag()\n\n        task0 = job_end.JobEndTask(\n            task_id='job_end',\n            dag=dag,\n            pipeline_config={'pipeline': 'my_end_pipeline'},\n            task_config={},\n            parent=None,\n            trigger_rule='all_done',\n            liminal_config={'metrics': {'namespace': 'EndJobNameSpace', 'backends': ['cloudwatch']}},\n        )\n        airflow.AirflowExecutor(\"airflow-executor\", {}, {}).apply_task_to_dag(task=task0)\n\n        self.assertEqual(len(dag.tasks), 1)\n        dag_task0 = dag.tasks[0]\n\n        self.assertEqual(dag_task0.namespace, 'EndJobNameSpace')\n        self.assertEqual(dag_task0.backends, ['cloudwatch'])\n\n        self.assertEqual(dag_task0.task_id, 'end')\n\n    def test_apply_task_to_dag_missing_metrics(self):\n        conf = {'pipeline': 'my_pipeline'}\n        
dag = dag_test_utils.create_dag()\n\n        task0 = job_end.JobEndTask(\n            task_id=\"job_end\",\n            dag=dag,\n            pipeline_config={'pipeline': 'my_end_pipeline'},\n            liminal_config=conf,\n            parent=None,\n            trigger_rule='all_done',\n            task_config={},\n        )\n\n        airflow.AirflowExecutor(\"airflow-executor\", {}, {}).apply_task_to_dag(task=task0)\n\n        self.assertEqual(len(dag.tasks), 1)\n        dag_task0 = dag.tasks[0]\n\n        self.assertEqual(dag_task0.namespace, '')\n        self.assertEqual(dag_task0.backends, [])\n        self.assertEqual(dag_task0.trigger_rule, 'all_done')\n\n    def test_apply_task_to_dag_with_partial_configuration(self):\n        dag = dag_test_utils.create_dag()\n\n        task0 = job_end.JobEndTask(\n            task_id=\"job_end\",\n            dag=dag,\n            liminal_config={'metrics': {'namespace': 'EndJobNameSpace'}},\n            pipeline_config={'pipeline': 'my_end_pipeline'},\n            task_config={},\n            parent=None,\n            trigger_rule='all_done',\n        )\n        task0.apply_task_to_dag()\n\n        self.assertEqual(len(dag.tasks), 1)\n        dag_task0 = dag.tasks[0]\n\n        self.assertEqual(dag_task0.namespace, 'EndJobNameSpace')\n        self.assertEqual(dag_task0.backends, [])\n        self.assertEqual(dag_task0.trigger_rule, 'all_done')\n\n\nif __name__ == '__main__':\n    unittest.main()\n"
  },
  {
    "path": "tests/runners/airflow/tasks/test_job_start.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport unittest\nfrom unittest import TestCase\n\nfrom liminal.runners.airflow.executors import airflow\nfrom liminal.runners.airflow.tasks import job_start\nfrom tests.util import dag_test_utils\n\n\n# noinspection DuplicatedCode\nclass TestJobStartTask(TestCase):\n    def test_apply_task_to_dag(self):\n        dag = dag_test_utils.create_dag()\n\n        task0 = job_start.JobStartTask(\n            task_id=\"start_task\",\n            dag=dag,\n            liminal_config={'metrics': {'namespace': 'StartJobNameSpace', 'backends': ['cloudwatch']}},\n            pipeline_config={'pipeline': 'my_start_pipeline'},\n            task_config={},\n            parent=None,\n            trigger_rule='all_success',\n        )\n        task0.apply_task_to_dag()\n\n        self.assertEqual(len(dag.tasks), 1)\n        dag_task0 = dag.tasks[0]\n\n        self.assertEqual(dag_task0.namespace, 'StartJobNameSpace')\n        self.assertEqual(dag_task0.backends, ['cloudwatch'])\n\n        self.assertEqual(dag_task0.task_id, 'start')\n\n    def test_apply_task_to_dag_missing_metrics(self):\n        conf = {'pipeline': 'my_pipeline'}\n\n        dag = 
dag_test_utils.create_dag()\n\n        task0 = job_start.JobStartTask(\n            task_id=\"start_task\",\n            dag=dag,\n            liminal_config=conf,\n            task_config={},\n            pipeline_config={'pipeline': 'my_end_pipeline'},\n            parent=None,\n            trigger_rule='all_success',\n        )\n        airflow.AirflowExecutor(\"airflow-executor\", {}, {}).apply_task_to_dag(task=task0)\n\n        self.assertEqual(len(dag.tasks), 1)\n        dag_task0 = dag.tasks[0]\n\n        self.assertEqual(dag_task0.namespace, '')\n        self.assertEqual(dag_task0.backends, [])\n        self.assertEqual(dag_task0.trigger_rule, 'all_success')\n\n    def test_apply_task_to_dag_with_partial_configuration(self):\n        dag = dag_test_utils.create_dag()\n\n        task0 = job_start.JobStartTask(\n            task_id=\"start_task\",\n            dag=dag,\n            liminal_config={'metrics': {'namespace': 'StartJobNameSpace'}},\n            pipeline_config={'pipeline': 'my_start_pipeline'},\n            task_config={},\n            parent=None,\n            trigger_rule='all_success',\n        )\n        airflow.AirflowExecutor(\"airflow-executor\", {}, {}).apply_task_to_dag(task=task0)\n\n        self.assertEqual(len(dag.tasks), 1)\n        dag_task0 = dag.tasks[0]\n\n        self.assertEqual(dag_task0.namespace, 'StartJobNameSpace')\n        self.assertEqual(dag_task0.backends, [])\n\n\nif __name__ == '__main__':\n    unittest.main()\n"
  },
  {
    "path": "tests/runners/airflow/tasks/test_python.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\nimport tempfile\nimport unittest\nfrom unittest import TestCase\n\nfrom liminal.build import liminal_apps_builder\nfrom liminal.kubernetes import secret_util, volume_util\nfrom liminal.runners.airflow import DummyDag\nfrom liminal.runners.airflow.executors.kubernetes import KubernetesPodExecutor\nfrom liminal.runners.airflow.tasks import python\nfrom tests.util import dag_test_utils\n\n\nclass TestPythonTask(TestCase):\n    _VOLUME_NAME = 'myvol1'\n    _SECRET_NAME = 'aws'\n\n    def setUp(self) -> None:\n        volume_util.delete_local_volume(self._VOLUME_NAME)\n        secret_util.delete_local_secret(self._SECRET_NAME)\n        os.environ['TMPDIR'] = '/tmp'\n        self.temp_dir = tempfile.mkdtemp()\n        self.liminal_config = {\n            'volumes': [\n                {\n                    'volume': self._VOLUME_NAME,\n                    'local': {'path': self.temp_dir.replace(\"/var/folders\", \"/private/var/folders\")},\n                }\n            ],\n            'secrets': [{'secret': self._SECRET_NAME, 'remote_path': \"/mnt\"}],\n        }\n\n        volume_util.create_local_volumes(self.liminal_config, None)\n        
secret_util.create_local_secrets(self.liminal_config)\n\n        liminal_apps_builder.build_liminal_apps(os.path.join(os.path.dirname(__file__), '../liminal'))\n\n    def test_apply_task_to_dag(self):\n        dag = dag_test_utils.create_dag()\n\n        task0 = self.__create_python_task(\n            dag,\n            'my_input_task',\n            None,\n            'my_python_task_img',\n            'python -u write_inputs.py',\n            env_vars={'NUM_FILES': 10, 'NUM_SPLITS': 3, 'AWS_CONFIG_FILE': '/mnt/credentials', 'AWS_PROFILE': 'dev'},\n        )\n        executor = KubernetesPodExecutor(\n            task_id='k8s', liminal_config=self.liminal_config, executor_config={'executor': 'k8s', 'name': 'mypod'}\n        )\n\n        executor.apply_task_to_dag(task=task0)\n\n        task1 = self.__create_python_task(\n            dag,\n            'my_output_task',\n            dag.tasks[0],\n            'my_parallelized_python_task_img',\n            'python -u write_outputs.py',\n            executors=3,\n        )\n        executor.apply_task_to_dag(task=task1)\n\n        for task in dag.tasks:\n            print(f'Executing task {task.task_id}')\n            task.execute(DummyDag('my_dag', task.task_id).context)\n\n        inputs_dir = os.path.join(self.temp_dir, 'inputs')\n        outputs_dir = os.path.join(self.temp_dir, 'outputs')\n\n        self.assertListEqual(sorted(os.listdir(self.temp_dir)), sorted(['outputs', 'inputs']))\n\n        inputs_dir_contents = sorted(os.listdir(inputs_dir))\n\n        self.assertListEqual(inputs_dir_contents, ['0', '1', '2'])\n\n        self.assertListEqual(\n            sorted(os.listdir(os.path.join(inputs_dir, '0'))),\n            ['input0.json', 'input3.json', 'input6.json', 'input9.json'],\n        )\n\n        self.assertListEqual(\n            sorted(os.listdir(os.path.join(inputs_dir, '1'))), ['input1.json', 'input4.json', 'input7.json']\n        )\n\n        self.assertListEqual(\n            
sorted(os.listdir(os.path.join(inputs_dir, '2'))), ['input2.json', 'input5.json', 'input8.json']\n        )\n\n        self.assertListEqual(\n            sorted(os.listdir(outputs_dir)),\n            [\n                'output0.txt',\n                'output1.txt',\n                'output2.txt',\n                'output3.txt',\n                'output4.txt',\n                'output5.txt',\n                'output6.txt',\n                'output7.txt',\n                'output8.txt',\n                'output9.txt',\n            ],\n        )\n\n        for filename in os.listdir(outputs_dir):\n            with open(os.path.join(outputs_dir, filename)) as f:\n                expected_file_content = filename.replace('output', 'myval').replace('.txt', '')\n                self.assertEqual(f.read(), expected_file_content)\n\n    def __create_python_task(self, dag, task_id, parent, image, cmd, env_vars=None, executors=None):\n\n        self.liminal_config['volumes'] = [\n            {\n                'volume': self._VOLUME_NAME,\n                'local': {'path': self.temp_dir.replace(\"/var/folders\", \"/private/var/folders\")},\n            }\n        ]\n\n        self.liminal_config['executors'] = [\n            {\n                'executor': 'k8s',\n                'type': 'kubernetes',\n            }\n        ]\n        task_config = {\n            'task': task_id,\n            'cmd': cmd,\n            'image': image,\n            'env_vars': env_vars if env_vars is not None else {},\n            'mounts': [{'mount': 'mymount', 'volume': self._VOLUME_NAME, 'path': '/mnt/vol1'}],\n            'secrets': [{'secret': self._SECRET_NAME, 'remote_path': '/mnt'}],\n        }\n\n        if executors:\n            task_config['executors'] = executors\n\n        return python.PythonTask(\n            task_id=task_id,\n            dag=dag,\n            liminal_config=self.liminal_config,\n            pipeline_config={'pipeline': 'my_pipeline'},\n            
task_config=task_config,\n            parent=parent,\n            trigger_rule='all_success',\n        )\n\n\nif __name__ == '__main__':\n    unittest.main()\n"
  },
  {
    "path": "tests/runners/airflow/tasks/test_spark_task.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\nimport tempfile\nfrom unittest import TestCase\n\nfrom liminal.build import liminal_apps_builder\nfrom liminal.kubernetes import volume_util\nfrom liminal.runners.airflow import DummyDag\nfrom liminal.runners.airflow.executors.kubernetes import KubernetesPodExecutor\nfrom liminal.runners.airflow.tasks.spark import SparkTask\nfrom tests.util import dag_test_utils\n\n\nclass TestSparkTask(TestCase):\n    \"\"\"\n    Test Spark Task\n    \"\"\"\n\n    _VOLUME_NAME = 'myvol1'\n\n    def test_spark_on_k8s(self):\n        volume_util.delete_local_volume(self._VOLUME_NAME)\n        os.environ['TMPDIR'] = '/tmp'\n        self.temp_dir = tempfile.mkdtemp()\n        self.liminal_config = {\n            'volumes': [\n                {\n                    'volume': self._VOLUME_NAME,\n                    'local': {'path': self.temp_dir.replace(\"/var/folders\", \"/private/var/folders\")},\n                }\n            ]\n        }\n        volume_util.create_local_volumes(self.liminal_config, None)\n\n        # build spark image\n        liminal_apps_builder.build_liminal_apps(os.path.join(os.path.dirname(__file__), '../../apps/test_spark_app'))\n\n     
   outputs_dir = os.path.join(self.temp_dir, 'outputs')\n\n        task_config = {\n            'task': \"my_spark_task\",\n            'image': \"my_spark_image\",\n            'application_source': 'wordcount.py',\n            'application_arguments': ['words.txt', '/mnt/vol1/outputs/'],\n            'env_vars': {},\n            'mounts': [{'mount': 'mymount', 'volume': self._VOLUME_NAME, 'path': '/mnt/vol1'}],\n        }\n\n        dag = dag_test_utils.create_dag()\n\n        task1 = SparkTask(\n            task_id=\"my_spark_task\",\n            dag=dag,\n            liminal_config=self.liminal_config,\n            pipeline_config={'pipeline': 'my_pipeline'},\n            task_config=task_config,\n            parent=None,\n            trigger_rule='all_success',\n        )\n\n        executor = KubernetesPodExecutor(\n            task_id='k8s', liminal_config=self.liminal_config, executor_config={'executor': 'k8s', 'name': 'mypod'}\n        )\n        executor.apply_task_to_dag(task=task1)\n\n        for task in dag.tasks:\n            print(f'Executing task {task.task_id}')\n            task.execute(DummyDag('my_dag', task.task_id).context)\n\n        expected_output = (\n            '{\"word\":\"my\",\"count\":1}\\n'\n            '{\"word\":\"first\",\"count\":1}\\n'\n            '{\"word\":\"liminal\",\"count\":1}\\n'\n            '{\"word\":\"spark\",\"count\":1}\\n'\n            '{\"word\":\"task\",\"count\":1}\\n'.split(\"\\n\")\n        )\n\n        actual = ''\n        for filename in os.listdir(outputs_dir):\n            if filename.endswith(\".json\"):\n                with open(os.path.join(outputs_dir, filename)) as f:\n                    actual = f.read()\n\n        self.assertEqual(actual.split(\"\\n\"), expected_output)\n\n    def test_get_runnable_command(self):\n        task_config = {\n            'application_source': 'my_app.py',\n            'master': 'yarn',\n            'class': 'org.apache.liminal.MySparkApp',\n            'conf': {\n   
             'spark.driver.memory': '1g',\n                'spark.driver.maxResultSize': '1g',\n                'spark.yarn.executor.memoryOverhead': '500M',\n            },\n            'application_arguments': {\n                '--query': \"select * from \" \"my_table where date_prt >= \" \"'{{yesterday_ds}}'\",\n                '--output': 'mytable',\n            },\n        }\n\n        expected = [\n            'spark-submit',\n            '--master',\n            'yarn',\n            '--class',\n            'org.apache.liminal.MySparkApp',\n            '--conf',\n            'spark.driver.maxResultSize=1g',\n            '--conf',\n            'spark.driver.memory=1g',\n            '--conf',\n            'spark.yarn.executor.memoryOverhead=500M',\n            'my_app.py',\n            '--query',\n            \"select * from my_table where \" \"date_prt >= '{{yesterday_ds}}'\",\n            '--output',\n            'mytable',\n        ]\n\n        actual = SparkTask(\n            'my_spark_task',\n            DummyDag('dag-id', 'my_spark_task'),\n            [],\n            trigger_rule='all_success',\n            liminal_config={},\n            pipeline_config={'pipeline': 'pipeline'},\n            task_config=task_config,\n        ).get_runnable_command()\n\n        self.assertEqual(sorted(actual), sorted(expected))\n\n    def test_missing_spark_arguments(self):\n        task_config = {\n            'application_source': 'my_app.py',\n            'application_arguments': {\n                '--query': \"select * from my_table where\" \" date_prt >= '{{yesterday_ds}}'\",\n                '--output': 'myoutputtable',\n            },\n        }\n\n        expected = [\n            'spark-submit',\n            'my_app.py',\n            '--query',\n            \"select * from my_table where \" \"date_prt >= '{{yesterday_ds}}'\",\n            '--output',\n            'myoutputtable',\n        ]\n\n        actual = SparkTask(\n            'my_spark_task',\n           
 DummyDag('dag-id', 'my_spark_task'),\n            [],\n            trigger_rule='all_success',\n            liminal_config={},\n            pipeline_config={'pipeline': 'pipeline'},\n            task_config=task_config,\n        ).get_runnable_command()\n\n        self.assertEqual(sorted(actual), sorted(expected))\n\n    def test_partially_missing_spark_arguments(self):\n        task_config = {\n            'application_source': 'my_app.py',\n            'class': 'org.apache.liminal.MySparkApp',\n            'conf': {\n                'spark.driver.memory': '1g',\n                'spark.driver.maxResultSize': '1g',\n                'spark.yarn.executor.memoryOverhead': '500M',\n            },\n            'application_arguments': {\n                '--query': \"select * from my_table where \" \"date_prt >= '{{yesterday_ds}}'\",\n                '--output': 'myoutputtable',\n            },\n        }\n\n        expected = [\n            'spark-submit',\n            '--class',\n            'org.apache.liminal.MySparkApp',\n            '--conf',\n            'spark.driver.maxResultSize=1g',\n            '--conf',\n            'spark.driver.memory=1g',\n            '--conf',\n            'spark.yarn.executor.memoryOverhead=500M',\n            'my_app.py',\n            '--query',\n            'select * from my_table where ' \"date_prt >= '{{yesterday_ds}}'\",\n            '--output',\n            'myoutputtable',\n        ]\n\n        actual = SparkTask(\n            'my_spark_task',\n            DummyDag('dag-id', 'my_spark_task'),\n            [],\n            trigger_rule='all_success',\n            liminal_config={},\n            pipeline_config={'pipeline': 'pipeline'},\n            task_config=task_config,\n        ).get_runnable_command()\n\n        self.assertEqual(sorted(actual), sorted(expected))\n"
  },
  {
    "path": "tests/runners/apps/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/apps/test/liminal.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/apps/test_app/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/apps/test_app/defaults/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/apps/test_app/defaults/liminal.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nname: default_liminal\ntype: super\nservice_defaults:\n  one: two\ntask_defaults:\n  env_vars:\n    env1: a\npipeline_defaults:\n  one: one\n  two: twoww\n"
  },
  {
    "path": "tests/runners/apps/test_app/extra/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/apps/test_app/extra/liminal.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nname: extra_liminal\ntype: super\nsuper: default_liminal\nservice_defaults:\n  endpoints:\n    - endpoint: /myendpoint1\n      module: myserver.my_server\n      function: myendpoint1func\nimages:\n  - image: my_static_input_task_image\n    type: python\n    source: helloworld\n    no_cache: true\n  - image: my_task_output_input_task_image\n    type: python\n    source: helloworld\n    no_cache: true\npipeline_defaults:\n  tasks:\n    - task: my_static_input_task\n      type: python\n      description: static input task\n      image: my_static_input_task_image\n      env_vars:\n        env2: b\n      cmd: python -u hello_world.py\n    - task: extenable\n      type: pipeline\n      description: optional sub tasks\n    - task: my_task_output_input_task\n      type: python\n      no_cache: true\n      description: task with input from other task's output\n      image: my_task_output_input_task_image\n      env_vars:\n        env1: c\n        env2: b\n      cmd: python -u hello_world.py\n"
  },
  {
    "path": "tests/runners/apps/test_app/helloworld/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nprint('Hello world!\\n')\n"
  },
  {
    "path": "tests/runners/apps/test_app/helloworld/hellp_world.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport json\nimport os\n\ninputs_dir = '/mnt/vol1/inputs/'\n\nnum_files = int(os.environ['NUM_FILES'])\nnum_splits = int(os.environ['NUM_SPLITS'])\n\n# create input files - split by round robin\nfor i in range(0, num_files):\n    split_id = i % num_splits\n    split_dir = os.path.join(inputs_dir, str(split_id))\n\n    if not os.path.exists(split_dir):\n        os.makedirs(split_dir)\n\n    filename = os.path.join(split_dir, f'input{i}.json')\n    with open(filename, 'w') as f:\n        print(f'Writing input file {filename}')\n        f.write(json.dumps({'mykey': f'myval{i}'}))\n"
  },
  {
    "path": "tests/runners/apps/test_app/liminal.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nname: MyPipeline\nsuper: extra_liminal\nimages:\n  - image: my_static_input_task_image\n    type: python\n    source: helloworld\n    no_cache: true\n  - image: my_server_image\n    type: python_server\n    source: myserver\n    no_cache: true\npipelines:\n  - pipeline: my_pipeline\n    owner: Bosco Albert Baracus\n    start_date: 1970-01-01\n    timeout_minutes: 45\n    schedule: 0 * 1 * *\n    default_arg_loaded: check\n    default_array_loaded: [2, 3, 4]\n    default_object_loaded:\n      key1: val1\n      key2: val2\n    tasks:\n      - task: my_parallelized_static_input_task\n        type: python\n        description: parallelized static input task\n        image: my_static_input_task_image\n        executors: 2\n        cmd: python -u hello_world.py\nservices:\n  - service: my_python_server\n    description: my python server\n    image: my_server_image\n"
  },
  {
    "path": "tests/runners/apps/test_app/my_server/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/apps/test_app/my_server/my_server.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\nimport json\n\n\ndef myendpoint1func(input_json):\n    input_dict = json.loads(input_json) if input_json else {}\n    return f'Input was: {input_dict}'\n"
  },
  {
    "path": "tests/runners/apps/test_spark_app/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/apps/test_spark_app/liminal.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\nname: MyPipeline\nimages:\n  - image: my_spark_image\n    type: spark\n    source: wordcount\n    no_cache: true\npipelines:\n  - pipeline: my_pipeline\n    owner: Bosco Albert Baracus\n    start_date: 1970-01-01\n    timeout_minutes: 45\n    schedule: 0 * 1 * *\n    tasks:\n      - task: my_test_spark_task\n        type: spark\n        description: spark task on k8s\n        image: my_spark_image\n        executors: 2\n        application_source: wordcount.py\n        application_arguments:\n          - words.txt\n"
  },
  {
    "path": "tests/runners/apps/test_spark_app/wordcount/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/runners/apps/test_spark_app/wordcount/requirements.txt",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\npyyaml==5.4\n"
  },
  {
    "path": "tests/runners/apps/test_spark_app/wordcount/wordcount.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport sys\nfrom operator import add\n\nimport yaml\nfrom pyspark.sql import SparkSession\n\nif __name__ == \"__main__\":\n    if len(sys.argv) != 3:\n        print(\"Usage: wordcount <file> output <outputlocation>\", file=sys.stderr)\n        sys.exit(-1)\n\n    spark = SparkSession.builder.appName(\"PythonWordCount\").getOrCreate()\n\n    lines = spark.read.text(sys.argv[1]).rdd.filter(lambda x: not x[0].startswith('#')).map(lambda r: r[0])\n    counts = lines.flatMap(lambda x: x.split(' ')).map(lambda x: (x, 1)).reduceByKey(add)\n    output = counts.collect()\n    for (word, count) in output:\n        print(\"%s: %i\" % (word, count))\n\n    print(f\"writing the results to {sys.argv[2]}\")\n    counts.toDF([\"word\", \"count\"]).coalesce(1).write.mode(\"overwrite\").json(sys.argv[2])\n\n    spark.stop()\n"
  },
  {
    "path": "tests/runners/apps/test_spark_app/wordcount/words.txt",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nmy first liminal spark task\n"
  },
  {
    "path": "tests/test_licenses.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nimport os\nimport pathlib\nfrom unittest import TestCase\n\nfrom termcolor import colored\n\nEXCLUDED_EXTENSIONS = ['.gif', '.png', '.pyc', 'LICENSE', 'DISCLAIMER', 'DISCLAIMER-WIP', 'NOTICE', '.whl']\nEXCLUDED_DIRS = [\n    'docs/build',\n    'build',\n    'dist',\n    '.git',\n    '.idea',\n    'venv',\n    '.venv',\n    'apache_liminal.egg-info',\n    '.pytest_cache',\n]\nEXCLUDED_FILES = ['DISCLAIMER-WIP', 'LICENSE.txt', '.autovenv', 'credentials-aws.txt']\n\nPYTHON_LICENSE_HEADER = \"\"\"\n#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  
You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\"\"\".strip().split(\n    \"\\n\"\n)\n\nMD_LICENSE_HEADER = \"\"\"\n<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\"\"\".strip().split(\n    \"\\n\"\n)\n\nRST_LICENSE_HEADER = \"\"\"\n..\n   Licensed to the Apache Software Foundation (ASF) under one\n   or more contributor license agreements.  See the NOTICE file\n   distributed with this work for additional information\n   regarding copyright ownership.  The ASF licenses this file\n   to you under the Apache License, Version 2.0 (the\n   \"License\"); you may not use this file except in compliance\n   with the License.  You may obtain a copy of the License at\n..\n\n..  
http://www.apache.org/licenses/LICENSE-2.0\n\n..\n   Unless required by applicable law or agreed to in writing,\n   software distributed under the License is distributed on an\n   \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n   KIND, either express or implied.  See the License for the\n   specific language governing permissions and limitations\n   under the License.\n..\n\"\"\".strip().split(\n    \"\\n\"\n)\n\nBAT_LICENSE_HEADER = \"\"\"\nREM\nREM Licensed to the Apache Software Foundation (ASF) under one\nREM or more contributor license agreements.  See the NOTICE file\nREM distributed with this work for additional information\nREM regarding copyright ownership.  The ASF licenses this file\nREM to you under the Apache License, Version 2.0 (the\nREM \"License\"); you may not use this file except in compliance\nREM with the License.  You may obtain a copy of the License at\nREM\nREM   http://www.apache.org/licenses/LICENSE-2.0\nREM\nREM Unless required by applicable law or agreed to in writing,\nREM software distributed under the License is distributed on an\nREM \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nREM KIND, either express or implied.  See the License for the\nREM specific language governing permissions and limitations\nREM under the License.\n\"\"\".strip().split(\n    \"\\n\"\n)\n\nJSON_LICENSE_HEADER = \"\"\"\n  \"\": [\n    \"Licensed to the Apache Software Foundation (ASF) under one\",\n    \"or more contributor license agreements.  See the NOTICE file\",\n    \"distributed with this work for additional information\",\n    \"regarding copyright ownership.  The ASF licenses this file\",\n    \"to you under the Apache License, Version 2.0 (the\",\n    \"\\\\\"License\\\\\"); you may not use this file except in compliance\",\n    \"with the License.  
You may obtain a copy of the License at\",\n    \"\",\n    \"  http://www.apache.org/licenses/LICENSE-2.0\",\n    \"\",\n    \"Unless required by applicable law or agreed to in writing,\",\n    \"software distributed under the License is distributed on an\",\n    \"\\\\\"AS IS\\\\\"BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\",\n    \"KIND, either express or implied.  See the License for the\",\n    \"specific language governing permissions and limitations\",\n    \"under the License.\"\n  ],\n\"\"\".strip().split(\n    \"\\n\"\n)\n\n\nclass TestLicenses(TestCase):\n    def test_licenses(self):\n        files = []\n        base_dir = os.path.join(pathlib.Path(__file__).parent.parent.absolute())\n        print(f'Checking licenses for files in {base_dir}')\n        for r, d, f in os.walk(base_dir):\n            if not any(os.path.relpath(r, base_dir).startswith(excluded) for excluded in EXCLUDED_DIRS):\n                for file in f:\n                    if (\n                        not any(os.path.basename(file).endswith(ext) for ext in EXCLUDED_EXTENSIONS)\n                        and not os.path.basename(file) in EXCLUDED_FILES\n                    ):\n                        files.append(os.path.join(r, file))\n\n        output = ''\n        files_missing_license = []\n        success = True\n        for file in files:\n            print(f'Checking license for file {file}')\n            has_license = self.check_license(file)\n            if not has_license:\n                output += colored(f'Missing License: {file}\\n', 'red')\n                files_missing_license.append(file)\n            success = success and has_license\n\n        print(output)\n\n        if not success:\n            self.assertListEqual([], files_missing_license)\n\n    @staticmethod\n    def check_license(file):\n        header_lines = PYTHON_LICENSE_HEADER\n        if file.endswith('.md'):\n            header_lines = MD_LICENSE_HEADER\n        elif file.endswith('.rst'):\n            
header_lines = RST_LICENSE_HEADER\n        elif file.endswith('.bat'):\n            header_lines = BAT_LICENSE_HEADER\n        elif file.endswith('.json'):\n            header_lines = JSON_LICENSE_HEADER\n            header_lines[0] = f'  {header_lines[0]}'\n        with open(file) as f:\n            file_lines = f.readlines()\n        return all(f'{line}\\n' in file_lines for line in header_lines)\n"
  },
  {
    "path": "tests/util/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/util/dag_test_utils.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\nfrom datetime import datetime\n\nfrom airflow import DAG\n\n\ndef create_dag():\n    \"\"\"\n    Test util to create a basic DAG for testing.\n    \"\"\"\n\n    return DAG(dag_id='test_dag', default_args={'start_date': datetime(1970, 1, 1)})\n"
  },
  {
    "path": "tests/util/test_class_utils.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nfrom unittest import TestCase\n\nfrom liminal.core.util import class_util\nfrom tests.util.test_pkg_1.test_clazz_base import A\nfrom tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_1 import B\nfrom tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_2 import C\nfrom tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_1.test_clazz_leaf_1 import D\nfrom tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_2.test_clazz_leaf_2 import E\n\n\nclass Test(TestCase):\n    def test_find_full_hierarchy_from_root(self):\n        expected_set = {\n            'test_clazz_child_1': B,\n            'test_clazz_child_2': C,\n            'test_clazz_leaf_1': D,\n            'test_clazz_leaf_2': E,\n        }\n        self.hierarchy_check(A, expected_set)\n\n    def hierarchy_check(self, clazz, expected_dict):\n        pkg_root = 'tests.util.test_pkg_1'\n        result_dict = class_util.find_subclasses_in_packages([pkg_root], clazz)\n        self.assertDictEqual(result_dict, expected_dict)\n"
  },
  {
    "path": "tests/util/test_pkg_1/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/util/test_pkg_1/test_clazz_base.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\nclass A:\n    pass\n"
  },
  {
    "path": "tests/util/test_pkg_1/test_pkg_1_1/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_1.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\nfrom tests.util.test_pkg_1.test_clazz_base import A\n\n\nclass B(A):\n    pass\n\n\nclass M:\n    pass\n"
  },
  {
    "path": "tests/util/test_pkg_1/test_pkg_1_1/test_clazz_child_2.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\nfrom tests.util.test_pkg_1.test_clazz_base import A\n\n\nclass C(A):\n    pass\n"
  },
  {
    "path": "tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_1/test_clazz_leaf_1.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n\nfrom tests.util.test_pkg_1.test_pkg_1_1.test_clazz_child_1 import B\n\n\nclass D(B):\n    pass\n"
  },
  {
    "path": "tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/__init__.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n"
  },
  {
    "path": "tests/util/test_pkg_1/test_pkg_1_1/test_pkg_1_1_2/test_clazz_leaf_2.py",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nfrom tests.util.test_pkg_1.test_pkg_1_1.test_pkg_1_1_1.test_clazz_leaf_1 import D\n\n\nclass E(D):\n    pass\n"
  },
  {
    "path": "yamllint-config.yml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\n---\n# https://yamllint.readthedocs.io/en/stable/\nextends: default\n\nlocale: en_US.UTF-8\n\nyaml-files:\n  - '*.yaml'\n  - '*.yml'\n  - .yamllint\n\nrules:\n  line-length:\n    max: 140\n    # level: warning\n  brackets: enable\n  colons: enable\n  commas: enable\n  empty-lines: enable\n  empty-values: disable\n  key-duplicates: disable\n  hyphens: enable\n  key-ordering: disable\n  new-line-at-end-of-file: enable\n  new-lines: enable\n  # quoted-strings: disable\n  truthy:\n    level: warning\n  indentation:\n    spaces: 2\n    indent-sequences: true\nignore: |\n  docs/\n  .asf.yaml\n"
  }
]