Repository: spotty-cloud/spotty Branch: dev Commit: bdbacd4e893b Files: 250 Total size: 358.8 KB Directory structure: gitextract_scgum9gk/ ├── .github/ │ ├── FUNDING.yml │ └── workflows/ │ ├── generate-docs.yml │ └── python-publish.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── bin/ │ └── spotty ├── docs/ │ ├── Makefile │ ├── make.bat │ ├── requirements.txt │ └── source/ │ ├── _static/ │ │ ├── favicon/ │ │ │ ├── browserconfig.xml │ │ │ └── site.webmanifest │ │ ├── scripts.js │ │ └── styles.css │ ├── conf.py │ ├── docs/ │ │ ├── cli/ │ │ │ ├── spotty-aws.rst │ │ │ ├── spotty-download.rst │ │ │ ├── spotty-exec.rst │ │ │ ├── spotty-run.rst │ │ │ ├── spotty-sh.rst │ │ │ ├── spotty-start.rst │ │ │ ├── spotty-stop.rst │ │ │ ├── spotty-sync.rst │ │ │ └── spotty.rst │ │ ├── providers/ │ │ │ ├── aws/ │ │ │ │ ├── caching-docker-image-on-an-ebs-volume.md │ │ │ │ ├── ebs-volumes-and-deletion-policies.md │ │ │ │ ├── faq.md │ │ │ │ ├── instance-parameters.md │ │ │ │ └── overview.rst │ │ │ ├── gcp/ │ │ │ │ ├── account-preparation.md │ │ │ │ ├── caching-docker-image-on-a-disk.md │ │ │ │ ├── disks-and-deletion-policies.md │ │ │ │ ├── instance-parameters.md │ │ │ │ └── overview.rst │ │ │ ├── local/ │ │ │ │ ├── instance-parameters.md │ │ │ │ └── overview.rst │ │ │ └── remote/ │ │ │ ├── instance-parameters.md │ │ │ └── overview.rst │ │ └── user-guide/ │ │ ├── configuration-file.md │ │ ├── getting-started.md │ │ └── installation.md │ ├── index.rst │ └── main.html ├── setup.cfg ├── setup.py ├── spotty/ │ ├── __init__.py │ ├── cli.py │ ├── commands/ │ │ ├── __init__.py │ │ ├── abstract_command.py │ │ ├── abstract_config_command.py │ │ ├── abstract_provider_command.py │ │ ├── aws.py │ │ ├── download.py │ │ ├── exec.py │ │ ├── run.py │ │ ├── sh.py │ │ ├── start.py │ │ ├── status.py │ │ ├── stop.py │ │ ├── sync.py │ │ └── writers/ │ │ ├── __init__.py │ │ ├── abstract_output_writrer.py │ │ ├── null_output_writrer.py │ │ └── output_writrer.py │ ├── config/ │ │ ├── 
__init__.py │ │ ├── abstract_instance_config.py │ │ ├── abstract_instance_volume.py │ │ ├── config_utils.py │ │ ├── container_config.py │ │ ├── host_path_volume.py │ │ ├── project_config.py │ │ ├── tmp_dir_volume.py │ │ └── validation.py │ ├── configuration.py │ ├── deployment/ │ │ ├── __init__.py │ │ ├── abstract_cloud_instance/ │ │ │ ├── __init__.py │ │ │ ├── abstract_bucket_manager.py │ │ │ ├── abstract_cloud_instance_manager.py │ │ │ ├── abstract_data_transfer.py │ │ │ ├── abstract_instance_deployment.py │ │ │ ├── errors/ │ │ │ │ ├── __init__.py │ │ │ │ └── bucket_not_found.py │ │ │ ├── file_structure.py │ │ │ └── resources/ │ │ │ ├── __init__.py │ │ │ ├── abstract_bucket.py │ │ │ └── abstract_instance.py │ │ ├── abstract_docker_instance_manager.py │ │ ├── abstract_instance_manager.py │ │ ├── abstract_ssh_instance_manager.py │ │ ├── container/ │ │ │ ├── __init__.py │ │ │ ├── abstract_container_commands.py │ │ │ ├── abstract_container_script.py │ │ │ └── docker/ │ │ │ ├── __init__.py │ │ │ ├── docker_commands.py │ │ │ └── scripts/ │ │ │ ├── __init__.py │ │ │ ├── abstract_docker_script.py │ │ │ ├── container_bash_script.py │ │ │ ├── data/ │ │ │ │ ├── container_bash.sh.tpl │ │ │ │ ├── start_container.sh.tpl │ │ │ │ └── stop_container.sh.tpl │ │ │ ├── start_container_script.py │ │ │ └── stop_container_script.py │ │ └── utils/ │ │ ├── __init__.py │ │ ├── cli.py │ │ ├── commands.py │ │ ├── print_info.py │ │ └── user_scripts.py │ ├── errors/ │ │ ├── __init__.py │ │ ├── instance_not_running.py │ │ └── nothing_to_do.py │ ├── providers/ │ │ ├── __init__.py │ │ ├── aws/ │ │ │ ├── __init__.py │ │ │ ├── cfn_templates/ │ │ │ │ ├── __init__.py │ │ │ │ ├── instance/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── data/ │ │ │ │ │ │ ├── files/ │ │ │ │ │ │ │ └── tmux.conf │ │ │ │ │ │ ├── startup_scripts/ │ │ │ │ │ │ │ ├── 01_prepare_instance.sh │ │ │ │ │ │ │ ├── 02_mount_volumes.sh │ │ │ │ │ │ │ ├── 03_set_docker_root.sh │ │ │ │ │ │ │ ├── 04_sync_project.sh │ │ │ │ │ │ │ ├── 
05_run_instance_startup_commands.sh │ │ │ │ │ │ │ └── user_data.sh │ │ │ │ │ │ └── template.yaml │ │ │ │ │ ├── start_container_script.py │ │ │ │ │ └── template.py │ │ │ │ └── instance_profile/ │ │ │ │ ├── __init__.py │ │ │ │ ├── data/ │ │ │ │ │ └── template.yaml │ │ │ │ └── template.py │ │ │ ├── commands/ │ │ │ │ ├── __init__.py │ │ │ │ ├── clean_logs.py │ │ │ │ └── spot_prices.py │ │ │ ├── config/ │ │ │ │ ├── __init__.py │ │ │ │ ├── ebs_volume.py │ │ │ │ ├── instance_config.py │ │ │ │ └── validation.py │ │ │ ├── data_transfer.py │ │ │ ├── deletion_policies.py │ │ │ ├── errors/ │ │ │ │ ├── __init__.py │ │ │ │ └── volume_not_found.py │ │ │ ├── helpers/ │ │ │ │ ├── __init__.py │ │ │ │ ├── ami.py │ │ │ │ ├── availability_zone.py │ │ │ │ ├── instance_prices.py │ │ │ │ ├── logs.py │ │ │ │ ├── s3_sync.py │ │ │ │ ├── subnet.py │ │ │ │ └── vpc.py │ │ │ ├── instance_deployment.py │ │ │ ├── instance_manager.py │ │ │ ├── resource_managers/ │ │ │ │ ├── __init__.py │ │ │ │ ├── bucket_manager.py │ │ │ │ ├── instance_profile_stack_manager.py │ │ │ │ ├── instance_stack_manager.py │ │ │ │ └── key_pair_manager.py │ │ │ └── resources/ │ │ │ ├── __init__.py │ │ │ ├── bucket.py │ │ │ ├── image.py │ │ │ ├── instance.py │ │ │ ├── snapshot.py │ │ │ ├── stack.py │ │ │ ├── subnet.py │ │ │ ├── volume.py │ │ │ └── vpc.py │ │ ├── gcp/ │ │ │ ├── __init__.py │ │ │ ├── config/ │ │ │ │ ├── __init__.py │ │ │ │ ├── disk_volume.py │ │ │ │ ├── image_uri.py │ │ │ │ ├── instance_config.py │ │ │ │ └── validation.py │ │ │ ├── data_transfer.py │ │ │ ├── dm_templates/ │ │ │ │ ├── __init__.py │ │ │ │ └── instance/ │ │ │ │ ├── __init__.py │ │ │ │ ├── data/ │ │ │ │ │ ├── startup_script.sh.tpl │ │ │ │ │ ├── startup_scripts/ │ │ │ │ │ │ ├── 01_prepare_instance.sh │ │ │ │ │ │ ├── 02_mount_volumes.sh │ │ │ │ │ │ ├── 03_set_docker_root.sh │ │ │ │ │ │ ├── 04_sync_project.sh │ │ │ │ │ │ └── 05_run_instance_startup_commands.sh │ │ │ │ │ └── template.yaml │ │ │ │ └── instance_template.py │ │ │ ├── errors/ │ │ │ │ ├── 
__init__.py │ │ │ │ └── image_not_found.py │ │ │ ├── helpers/ │ │ │ │ ├── __init__.py │ │ │ │ ├── ce_client.py │ │ │ │ ├── deployment.py │ │ │ │ ├── dm_client.py │ │ │ │ ├── dm_resource.py │ │ │ │ ├── gcp_credentials.py │ │ │ │ ├── gs_client.py │ │ │ │ ├── gsutil_rsync.py │ │ │ │ ├── image.py │ │ │ │ ├── rtc_client.py │ │ │ │ └── volumes.py │ │ │ ├── instance_deployment.py │ │ │ ├── instance_manager.py │ │ │ ├── resource_managers/ │ │ │ │ ├── __init__.py │ │ │ │ ├── bucket_manager.py │ │ │ │ ├── instance_stack_manager.py │ │ │ │ └── ssh_key_manager.py │ │ │ └── resources/ │ │ │ ├── __init__.py │ │ │ ├── bucket.py │ │ │ ├── disk.py │ │ │ ├── image.py │ │ │ ├── instance.py │ │ │ ├── snapshot.py │ │ │ └── stack.py │ │ ├── instance_manager_factory.py │ │ ├── local/ │ │ │ ├── __init__.py │ │ │ ├── config/ │ │ │ │ ├── __init__.py │ │ │ │ ├── instance_config.py │ │ │ │ └── validation.py │ │ │ └── instance_manager.py │ │ └── remote/ │ │ ├── __init__.py │ │ ├── config/ │ │ │ ├── __init__.py │ │ │ ├── instance_config.py │ │ │ └── validation.py │ │ ├── helpers/ │ │ │ └── rsync.py │ │ └── instance_manager.py │ └── utils.py └── tests/ ├── __init__.py ├── container_config.py ├── helpers/ │ ├── __init__.py │ ├── cli.py │ └── spotty_cli.py └── providers/ ├── __init__.py ├── aws/ │ ├── __init__.py │ ├── commands/ │ │ ├── data/ │ │ │ └── test-project/ │ │ │ ├── ignored-dir/ │ │ │ │ ├── ignored-file │ │ │ │ └── included-file │ │ │ ├── ignored-file │ │ │ ├── local-file │ │ │ └── spotty.yaml │ │ ├── download.py │ │ └── sync.py │ ├── config/ │ │ ├── __init__.py │ │ ├── container_deployment.py │ │ ├── data/ │ │ │ ├── config-wo-mounts.yaml │ │ │ └── config1.yaml │ │ └── instance_config_validation.py │ └── project_resources/ │ ├── __init__.py │ ├── bucket.py │ └── key_pair.py ├── gcp/ │ └── config/ │ ├── __init__.py │ └── image_uri.py └── local/ ├── __init__.py ├── commands/ │ ├── __init__.py │ ├── data/ │ │ └── test-project/ │ │ └── spotty.yaml │ └── run.py └── config/ ├── __init__.py ├── 
container_deployment.py └── data/ └── config1.yaml ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/FUNDING.yml ================================================ github: [apls777] ================================================ FILE: .github/workflows/generate-docs.yml ================================================ # This workflows will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Generate Docs on: push: branches: - master jobs: update-doc: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v1 with: python-version: 3.6 - name: generate docs run: | cd docs pip install -r requirements.txt make html cd build/html touch .nojekyll echo "spotty.cloud" > CNAME git init git config --local user.email "github-bot@spotty.cloud" git config --local user.name "Spotty Dev Bot" git add . 
git commit -m "generated docs" -a - uses: ad-m/github-push-action@master with: github_token: ${{ secrets.BOT_GITHUB_TOKEN }} repository: spotty-cloud/website force: true directory: docs/build/html ================================================ FILE: .github/workflows/python-publish.yml ================================================ # This workflows will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Upload Python Package on: release: types: [created] jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: '3.6' - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel twine - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python setup.py sdist bdist_wheel twine upload dist/* ================================================ FILE: .gitignore ================================================ .idea/ build/ dist/ *.egg-info/ __pycache__/ todo ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to Spotty **Thank you for your interest in Spotty. Your contributions are highly welcome.** There are multiple ways of getting involved: - [Report a bug](#report-a-bug) - [Suggest a feature](#suggest-a-feature) - [Contribute code](#contribute-code) Below are a few guidelines we would like you to follow. If you need help, please reach out to us by opening an issue. ## Report a bug Reporting bugs is one of the best ways to contribute. Before creating a bug report, please check that an [issue](https://github.com/spotty-cloud/spotty/issues) reporting the same problem does not already exist. 
If there is such an issue, you may add your information as a comment. To report a new bug you should open an issue that summarizes the bug and set the label to "bug". If you want to provide a fix along with your bug report: that is great! In this case please send us a pull request as described in section [Contribute Code](#contribute-code). ## Suggest a feature To request a new feature you should open an [issue](https://github.com/spotty-cloud/spotty/issues/new) and summarize the desired functionality and its use case. Set the issue label to "feature". ## Contribute code This is a rough outline of what the workflow for code contributions looks like: - Check the list of open [issues](https://github.com/spotty-cloud/spotty/issues). Either assign an existing issue to yourself or create a new one that you would like to work on and discuss your ideas and use cases. It is always best to discuss your plans beforehand, to ensure that your contribution is in line with our goals for Spotty. - Fork the repository on GitHub - Create a topic branch from where you want to base your work. This is usually the master. - Make commits of logical units - Write good commit messages (see below) - Push your changes to a topic branch in your fork of the repository - Submit a pull request to [spotty-cloud/spotty](https://github.com/spotty-cloud/spotty) Thanks for your contributions! ### Commit messages Your commit messages ideally can answer two questions: what changed and why. The subject line should feature the "what" and the body of the commit should describe the "why". When creating a pull request, its comment should reference the corresponding issue ID. 
**Have fun and enjoy hacking!** ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) 2017 Oleg Polosin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ [![Documentation](https://img.shields.io/badge/documentation-reference-brightgreen.svg)](https://spotty.cloud) [![PyPI](https://img.shields.io/pypi/v/spotty.svg)](https://pypi.org/project/spotty/) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/spotty.svg) ![PyPI - License](https://img.shields.io/pypi/l/spotty.svg) Spotty drastically simplifies training of deep learning models on [AWS](https://aws.amazon.com/) and [GCP](https://cloud.google.com/): - it makes training on GPU instances as simple as training on your local machine - it automatically manages all necessary cloud resources including images, volumes, snapshots and SSH keys - it makes your model trainable in the cloud by everyone with a couple of commands - it uses [tmux](https://en.wikipedia.org/wiki/Tmux) to easily detach remote processes from their terminals - it saves you up to 70% of the costs by using [AWS Spot Instances](https://aws.amazon.com/ec2/spot/) and [GCP Preemptible VMs](https://cloud.google.com/preemptible-vms/) ## Documentation - See the [documentation page](https://spotty.cloud). - Read [this](https://medium.com/@apls/how-to-train-deep-learning-models-on-aws-spot-instances-using-spotty-8d9e0543d365) article on Medium for a real-world example. ## Installation Requirements: * Python >=3.6 * AWS CLI (see [Installing the AWS Command Line Interface](http://docs.aws.amazon.com/cli/latest/userguide/installing.html)) if you're using AWS * Google Cloud SDK (see [Installing Google Cloud SDK](https://cloud.google.com/sdk/install)) if you're using GCP Use [pip](http://www.pip-installer.org/en/latest/) to install or upgrade Spotty: $ pip install -U spotty ## Get Started 1. Prepare a `spotty.yaml` file and put it in the root directory of your project: - See the file specification [here](https://spotty.cloud/docs/user-guide/configuration-file.html). 
- Read [this](https://medium.com/@apls/how-to-train-deep-learning-models-on-aws-spot-instances-using-spotty-8d9e0543d365) article for a real-world example. 2. Start an instance: ```bash $ spotty start ``` It will run a Spot Instance, restore snapshots if any, synchronize the project with the running instance and start the Docker container with the environment. 3. Train a model or run notebooks. To connect to the running container via SSH, use the following command: ```bash $ spotty sh ``` It runs a [tmux](https://github.com/tmux/tmux/wiki) session, so you can always detach this session using __`Ctrl + b`__, then __`d`__ combination of keys. To be attached to that session later, just use the `spotty sh` command again. Also, you can run your custom scripts inside the Docker container using the `spotty run ` command. Read more about custom scripts in the documentation: [Configuration: "scripts" section](https://spotty.cloud/docs/configuration-file/#scripts-section-optional). ## Contributions Any feedback or contributions are welcome! Please check out the [guidelines](CONTRIBUTING.md). 
## License [MIT License](LICENSE) ================================================ FILE: bin/spotty ================================================ #!/usr/bin/env python import sys import logging import spotty from spotty.cli import get_parser from spotty.commands.writers.output_writrer import OutputWriter parser = get_parser() args = sys.argv[1:] output = OutputWriter() # display the version if '-V' in args: output.write(spotty.__version__) sys.exit(0) # separate Spotty arguments from custom arguments custom_args = [] if '--' in args: dd_idx = args.index('--') custom_args = args[(dd_idx + 1):] args = args[:dd_idx] # parse arguments args = parser.parse_args(args) args.custom_args = custom_args # logging logging_level = logging.DEBUG if 'debug' in args and args.debug else logging.WARNING logging.basicConfig(level=logging_level, format='[%(levelname)s] %(message)s') if 'command' not in args: parser.print_help() sys.exit(1) # run a command try: args.command.run(args, output) except Exception as e: output.write('Error:\n' '------\n' '%s' % str(e)) sys.exit(1) ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = python -msphinx SPHINXPROJ = spotty SOURCEDIR = source BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=python -msphinx ) set SOURCEDIR=source set BUILDDIR=build set SPHINXPROJ=spotty if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The Sphinx module was not found. Make sure you have Sphinx installed, echo.then set the SPHINXBUILD environment variable to point to the full echo.path of the 'sphinx-build' executable. Alternatively you may add the echo.Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd ================================================ FILE: docs/requirements.txt ================================================ sphinx==3.1.2 recommonmark==0.6.0 sphinx-argparse==0.2.5 sphinx-rtd-theme==0.5.0 PyYAML schema chevron boto3 ================================================ FILE: docs/source/_static/favicon/browserconfig.xml ================================================ #00aba9 ================================================ FILE: docs/source/_static/favicon/site.webmanifest ================================================ { "name": "", "short_name": "", "icons": [ { "src": "/android-chrome-192x192.png", "sizes": "192x192", "type": "image/png" }, { "src": "/android-chrome-512x512.png", "sizes": "512x512", "type": "image/png" } ], "theme_color": "#ffffff", "background_color": "#ffffff", "display": "standalone" } ================================================ FILE: docs/source/_static/scripts.js ================================================ window.onload = function() { var links = 
document.querySelectorAll('a.external'); for(var i = 0; i < links.length; i++) { links[i].target = '_blank'; } } ================================================ FILE: docs/source/_static/styles.css ================================================ .wy-nav-content { max-width: 1280px; } .wy-side-nav-search { background: none; } .wy-menu-vertical header, .wy-menu-vertical p.caption { color: #e44859; } .wy-side-nav-search > a img.logo, .wy-side-nav-search .wy-dropdown > a img.logo { max-width: 75%; } .wy-breadcrumbs a.icon-home { color: #e44859; } .section#welcome-to-spotty-documentation { display: none; } ================================================ FILE: docs/source/conf.py ================================================ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # # spotty documentation build configuration file, created by # sphinx-quickstart on Fri Jul 17 16:00:08 2020. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # # import os # import sys # sys.path.insert(0, os.path.abspath('.')) import os import sys sys.path.insert(0, os.path.abspath('../..')) import spotty # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ 'recommonmark', 'sphinxarg.ext', 'sphinx_rtd_theme', 'sphinx.ext.autosectionlabel', ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = { '.rst': 'restructuredtext', '.md': 'markdown', } # The master toctree document. master_doc = 'index' # General information about the project. project = 'spotty' copyright = '2020, Oleg Polosin' author = 'Oleg Polosin' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = spotty.__version__ # The full version, including alpha/beta/rc tags. release = spotty.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = [] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # to display double-dash (--) in epilogs of some Spotty commands smartquotes = False # Prefix document path to section labels, otherwise autogenerated labels would look like 'heading' # rather than 'path/to/file:heading' autosectionlabel_prefix_document = True # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
# html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { 'logo_only': True, 'style_external_links': True, 'collapse_navigation': False, 'titles_only': True, } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { '**': [ 'about.html', 'navigation.html', 'relations.html', # needs 'show_related': True theme option to display 'searchbox.html', 'donate.html', ] } html_show_copyright = False html_show_sphinx = False html_show_sourcelink = False html_logo = '_static/images/logo_400x130_grey.png' html_favicon = '_static/favicon/favicon.ico' html_css_files = [ 'styles.css', ] html_js_files = [ 'scripts.js', ] # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. htmlhelp_basename = 'spottydoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). 
latex_documents = [ (master_doc, 'spotty.tex', 'spotty Documentation', 'Oleg Polosin', 'manual'), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'spotty', 'spotty Documentation', [author], 1) ] # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'spotty', 'spotty Documentation', author, 'spotty', 'One line description of project.', 'Miscellaneous'), ] ================================================ FILE: docs/source/docs/cli/spotty-aws.rst ================================================ spotty aws ========== .. argparse:: :nodefaultconst: :ref: spotty.cli.get_parser :prog: spotty :path: aws ================================================ FILE: docs/source/docs/cli/spotty-download.rst ================================================ spotty download =============== .. argparse:: :nodefaultconst: :ref: spotty.cli.get_parser :prog: spotty :path: download ================================================ FILE: docs/source/docs/cli/spotty-exec.rst ================================================ spotty exec =========== .. argparse:: :nodefaultconst: :ref: spotty.cli.get_parser :prog: spotty :path: exec ================================================ FILE: docs/source/docs/cli/spotty-run.rst ================================================ spotty run ========== .. argparse:: :nodefaultconst: :ref: spotty.cli.get_parser :prog: spotty :path: run ================================================ FILE: docs/source/docs/cli/spotty-sh.rst ================================================ spotty sh ========= .. 
argparse:: :nodefaultconst: :ref: spotty.cli.get_parser :prog: spotty :path: sh ================================================ FILE: docs/source/docs/cli/spotty-start.rst ================================================ spotty start ============ .. argparse:: :nodefaultconst: :ref: spotty.cli.get_parser :prog: spotty :path: start ================================================ FILE: docs/source/docs/cli/spotty-stop.rst ================================================ spotty stop =========== .. argparse:: :nodefaultconst: :ref: spotty.cli.get_parser :prog: spotty :path: stop ================================================ FILE: docs/source/docs/cli/spotty-sync.rst ================================================ spotty sync =========== .. argparse:: :nodefaultconst: :ref: spotty.cli.get_parser :prog: spotty :path: sync ================================================ FILE: docs/source/docs/cli/spotty.rst ================================================ Spotty Command-line Interface ============================= .. argparse:: :nosubcommands: :nodefaultconst: :noepilog: :nodescription: :ref: spotty.cli.get_parser :prog: spotty .. toctree:: :maxdepth: 1 :caption: Sub-commands spotty-start spotty-stop spotty-sh spotty-sync spotty-download spotty-run spotty-exec .. toctree:: :maxdepth: 1 :caption: Custom provider sub-commands spotty-aws ================================================ FILE: docs/source/docs/providers/aws/caching-docker-image-on-an-ebs-volume.md ================================================ # Caching Docker Image on an EBS Volume You can cache images that you've built or downloaded from the internet on an EBS volume or in a snapshot. A configuration file has the "__dockerDataRoot__" parameter. It's a directory on the host OS where the Docker daemon will save all the images. Specify the `mountDir` directory for one of the instance volumes and set the `dockerDataRoot` parameter to the same value (or to a subdirectory of the `mountDir` directory). 
Also, consider changing a deletion policy for that volume to "__retain__", then the volume with the cache will be retained and the next time it just will be attached to the instance. Example: ```yaml # ... instances: - name: aws-1 provider: aws parameters: # ... dockerDataRoot: /docker volumes: # ... - name: docker parameters: size: 10 mountDir: /docker ``` ================================================ FILE: docs/source/docs/providers/aws/ebs-volumes-and-deletion-policies.md ================================================ # EBS Volumes and Deletion Policies By default, EBS volumes have names in the following format: `--`. But you can specify a custom name using the `volumeName` parameter. When you're starting an instance: 1. Spotty is looking for existing EBS volumes using their names. If a volume exists, it will be attached to the instance. 2. If not - Spotty will be looking for a snapshot with the same name. If the snapshot exists, the volume will be restored from that snapshot. 3. If neither snapshot nor volume with this name exists, new EBS volume will be created. When you're stopping the instance Spotty applies deletion policies for the volumes. There are 4 deletion policies that can be specified using the `deletionPolicy` parameter: - __Retain__: this is the default deletion policy. The volume will retain, a snapshot won't be created. - __CreateSnapshot__: Spotty will create a new snapshot every time you're stopping an instance, the old snapshot will be renamed. AWS uses incremental snapshots, so each new snapshot keeps only the data that was changed since the last snapshot made (see: [How Incremental Snapshots Work](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSSnapshots.html#how_snapshots_work)). - __UpdateSnapshot__: a new snapshot will be created and the old one will be deleted. - __Delete__: the volume will be deleted without creating a snapshot. All data on this volume will be lost. 
================================================ FILE: docs/source/docs/providers/aws/faq.md ================================================ # FAQ ## How does Spotty choose the AWS Availability Zone where to run the instance? 1. If the AZ is specified in the configuration file, this AZ will be used to run the instance. 2. If the instance already has some EBS volumes created, Spotty will pick up the volumes' AZ. 3. Otherwise Spotty will let AWS choose an AZ. Automatically chosen AZ might not have the lowest Spot price, but in practice, it usually does. Spotty will raise an error if the AZ in the configuration file doesn't match AZs of the volumes or AZs of the volumes are different. ## Why is the instance taking so long to launch? Most likely the instance cannot be launched because you're trying to launch a Spot instance and it cannot be fulfilled. You can try to change the region or availability zone, choose another type of the instance, or run an On-demand Instance by removing the `spotInstance` parameter or setting it to `false`. ## The instance failed to start. Where can I find the logs? 1. If the CloudFormation stack failed when it was launching the instance itself, then you need to log in to your AWS Console and check CloudFormation logs there. 2. If the stack failed after the instance is launched, then most likely the container failed to start because of the startup commands. In this case, Spotty usually automatically downloads necessary logs to your local machine and shows where to find them. If that didn't happen, you can connect to the host OS using the following command: ```bash spotty sh -H ``` Then you can check the `cfn-init` logs to find out why the container failed: ```bash sudo tail /var/log/cfn-init-cmd.log ``` ## How to ssh to a Spotty instance from a different machine? When you start an instance, Spotty creates an EC2 Key Pair and downloads a private key to the `~/.spotty/keys/aws` directory.
If you want to have access to the instance from a different machine using the `spotty sh` or the `spotty run` commands, you need to copy the private key to that machine to the same directory. __Note:__ if you already have an EC2 Key Pair created for the project and the private key was saved on the machine A (where from an instance was launched the first time) and then you're running an instance for the same project from the machine B that doesn't have a private key in the `~/.spotty/keys/aws` directory, then the EC2 Key Pair will be recreated and the machine A will not be able to connect to instances because its private key doesn't match the EC2 Key Pair anymore. ================================================ FILE: docs/source/docs/providers/aws/instance-parameters.md ================================================ # Instance Parameters Instance parameters are part of the [configuration file], but for each provider they are different. Here you can find parameters for an AWS instance: - __`containerName`__ _(optional)_ - a name of the container from the `containers` section. Default value: `default`. - __`region`__ - AWS region where to run an instance (you can use command `spotty aws spot-prices` to find the cheapest region). - __`availabilityZone`__ _(optional)_ - AWS availability zone where to run an instance. If a zone is not specified, it will be chosen automatically. - __`subnetId`__ _(optional)_ - AWS subnet ID. If this parameter is set, the "availabilityZone" parameter should be set as well. If it's not specified, a default subnet will be used. - __`instanceType`__ - a type of the instance to run. You can find more information about types of GPU instances here: [Recommended GPU Instances](https://docs.aws.amazon.com/dlami/latest/devguide/gpu.html). - __`spotInstance`__ _(optional)_ - if set to `true`, runs a Spot instance instead of an On-demand instance, - __`amiName`__ _(optional)_ - a name of the AMI with NVIDIA Docker (default value is "SpottyAMI"). 
Use the `spotty aws create-ami` command to create it. This AMI will be used to run your application inside the Docker container. - __`amiId`__ _(optional)_ - ID of the AMI with NVIDIA Docker. This parameter can be used to run an instance using a shared Spotty AMI. - __`maxPrice`__ _(optional)_ - the maximum price per hour that you are willing to pay for a Spot Instance. By default, it's the On-demand price for the chosen instance type. Read more here: [Spot Instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-spot-instances.html). - __`rootVolumeSize`__ _(optional)_ - size of the root volume in GB. The root volume will be destroyed once the instance is terminated. Use attached volumes to store the data you need to keep (see "volumes" parameter below). - __`dockerDataRoot`__ _(optional)_ - directory where Docker will store all downloaded and built images. Read more: [Caching Docker Image on an EBS Volume]. - __`volumes`__ _(optional)_ - the list of volumes to attach to the instance: - __`name`__ - a name of the volume. This name should match one of the container's `volumeMounts` to have this volume attached to the container's filesystem. - __`parameters`__ _(optional)_ - parameters of the volume: - __`type`__ _(optional)_ - the volume type. Supported types: "__gp2__", "__sc1__", "__st1__" and "__standard__". The default value is "gp2". Read more here: [Amazon EBS Volume Types](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html). - __`size`__ _(optional)_ - size of the volume in GB. Size of the volume cannot be less than the size of the existing snapshot but can be increased. - __`deletionPolicy`__ _(optional)_ - what to do with the volume once the instance is terminated using the `spotty stop` command. Possible values include: "__Retain__" _(value by default)_, "__CreateSnapshot__", "__UpdateSnapshot__" and "__Delete__". Read more here: [EBS Volumes and Deletion Policies]. - __`volumeName`__ _(optional)_ - name of the EBS volume. 
The default name is "{project_name}-{instance_name}-{volume_name}". - __`mountDir`__ _(optional)_ - directory where the volume will be mounted on the instance. The default directory is "/mnt/{ebs_volume_name}". - __`ports`__ _(optional)_ - list of ports to open on the instance. For example: ```yaml ports: [6006, 8888] ``` It will open ports 6006 for TensorBoard and 8888 for Jupyter Notebook. - __`localSshPort`__ _(optional)_ - if this parameter is set, all the Spotty commands will create SSH connections with the instance using the IP address __127.0.0.1__ and the specified port. This can be useful in case when an instance doesn't have a public IP address and a jump-server is used for tunneling. - __`managedPolicyArns`__ _(optional)_ - a list of Amazon Resource Names (ARNs) of the IAM managed policies that you want to attach to the instance role. Read more about Managed Policies [here](https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies_managed-vs-inline.html). - __`instanceProfileArn`__ _(optional)_ - an Amazon Resource Name (ARN) of the IAM Instance Profile that you'd like to attach to the instance. Read more about Instance Profiles [here](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2_instance-profiles.html). - __`commands`__ _(optional)_ - commands that should be run on the host OS before the container is started. For example, you could login to Amazon ECR to pull a Docker image from there ([Deep Learning Containers Images](https://docs.aws.amazon.com/dlami/latest/devguide/deep-learning-containers-images.html)): ```yaml commands: | $(aws ecr get-login --no-include-email --region us-east-2 --registry-ids 763104351884) ``` [configuration file]: [Caching Docker Image on an EBS Volume]: [EBS Volumes and Deletion Policies]: ================================================ FILE: docs/source/docs/providers/aws/overview.rst ================================================ AWS Provider Overview ===================== .. 
toctree:: instance-parameters ebs-volumes-and-deletion-policies caching-docker-image-on-an-ebs-volume faq ================================================ FILE: docs/source/docs/providers/gcp/account-preparation.md ================================================ # GCP Account Preparation 1. [Create a project](https://console.cloud.google.com/projectcreate) if you don't have one already. 2. Enable the [Deployment Manager API](https://console.cloud.google.com/apis/library/deploymentmanager.googleapis.com) for the created project. 3. Enable the [Runtime Configuration API](https://console.developers.google.com/apis/library/runtimeconfig.googleapis.com) for the created project. 4. [Create a service account](https://console.cloud.google.com/iam-admin/serviceaccounts/create). 5. Go to the [IAM page](https://console.cloud.google.com/iam-admin/iam) and add the following roles to the created service account: 1. _Compute Admin_ 2. _Storage Admin_ 3. _Deployment Manager Editor_ 4. _Cloud RuntimeConfig Admin_ 6. Make sure you have a quota to run GPU instances: 1. Go to the quotas page in "IAM & admin" and filter the list of services by setting the Metric field to __"GPUs (all regions)"__: [https://console.cloud.google.com/iam-admin/quotas?metric=GPUs%20(all%20regions)](https://console.cloud.google.com/iam-admin/quotas?metric=GPUs%20(all%20regions)). 2. Check the limit for the __"Compute Engine API"__ service. If it's a zero, select the service and click the __"[+] EDIT QUOTAS"__ button at the top of the page. 3. Set a new quota limit to 1 or more and submit the request. 7. [Install Google Cloud SDK](https://cloud.google.com/sdk/install). 8. 
Before using Spotty commands like `spotty start`, `spotty run` and others, make sure that the `GOOGLE_APPLICATION_CREDENTIALS` environmental variable is set up and contains the path to your service account key file: ```bash export GOOGLE_APPLICATION_CREDENTIALS="/path/to/the/service/account/key/file.json" ``` ================================================ FILE: docs/source/docs/providers/gcp/caching-docker-image-on-a-disk.md ================================================ # Caching Docker Image on a Disk You can cache images that you've built or downloaded from the internet on a disk that you attach to the instance. A configuration file has the "__dockerDataRoot__" parameter. It's a directory on the host OS where the Docker daemon will save all the images. Specify the `mountDir` directory for one of the instance volumes and set the `dockerDataRoot` parameter to the same value (or to a subdirectory of the `mountDir` directory). Example: ```yaml # ... instances: - name: gcp-1 provider: gcp parameters: # ... dockerDataRoot: /docker volumes: # ... - name: docker parameters: size: 10 mountDir: /docker ``` ================================================ FILE: docs/source/docs/providers/gcp/disks-and-deletion-policies.md ================================================ # Disks and Deletion Policies By default, disks have names in the following format: `--`. But you can specify a custom name using the `diskName` parameter. When you're starting an instance: 1. Spotty is looking for existing disks using their names. If a disk exists, it will be attached to the instance. 2. If not - Spotty will be looking for a snapshot with the same name. If the snapshot exists, the disk will be restored from that snapshot. 3. If neither snapshot nor disk with this name exists, a new disk will be created.
__Note:__ Deletion Policies for the GCP provider are not implemented yet, so, regardless of the `deletionPolicy` parameter value, created disks will be retained when the instance is terminated. ================================================ FILE: docs/source/docs/providers/gcp/instance-parameters.md ================================================ # Instance Parameters Instance parameters are part of the [configuration file], but for each provider they are different. Here you can find parameters for a GCP instance: - __`containerName`__ _(optional)_ - a name of the container from the `containers` section. Default value: `default`. - __`zone`__ - GCP zone where to run an instance. - __`machineType`__ - a type of the instance to run. You can find a list of predefined machine types here: [Machine Types](https://cloud.google.com/compute/docs/machine-types). If you are in doubt what to use, just go for `n1-standard-1`. To attach GPUs to the selected machine type, use the `gpu` parameter (see the details below). - __`gpu`__ _(optional)_ - _a dictionary with keys `type` and `count`_: - __`type`__ - a type of GPU to attach to the instance. Read more about GPUs and their availability in different zones here: [GPUs on Compute Engine](https://cloud.google.com/compute/docs/gpus/). - __`count`__ _(optional)_ - a number of GPUs that should be attached to the instance. The default value is 1. See here a number of GPUs that you can attach to different machine types: [Valid numbers of GPUs for each machine type](https://cloud.google.com/ml-engine/docs/tensorflow/using-gpus#gpu-compatibility-table). - __`preemptibleInstance`__ _(optional)_ - if set to `true`, runs a preemptible instance instead of an on-demand instance. __Note:__ be aware that GCP terminates preemptible instances in 24 hours. Read more about Preemptible VMs [here](https://cloud.google.com/compute/docs/instances/preemptible). - __`imageName`__ _(optional)_ - a name of the image with NVIDIA Docker in the current GCP project.
You can use the `spotty gcp create-image` command to create it. By default, the command will create an image with the name "spotty". This image will be used to run your application inside the Docker container. If you didn't create your own image, see the behaviour of the `imageUrl` parameter. - __`imageUrl`__ _(optional)_ - a URL of the image with NVIDIA Docker. You can use this parameter to work with an image from another GCP project. If this parameter is not specified and you didn't create your own image (see the `imageName` parameter), Spotty will be using the `projects/spotty-cloud/global/images/family/spotty` image provided by the Spotty project. - __`bootDiskSize`__ _(optional)_ - size of the root volume in GB. The root volume will be destroyed once the instance is terminated. Use attached volumes to store the data that you need to keep (see the `volumes` parameter below). - __`dockerDataRoot`__ _(optional)_ - directory where Docker will store all downloaded and built images. Read more: [Caching Docker Image on a Disk]. - __`volumes`__ _(optional)_ - the list of volumes to attach to the instance: - __`name`__ - a name of the volume. This name should match one of the container's `volumeMounts` to have this volume attached to the container's filesystem. - __`parameters`__ _(optional)_ - parameters of the volume: - __`size`__ _(optional)_ - size of the disk in GB. Size of the disk cannot be less than the size of the existing snapshot but can be increased. - __`deletionPolicy`__ _(optional)_ - what to do with the disk once the instance is terminated using the `spotty stop` command. Possible values include: "__Retain__" _(value by default)_, "__CreateSnapshot__", "__UpdateSnapshot__" and "__Delete__". Read more: [Disks and Deletion Policies]. __(!) Note:__ Deletion Policies are not implemented yet, so created disks will always retain. - __`diskName`__ _(optional)_ - name of the disk. The default name is "{project_name}-{instance_name}-{volume_name}". 
- __`mountDir`__ _(optional)_ - directory where the disk will be mounted on the instance. The default directory is "/mnt/{disk_name}". - __`ports`__ _(optional)_ - list of ports to open on the instance. For example: ```yaml ports: [6006, 8888] ``` It will open ports 6006 for TensorBoard and 8888 for Jupyter Notebook. - __`localSshPort`__ _(optional)_ - if this parameter is set, all the Spotty commands will create SSH connections with the instance using the IP address __127.0.0.1__ and the specified port. This can be useful in case when an instance doesn't have a public IP address and a jump-server is used for tunneling. - __`commands`__ _(optional)_ - commands that should be run on the host OS before the container is started. [configuration file]: [Caching Docker Image on a Disk]: [Disks and Deletion Policies]: ================================================ FILE: docs/source/docs/providers/gcp/overview.rst ================================================ GCP Provider Overview ===================== .. toctree:: account-preparation instance-parameters disks-and-deletion-policies caching-docker-image-on-a-disk ================================================ FILE: docs/source/docs/providers/local/instance-parameters.md ================================================ # Instance Parameters Instance parameters are part of the [configuration file], but for each provider they are different. Here you can find parameters for a local instance: - __`containerName`__ _(optional)_ - a name of the container from the `containers` section. Default value: `default`. - __`volumes`__ _(optional)_ - the list of volumes to attach to the instance: - __`name`__ - a name of the volume. This name should match one of the container's `volumeMounts` to have this volume attached to the container's filesystem. - __`parameters`__ _(optional)_ - parameters of the volume: - __`path`__ _(optional)_ - a path on a local instance that should be mounted to the container. 
[configuration file]: ================================================ FILE: docs/source/docs/providers/local/overview.rst ================================================ Local Provider Overview ======================= .. toctree:: instance-parameters ================================================ FILE: docs/source/docs/providers/remote/instance-parameters.md ================================================ # Instance Parameters Instance parameters are part of the [configuration file], but for each provider they are different. Here you can find parameters for a remote instance: - __`containerName`__ _(optional)_ - a name of the container from the `containers` section. Default value: `default`. - __`volumes`__ _(optional)_ - the list of volumes to attach to the instance: - __`name`__ - a name of the volume. This name should match one of the container's `volumeMounts` to have this volume attached to the container's filesystem. - __`parameters`__ _(optional)_ - parameters of the volume: - __`path`__ _(optional)_ - a path on a remote instance that should be mounted to the container. [configuration file]: ================================================ FILE: docs/source/docs/providers/remote/overview.rst ================================================ Remote Provider Overview ======================== .. toctree:: instance-parameters ================================================ FILE: docs/source/docs/user-guide/configuration-file.md ================================================ # Spotty Configuration File By default, Spotty is looking for a `spotty.yaml` file in the root directory of the project. This file describes parameters of a remote instance and an environment for the project. 
Here is a basic example of such file for AWS: ```yaml project: name: my-project-name syncFilters: - exclude: - .git/* - .idea/* - '*/__pycache__/*' containers: - projectDir: /workspace/project image: tensorflow/tensorflow:latest-gpu-py3-jupyter env: PYTHONPATH: /workspace/project ports: # TensorBoard - containerPort: 6006 hostPort: 6006 # Jupyter - containerPort: 8888 hostPort: 8888 volumeMounts: - name: workspace mountPath: /workspace instances: - name: aws-1 provider: aws parameters: region: eu-west-1 instanceType: p2.xlarge ports: [6006, 8888] volumes: - name: workspace parameters: size: 50 scripts: tensorboard: | tensorboard --bind_all --port 6006 --logdir /workspace/project/training jupyter: | jupyter notebook --allow-root --ip 0.0.0.0 --notebook-dir=/workspace/project ``` Instance parameters are different for each provider: - [Local Provider Instance Parameters] - [Remote Provider Instance Parameters] - [AWS Provider Instance Parameters] - [GCP Provider Instance Parameters] ## Available Parameters Configuration file consists of 4 sections: `project`, `containers`, `instances` and `scripts`. ### __`project`__ section The `project` section contains the following parameters: - __`name`__ - the name of your project. It will be used to create an S3 bucket and a CloudFormation stack to run an instance. - __`syncFilters`__ _(optional)_ - filters to skip some directories or files during synchronization. By default, all project files will be synced with the instance. Example: ```yaml syncFilters: - exclude: - .idea/* - .git/* - data/* - include: - data/test/* - exclude: - data/test/dump.json ``` It will skip ".idea/", ".git/" and "data/" directories except the "data/test/" directory. All files from the "data/test/" directory will be synced with the instance except the "data/test/dump.json" file. You can read more about filters here: [Use of Exclude and Include Filter](https://docs.aws.amazon.com/cli/latest/reference/s3/index.html#use-of-exclude-and-include-filters). 
### __`containers`__ section The `containers` section contains a list of containers where each container is described with the following parameters: - __`name`__ - a name of the container. You can associate containers with the instances using the `containerName` parameter in the instance configuration. Default value: `default`. - __`projectDir`__ - a directory inside the container where the local project will be copied. If it's a subdirectory of a container volume, the project will be located on that volume, otherwise, the data will be lost once the instance is terminated. - __`image`__ _(optional)_ - the name of the Docker image that contains the environment for your project. For example, you could use [TensorFlow image for GPU](https://hub.docker.com/r/tensorflow/tensorflow/) (`tensorflow/tensorflow:latest-gpu-py3-jupyter`). It already contains NumPy, SciPy, scikit-learn, pandas, Jupyter Notebook and TensorFlow itself. If you need to use your own image, you can specify the path to your Dockerfile in the __`file`__ parameter (see below), or push your image to the [Docker Hub](https://hub.docker.com/). - __`file`__ _(optional)_ - relative path to your custom Dockerfile. __Note:__ Spotty uses the directory with the Dockerfile as its build context, so make sure it doesn't contain gigabytes of irrelevant data (keep the Dockerfile in a separate directory or use the `.dockerignore` file). Otherwise, you may get an out-of-space error because Docker copies the entire build context to the Docker daemon during the build. Read more here: ["docker build" command](https://docs.docker.com/engine/reference/commandline/build/). __Example:__ if you use TensorFlow and need to download your dataset from S3, you could install [AWS CLI](https://github.com/aws/aws-cli) on top of the original TensorFlow image. 
Just create a `Dockerfile` in the `docker/` directory of your project: ```dockerfile FROM tensorflow/tensorflow:latest-gpu-py3-jupyter RUN pip install awscli ``` Then set the `file` parameter to `docker/Dockerfile`. - __`runAsHostUser`__ _(optional)_ - if set to `true`, the container will be run with the host user ID and group ID, - __`volumeMounts`__ _(optional)_ - where to mount instance volumes into the container's filesystem. Each element of a list has the following parameters: - __`name`__ - this must match the name of an instance volume. - __`mountPath`__ - a path within the container at which the volume should be mounted. - __`workingDir`__ _(optional)_ - working directory for your custom scripts (see "scripts" section below), - __`env`__ _(optional)_ - a dictionary with environmental variables that will be available in the container, - __`hostNetwork`__ _(optional)_ - if set to `true`, the Docker container will be run with the host network, - __`ports`__ _(optional)_ - container ports that should be published to the host. Each element of a list contains the following parameters: - __`containerPort`__ - a container port, - __`hostPort`__ _(optional)_ - a host port. By default, the container port will be published on a random host port. - __`commands`__ _(optional)_ - commands which should be performed once your container is started. For example, you could download your datasets from an S3 bucket to the project directory (see "project" section): ```yaml commands: | aws s3 sync s3://my-bucket/datasets/my-dataset /workspace/project/data ``` - __`runtimeParameters`__ _(optional)_ - a list of additional parameters for the container runtime. For example: ```yaml runtimeParameters: ['--privileged', '--shm-size', '2G'] ``` ### __`instances`__ section The `instances` section contains a list of instances where each instance is described with the following parameters: - __`name`__ - a name of the instance. 
Use this name to manage the instance with the commands like "spotty start" or "spotty stop". Also Spotty uses this name in the names of AWS and GCP resources. - __`provider`__ - a provider for the instance. At the moment Spotty supports 4 providers: - "__local__" - runs containers using the Docker installed on the local machine, - "__remote__" - runs containers on a remote machine through SSH, - "__aws__" - Amazon Web Services EC2 instances, - "__gcp__" - Google Cloud Platform VMs. - __`parameters`__ - parameters of the instance. These parameters are different for each provider: - [Local Provider Instance Parameters] - [Remote Provider Instance Parameters] - [AWS Provider Instance Parameters] - [GCP Provider Instance Parameters] ### __`scripts`__ section The `scripts` section contains custom scripts which can be run with the `spotty run ` command. The following example defines 2 scripts: `jupyter` - to run Jupyter server and `train` - to start training a model: ```yaml scripts: jupyter: | jupyter notebook --allow-root --ip 0.0.0.0 --notebook-dir=/workspace/project train: | if [ -n "{{MODEL}}" ]; then python /workspace/project/model/train.py --model-name {{MODEL}} else echo "The MODEL parameter is required." fi ``` To start Jupyter simply run: ```bash spotty run jupyter ``` It will start Jupyter server on the remote instance inside a tmux session. Jupyter will be available on the port specified in the container configuration (see the example on top of the page). Copy an authentication token from the command output and use the __`Ctrl + b`__, then __`d`__ combination of keys to detach the tmux session - Jupyter will keep running. You also can write parametrized scripts. For example, the `train` script contains the `MODEL` parameter. So you could run your training script with different model names: ```bash spotty run train -p MODEL=my-model ``` Use the __`Ctrl + b`__, then __`d`__ combination of keys to detach the tmux session - the script will keep running.
You can come back to the running script the following ways: - either use the same command again - you will be reattached to the existing tmux session, - or connect to the instance using the `spotty sh` command and then use the __`Ctrl + b`__, then __`s`__ combination of keys to switch into the right tmux session. __Note:__ don't forget to use the "|" character for multi-line scripts, otherwise the YAML parser will merge multiple lines together. [Local Provider Instance Parameters]: [Remote Provider Instance Parameters]: [AWS Provider Instance Parameters]: [GCP Provider Instance Parameters]: ================================================ FILE: docs/source/docs/user-guide/getting-started.md ================================================ # Getting Started ## Installation Use [pip](http://www.pip-installer.org/en/latest/) to install or upgrade Spotty: ```bash pip install -U spotty ``` Python >=3.6 is required. Also, depending on the use case, some additional software is needed: * __Docker__ if you want to run containers locally: [Get Docker](https://docs.docker.com/get-docker/) * __AWS CLI__ if you're going to use AWS: [Installing the AWS Command Line Interface](http://docs.aws.amazon.com/cli/latest/userguide/installing.html) * __Google Cloud SDK__ if you're going to use GCP: [Installing Google Cloud SDK](https://cloud.google.com/sdk/install) ## Prepare a configuration file Prepare a `spotty.yaml` file and put it to the root directory of your project: - See the file specification and an example here: [Spotty Configuration File]. - Read [this](https://medium.com/@apls/how-to-train-deep-learning-models-on-aws-spot-instances-using-spotty-8d9e0543d365) article for a real-world example. 
## Start an instance Use the following command to launch an instance with the Docker container: ```bash spotty start ``` If you're using AWS, it will create EBS volumes if needed, start an instance, upload project files and start the Docker container with the environment for your project. ## Train your models or run notebooks To connect to the running container via SSH, use the following command: ```bash spotty sh ``` It runs a [tmux](https://github.com/tmux/tmux/wiki) session, so you can always detach this session using __`Ctrl + b`__, then __`d`__ combination of keys. To be attached to that session later, just use the `spotty sh` command again. Also, you can run custom scripts inside the Docker container using the `spotty run ` command. Read more about custom scripts in the documentation: [Configuration File: "scripts" section]. [Spotty Configuration File]: [Configuration File: "scripts" section]: ================================================ FILE: docs/source/docs/user-guide/installation.md ================================================ # Installation Use [pip](http://www.pip-installer.org/en/latest/) to install or upgrade Spotty: ```bash pip install -U spotty ``` Python >=3.6 is required. Also, depending on the use case, some additional software is needed: * __Docker__ if you want to run containers locally: [Get Docker](https://docs.docker.com/get-docker/) * __AWS CLI__ if you're going to use AWS: [Installing the AWS Command Line Interface](http://docs.aws.amazon.com/cli/latest/userguide/installing.html) * __Google Cloud SDK__ if you're going to use GCP: [Installing Google Cloud SDK](https://cloud.google.com/sdk/install) ================================================ FILE: docs/source/index.rst ================================================ .. raw:: html :file: main.html Welcome to Spotty Documentation =============================== .. 
toctree:: :hidden: :maxdepth: 2 :caption: User Guide docs/user-guide/getting-started docs/user-guide/installation docs/user-guide/configuration-file docs/cli/spotty .. toctree:: :hidden: :maxdepth: 2 :caption: Providers Local Provider Remote Provider AWS Provider GCP Provider ================================================ FILE: docs/source/main.html ================================================

An Open-source Tool for Training
Deep Learning Models in the Cloud

Run Training on AWS and GCP Instances
Spotty makes training of deep learning models on AWS and GCP instances as simple as training on your local machine.
Reduce Training Costs
Spotty can save you up to 70% of the costs by using AWS Spot Instances or GCP Preemptible VMs.
Share Your Model with Everyone
Spotty makes your model trainable locally or in the cloud by everyone with a couple of commands.
Develop with Docker
Spotty helps you to develop your model locally using a Docker container, so the environment can be set up by anyone and anywhere with a single command.
================================================ FILE: setup.cfg ================================================ [metadata] description-file = README.md ================================================ FILE: setup.py ================================================ #!/usr/bin/env python import os import re from setuptools import setup, find_packages def get_version(): root_dir = os.path.abspath(os.path.dirname(__file__)) with open(os.path.join(root_dir, 'spotty', '__init__.py')) as f: content = f.read() version_match = re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', content, re.M) if not version_match: raise RuntimeError('Unable to find version string.') return version_match.group(1) def get_description(): readme_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'README.md')) with open(readme_path, encoding='utf-8') as f: description = f.read() return description setup(name='spotty', version=get_version(), description='Training deep learning models on AWS and GCP instances', url='https://github.com/spotty-cloud/spotty', author='Oleg Polosin', author_email='apls777@gmail.com', license='MIT', long_description=get_description(), long_description_content_type='text/markdown', packages=find_packages(exclude=['tests*']), package_data={ 'spotty.deployment.container.docker.scripts': ['data/*', 'data/*/*'], 'spotty.providers.aws.cfn_templates.instance': ['data/*', 'data/*/*'], 'spotty.providers.aws.cfn_templates.instance_profile': ['data/*', 'data/*/*'], 'spotty.providers.gcp.dm_templates.instance': ['data/*', 'data/*/*'], }, scripts=['bin/spotty'], install_requires=[ 'boto3>=1.9.0', 'google-api-python-client>=1.7.8', 'google-cloud-storage>=1.15.0', 'cfn_flip', # to work with CloudFormation templates 'schema', 'chevron', ], tests_require=['moto'], test_suite='tests', classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Science/Research', 'Intended Audience :: Developers', 'Intended Audience :: System Administrators', 'Natural 
Language :: English', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', ]) ================================================ FILE: spotty/__init__.py ================================================ __version__ = '1.3.4' ================================================ FILE: spotty/cli.py ================================================ import argparse from typing import List, Type import pkg_resources from spotty.commands.abstract_command import AbstractCommand from spotty.commands.aws import AwsCommand from spotty.commands.download import DownloadCommand from spotty.commands.exec import ExecCommand from spotty.commands.run import RunCommand from spotty.commands.sh import ShCommand from spotty.commands.start import StartCommand from spotty.commands.status import StatusCommand from spotty.commands.stop import StopCommand from spotty.commands.sync import SyncCommand def get_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser() parser.add_argument('-V', '--version', action='store_true', help='Display the version of the Spotty') command_classes = [ StartCommand, StopCommand, StatusCommand, ShCommand, RunCommand, ExecCommand, SyncCommand, DownloadCommand, AwsCommand, ] + _get_custom_commands() # add commands to the parser add_subparsers(parser, command_classes) return parser def add_subparsers(parser: argparse.ArgumentParser, command_classes: List[Type[AbstractCommand]]): """Adds commands to the parser.""" subparsers = parser.add_subparsers() for command_class in command_classes: command = command_class() subparser = subparsers.add_parser(command.name, help=command.description, description=command.description) subparser.set_defaults(command=command, parser=subparser) command.configure(subparser) def _get_custom_commands() -> List[Type[AbstractCommand]]: """Returns custom commands that integrated through 
entry points.""" return [entry_point.load() for entry_point in pkg_resources.iter_entry_points('spotty.commands')] ================================================ FILE: spotty/commands/__init__.py ================================================ ================================================ FILE: spotty/commands/abstract_command.py ================================================ from abc import ABC, abstractmethod from argparse import Namespace, ArgumentParser from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter class AbstractCommand(ABC): """Abstract class for a Spotty sub-command.""" @property @abstractmethod def name(self) -> str: """The sub-command name.""" raise NotImplementedError @property def description(self) -> str: """The sub-command description. It will be displayed in the help text.""" return '' def configure(self, parser: ArgumentParser): """Adds arguments for the sub-command.""" parser.add_argument('-d', '--debug', action='store_true', help='Show debug messages') @abstractmethod def run(self, args: Namespace, output: AbstractOutputWriter): """Runs the sub-command. Args: args: Arguments provided by argparse. output: Output writer. Raises: ValueError: If command's arguments can't be processed. 
""" raise NotImplementedError ================================================ FILE: spotty/commands/abstract_config_command.py ================================================ from abc import abstractmethod from typing import List from argparse import Namespace, ArgumentParser from spotty.config.config_utils import load_config from spotty.deployment.abstract_instance_manager import AbstractInstanceManager from spotty.providers.instance_manager_factory import InstanceManagerFactory from spotty.commands.abstract_command import AbstractCommand from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter class AbstractConfigCommand(AbstractCommand): """Abstract class for a Spotty sub-command that needs to use a project's configuration.""" @abstractmethod def _run(self, instance_manager: AbstractInstanceManager, args: Namespace, output: AbstractOutputWriter): raise NotImplementedError def configure(self, parser: ArgumentParser): super().configure(parser) parser.add_argument('-c', '--config', type=str, default=None, help='Path to the configuration file') parser.add_argument('instance_name', metavar='INSTANCE_NAME', nargs='?', type=str, help='Instance name') def run(self, args: Namespace, output: AbstractOutputWriter): # get project configuration project_config = load_config(args.config) # get instance configuration instance_id = self._get_instance_id(project_config.instances, args.instance_name, output) instance_config = project_config.instances[instance_id] # create an instance manger instance_manager = InstanceManagerFactory.get_instance(project_config, instance_config) # run the command self._run(instance_manager, args, output) @staticmethod def _get_instance_id(instances: List[dict], instance_name: str, output: AbstractOutputWriter): if not instance_name: if len(instances) > 1: # ask user to choose the instance output.write('Select the instance:\n') with output.prefix(' '): for i, instance_config in enumerate(instances): output.write('[%d] %s' % 
(i + 1, instance_config['name'])) output.write() try: num = int(input('Enter number: ')) output.write() except ValueError: num = 0 if num < 1 or num > len(instances): raise ValueError('The value from 1 to %d was expected.' % len(instances)) instance_id = num - 1 else: instance_id = 0 else: # get instance ID by name instance_ids = [i for i, instance in enumerate(instances) if instance['name'] == instance_name] if not instance_ids: raise ValueError('Instance "%s" not found in the configuration file' % instance_name) instance_id = instance_ids[0] return instance_id ================================================ FILE: spotty/commands/abstract_provider_command.py ================================================ from abc import abstractmethod from argparse import Namespace, ArgumentParser import sys from spotty.commands.abstract_command import AbstractCommand from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter class AbstractProviderCommand(AbstractCommand): """Abstract class for a provider sub-command.""" @property @abstractmethod def commands(self) -> list: """Returns a list of the provider sub-commands.""" raise NotImplementedError def configure(self, parser: ArgumentParser): from spotty.cli import add_subparsers add_subparsers(parser, self.commands) def run(self, args: Namespace, output: AbstractOutputWriter): """If the command is called, it just displays a list of available sub-commands.""" args.parser.print_help() sys.exit(1) ================================================ FILE: spotty/commands/aws.py ================================================ from spotty.commands.abstract_provider_command import AbstractProviderCommand from spotty.providers.aws.commands.clean_logs import CleanLogsCommand from spotty.providers.aws.commands.spot_prices import SpotPricesCommand class AwsCommand(AbstractProviderCommand): name = 'aws' description = 'AWS commands' commands = [ SpotPricesCommand, CleanLogsCommand, ] 
================================================ FILE: spotty/commands/download.py ================================================ from argparse import Namespace, ArgumentParser from spotty.commands.abstract_config_command import AbstractConfigCommand from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter from spotty.errors.instance_not_running import InstanceNotRunningError from spotty.errors.nothing_to_do import NothingToDoError from spotty.deployment.abstract_instance_manager import AbstractInstanceManager class DownloadCommand(AbstractConfigCommand): name = 'download' description = 'Download files from the running instance' def configure(self, parser: ArgumentParser): super().configure(parser) parser.add_argument('-i', '--include', metavar='PATTERN', action='append', type=str, required=True, help='Download all files that matches the specified pattern (see Include Filters ' 'for the "aws s3 sync" command). Paths must be relative to your project directory, ' 'they cannot be absolute.') parser.add_argument('--dry-run', action='store_true', help='Show files to be downloaded') def _run(self, instance_manager: AbstractInstanceManager, args: Namespace, output: AbstractOutputWriter): # check that the instance is started if not instance_manager.is_running(): raise InstanceNotRunningError(instance_manager.instance_config.name) filters = [ {'exclude': ['*']}, {'include': args.include} ] dry_run = args.dry_run with output.prefix('[dry-run] ' if dry_run else ''): try: instance_manager.download(filters, output, dry_run) except NothingToDoError as e: output.write(str(e)) return output.write('Done') ================================================ FILE: spotty/commands/exec.py ================================================ import sys from argparse import ArgumentParser, Namespace from spotty.commands.abstract_config_command import AbstractConfigCommand from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter from 
spotty.deployment.utils.cli import shlex_join from spotty.errors.instance_not_running import InstanceNotRunningError from spotty.errors.nothing_to_do import NothingToDoError from spotty.deployment.abstract_instance_manager import AbstractInstanceManager class ExecCommand(AbstractConfigCommand): name = 'exec' description = 'Execute a command in the container' def configure(self, parser: ArgumentParser): super().configure(parser) parser.add_argument('-i', '--interactive', action='store_true', help='Pass STDIN to the container') parser.add_argument('-t', '--tty', action='store_true', help='Allocate a pseudo-TTY') parser.add_argument('-u', '--user', type=str, default=None, help='Container username or UID (format: [:])') parser.add_argument('--no-sync', action='store_true', help='Don\'t sync the project before running the script') # add the "double-dash" argument to the usage message parser.prog = 'spotty exec' parser.usage = parser.format_usage()[7:-1] + ' -- COMMAND [args...]\n' parser.epilog = 'The double dash (--) separates the command that you want to execute inside the container ' \ 'from the Spotty arguments.' 
def _run(self, instance_manager: AbstractInstanceManager, args: Namespace, output: AbstractOutputWriter): # check that the command is provided if not args.custom_args: raise ValueError('Use the double-dash ("--") to split Spotty arguments from the command that should be ' 'executed inside the container.') # check that the instance is started if not instance_manager.is_running(): raise InstanceNotRunningError(instance_manager.instance_config.name) # sync the project with the instance if not args.no_sync: try: instance_manager.sync(output) except NothingToDoError: pass # generate a "docker exec" command command = shlex_join(args.custom_args) command = instance_manager.container_commands.exec(command, interactive=args.interactive, tty=args.tty, user=args.user) # execute the command on the host OS exit_code = instance_manager.exec(command, tty=args.tty) sys.exit(exit_code) ================================================ FILE: spotty/commands/run.py ================================================ from argparse import ArgumentParser, Namespace from spotty.commands.abstract_config_command import AbstractConfigCommand from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter from spotty.deployment.utils.commands import get_script_command, get_log_command, get_tmux_session_command, get_bash_command from spotty.errors.instance_not_running import InstanceNotRunningError from spotty.errors.nothing_to_do import NothingToDoError from spotty.deployment.utils.user_scripts import parse_script_parameters, render_script from spotty.deployment.abstract_instance_manager import AbstractInstanceManager class RunCommand(AbstractConfigCommand): name = 'run' description = 'Run a custom script from the configuration file inside the container' def configure(self, parser: ArgumentParser): super().configure(parser) parser.add_argument('script_name', metavar='SCRIPT_NAME', type=str, help='Script name') parser.add_argument('-u', '--user', type=str, default=None, 
help='Container username or UID (format: [:])') parser.add_argument('-s', '--session-name', type=str, default=None, help='tmux session name') parser.add_argument('-l', '--logging', action='store_true', help='Log the script outputs to a file') parser.add_argument('-p', '--parameter', metavar='PARAMETER=VALUE', action='append', type=str, default=[], help='Set a value for the script parameter (format: PARAMETER=VALUE). This ' 'argument can be used multiple times to set several parameters. Parameters can be ' 'used in the script as Mustache variables (for example: {{PARAMETER}}).') parser.add_argument('--no-sync', action='store_true', help='Don\'t sync the project before running the script') # add the "double-dash" argument to the usage message parser.prog = 'spotty run' parser.usage = parser.format_usage()[7:-1] + ' [-- args...]\n' parser.epilog = 'The double dash (--) separates custom arguments that you can pass to the script ' \ 'from the Spotty arguments.' def _run(self, instance_manager: AbstractInstanceManager, args: Namespace, output: AbstractOutputWriter): # check that the script exists script_name = args.script_name scripts = instance_manager.project_config.scripts if script_name not in scripts: raise ValueError('Script "%s" is not defined in the configuration file.' 
% script_name) # replace script parameters params = parse_script_parameters(args.parameter) script_content = render_script(scripts[script_name], params) # check that the instance is started if not instance_manager.is_running(): raise InstanceNotRunningError(instance_manager.instance_config.name) # sync the project with the instance if not args.no_sync: try: instance_manager.sync(output) except NothingToDoError: pass # get a command to run the script with "docker exec" script_command = get_script_command(script_name, script_content, script_args=args.custom_args, logging=args.logging) command = instance_manager.container_commands.exec(script_command, interactive=True, tty=True, user=args.user) # wrap the command with the tmux session if instance_manager.use_tmux: session_name = args.session_name if args.session_name else 'spotty-script-%s' % script_name default_command = instance_manager.container_commands.exec(get_bash_command(), interactive=True, tty=True, user=args.user) command = get_tmux_session_command(command, session_name, script_name, default_command=default_command, keep_pane=True) # execute command on the host OS instance_manager.exec(command) ================================================ FILE: spotty/commands/sh.py ================================================ from argparse import ArgumentParser, Namespace from spotty.commands.abstract_config_command import AbstractConfigCommand from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter from spotty.deployment.utils.commands import get_bash_command, get_tmux_session_command from spotty.errors.instance_not_running import InstanceNotRunningError from spotty.deployment.abstract_instance_manager import AbstractInstanceManager class ShCommand(AbstractConfigCommand): name = 'sh' description = 'Get a shell to the container or to the instance itself' def configure(self, parser: ArgumentParser): super().configure(parser) parser.add_argument('-u', '--user', type=str, default=None, 
help='Container username or UID (format: [:])') parser.add_argument('-H', '--host-os', action='store_true', help='Connect to the host OS instead of the Docker ' 'container') parser.add_argument('-s', '--session-name', type=str, default=None, help='tmux session name') parser.add_argument('-l', '--list-sessions', action='store_true', help='List all tmux sessions managed by the ' 'instance') def _run(self, instance_manager: AbstractInstanceManager, args: Namespace, output: AbstractOutputWriter): # check that the instance is started if not instance_manager.is_running(): raise InstanceNotRunningError(instance_manager.instance_config.name) if args.list_sessions: if not instance_manager.use_tmux: raise ValueError('The "%s" provider doesn\'t support tmux.' % instance_manager.instance_config.provider_name) # a command to list existing tmux session on the host OS command = 'tmux ls; echo ""' else: if args.host_os: # get a command to open a login shell on the host OS session_name = args.session_name if args.session_name else 'spotty-sh-host' shell_command = '$SHELL' command = get_tmux_session_command(shell_command, session_name, keep_pane=False) \ if instance_manager.use_tmux else shell_command else: # get a command to run bash inside the docker container command = instance_manager.container_commands.exec(get_bash_command(), interactive=True, tty=True, user=args.user) # wrap the command with the tmux session if instance_manager.use_tmux: session_name = args.session_name if args.session_name else 'spotty-sh-container' command = get_tmux_session_command(command, session_name, default_command=command, keep_pane=False) # execute command on the host OS instance_manager.exec(command) ================================================ FILE: spotty/commands/start.py ================================================ from argparse import Namespace, ArgumentParser from spotty.commands.abstract_config_command import AbstractConfigCommand from spotty.commands.writers.abstract_output_writrer 
import AbstractOutputWriter from spotty.errors.instance_not_running import InstanceNotRunningError from spotty.deployment.abstract_instance_manager import AbstractInstanceManager class StartCommand(AbstractConfigCommand): name = 'start' description = 'Start an instance with a container' def configure(self, parser: ArgumentParser): super().configure(parser) parser.add_argument('-C', '--container', action='store_true', help='Starts or restarts container on the ' 'running instance') parser.add_argument('--dry-run', action='store_true', help='Displays the steps that would be performed ' 'using the specified command without actually ' 'running them') def _run(self, instance_manager: AbstractInstanceManager, args: Namespace, output: AbstractOutputWriter): dry_run = args.dry_run if args.container: # check that the instance is started if not instance_manager.is_running(): raise InstanceNotRunningError(instance_manager.instance_config.name) # start a container on the running instance instance_manager.start_container(output, dry_run=dry_run) if not dry_run: instance_name = '' if len(instance_manager.project_config.instances) > 1: instance_name = ' ' + instance_manager.instance_config.name output.write('\nContainer was successfully started.\n' 'Use the "spotty sh%s" command to connect to the container.\n' % instance_name) else: # start the instance with output.prefix('[dry-run] ' if dry_run else ''): instance_manager.start(output, dry_run) if not dry_run: instance_name = '' if len(instance_manager.project_config.instances) > 1: instance_name = ' ' + instance_manager.instance_config.name output.write('\n%s\n' '\nUse the "spotty sh%s" command to connect to the container.\n' % (instance_manager.get_status_text(), instance_name)) ================================================ FILE: spotty/commands/status.py ================================================ from argparse import Namespace from spotty.commands.abstract_config_command import AbstractConfigCommand from 
spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter from spotty.deployment.abstract_instance_manager import AbstractInstanceManager class StatusCommand(AbstractConfigCommand): name = 'status' description = 'Print information about the instance' def _run(self, instance_manager: AbstractInstanceManager, args: Namespace, output: AbstractOutputWriter): output.write(instance_manager.get_status_text()) ================================================ FILE: spotty/commands/stop.py ================================================ from argparse import Namespace from spotty.commands.abstract_config_command import AbstractConfigCommand from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter from spotty.deployment.abstract_instance_manager import AbstractInstanceManager class StopCommand(AbstractConfigCommand): name = 'stop' description = 'Terminate running instance and apply deletion policies for the volumes' # TODO: the "spotty start" command should restart the instance and the container if the instance was shutdown # def configure(self, parser: ArgumentParser): # super().configure(parser) # parser.add_argument('-s', '--shutdown', action='store_true', # help='Shutdown the instance without terminating it. 
Deletion policies for the volumes ' # 'won\'t be applied.') def _run(self, instance_manager: AbstractInstanceManager, args: Namespace, output: AbstractOutputWriter): instance_manager.stop(only_shutdown=False, output=output) ================================================ FILE: spotty/commands/sync.py ================================================ from argparse import Namespace, ArgumentParser from spotty.commands.abstract_config_command import AbstractConfigCommand from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter from spotty.errors.instance_not_running import InstanceNotRunningError from spotty.errors.nothing_to_do import NothingToDoError from spotty.deployment.abstract_instance_manager import AbstractInstanceManager class SyncCommand(AbstractConfigCommand): name = 'sync' description = 'Synchronize the project with the running instance' def configure(self, parser: ArgumentParser): super().configure(parser) parser.add_argument('--dry-run', action='store_true', help='Show files to be synced') def _run(self, instance_manager: AbstractInstanceManager, args: Namespace, output: AbstractOutputWriter): # check that the instance is started if not instance_manager.is_running(): raise InstanceNotRunningError(instance_manager.instance_config.name) dry_run = args.dry_run with output.prefix('[dry-run] ' if dry_run else ''): try: instance_manager.sync(output, dry_run) except NothingToDoError as e: output.write(str(e)) return output.write('Done') ================================================ FILE: spotty/commands/writers/__init__.py ================================================ ================================================ FILE: spotty/commands/writers/abstract_output_writrer.py ================================================ from abc import ABC, abstractmethod from contextlib import contextmanager class AbstractOutputWriter(ABC): def __init__(self): self._prefix = '' self._ignore_prefix = False @abstractmethod def _write(self, msg: 
str, newline: bool = True): raise NotImplementedError def write(self, msg: str = '', newline: bool = True): if not self._ignore_prefix: msg = '\n'.join([self._prefix + line for line in msg.split('\n')]) self._write(msg, newline=newline) self._ignore_prefix = not newline @contextmanager def prefix(self, prefix): self._prefix += prefix yield self._prefix = self._prefix[:-len(prefix)] ================================================ FILE: spotty/commands/writers/null_output_writrer.py ================================================ from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter class NullOutputWriter(AbstractOutputWriter): def _write(self, msg: str, newline: bool = True): """Does nothing.""" pass ================================================ FILE: spotty/commands/writers/output_writrer.py ================================================ from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter class OutputWriter(AbstractOutputWriter): def _write(self, msg: str, newline: bool = True): """Prints messages to STDOUT.""" print(msg, end=('\n' if newline else ''), flush=True) ================================================ FILE: spotty/config/__init__.py ================================================ ================================================ FILE: spotty/config/abstract_instance_config.py ================================================ import os from abc import ABC, abstractmethod from collections import OrderedDict, namedtuple from typing import List from spotty.config.container_config import ContainerConfig from spotty.config.project_config import ProjectConfig from spotty.config.tmp_dir_volume import TmpDirVolume from spotty.config.validation import DEFAULT_CONTAINER_NAME, is_subdir from spotty.config.abstract_instance_volume import AbstractInstanceVolume from spotty.deployment.abstract_cloud_instance.file_structure import INSTANCE_SPOTTY_TMP_DIR, CONTAINERS_TMP_DIR from spotty.utils import 
filter_list VolumeMount = namedtuple('VolumeMount', ['name', 'host_path', 'mount_path', 'mode', 'hidden']) class AbstractInstanceConfig(ABC): def __init__(self, instance_config: dict, project_config: ProjectConfig): self._project_config = project_config # set instance parameters self._name = instance_config['name'] self._provider_name = instance_config['provider'] self._params = self._validate_instance_params(instance_config['parameters']) # get container config container_configs = filter_list(project_config.containers, 'name', self.container_name) if not container_configs: raise ValueError('Container configuration with the name "%s" not found.' % self.container_name) self._container_config = ContainerConfig(container_configs[0]) # get volumes self._volumes = self._get_volumes() # get container volume mounts self._volume_mounts = self._get_volume_mounts(self._volumes) # get the host project directory self._host_project_dir = self._get_host_project_dir(self._volume_mounts) @abstractmethod def _validate_instance_params(self, params: dict) -> dict: """Validates instance parameters and fill missing ones with the default values.""" raise NotImplementedError @abstractmethod def _get_instance_volumes(self) -> List[AbstractInstanceVolume]: """Returns specific to the provider volumes that should be mounted on the host OS.""" raise NotImplementedError @property def project_config(self) -> ProjectConfig: return self._project_config @property def container_config(self) -> ContainerConfig: return self._container_config @property @abstractmethod def user(self) -> str: raise NotImplementedError @property def name(self) -> str: """Name of the instance.""" return self._name @property def provider_name(self): """Provider name.""" return self._provider_name @property def container_name(self) -> str: return self._params['containerName'] if self._params['containerName'] else DEFAULT_CONTAINER_NAME @property def full_container_name(self) -> str: """A container name that is used in the 
"docker run" command.""" return ('spotty-%s-%s-%s' % (self.project_config.project_name, self.name, self.container_name)).lower() @property def docker_data_root(self) -> str: """Data root directory for Docker daemon.""" return self._params['dockerDataRoot'] @property def local_ssh_port(self) -> int: """Local SSH port to connect to the instance (in case of a tunnel).""" return self._params['localSshPort'] @property def commands(self) -> str: """Commands that should be run once an instance is started.""" return self._params['commands'] @property def host_project_dir(self): """Project directory on the host OS.""" return self._host_project_dir @property def volumes(self) -> List[AbstractInstanceVolume]: return self._volumes @property def volume_mounts(self) -> List[VolumeMount]: return self._volume_mounts @property def dockerfile_path(self): """Dockerfile path on the host OS.""" dockerfile_path = self.container_config.file if dockerfile_path: dockerfile_path = self.host_project_dir + '/' + dockerfile_path return dockerfile_path @property def docker_context_path(self): """Docker build's context path on the host OS.""" dockerfile_path = self.dockerfile_path if not dockerfile_path: return '' return os.path.dirname(dockerfile_path) @property def host_container_dir(self): """A temporary directory on the host OS that contains container-related files and directories.""" return '%s/%s' % (CONTAINERS_TMP_DIR, self.full_container_name) @property def host_logs_dir(self): """A directory mainly for the "spotty run" command logs.""" return self.host_container_dir + '/logs' @property def host_volumes_dir(self): """A directory with temporary volumes. If there is a Volume Mount in the configuration file that doesn't have a corresponding instance volume, a temporary directory will be created and attached to the container. 
""" return self.host_container_dir + '/volumes' def _get_volumes(self) -> List[AbstractInstanceVolume]: """Returns volumes that should be mounted on the host OS.""" volumes = self._get_instance_volumes() # create temporary volumes for the volume mounts that don't have corresponding # volumes in the instance configuration instance_volume_names = set(volume.name for volume in volumes) for container_volume in self.container_config.volume_mounts: if container_volume['name'] not in instance_volume_names: volumes.append(TmpDirVolume(volume_config={ 'name': container_volume['name'], 'parameters': {'path': '%s/%s' % (self.host_volumes_dir, container_volume['name'])} })) return volumes def _get_volume_mounts(self, volumes: List[AbstractInstanceVolume]) \ -> List[VolumeMount]: """Returns container volume mounts and a path to the project directory on the host OS.""" # get mount directories for the volumes host_paths = OrderedDict([(volume.name, volume.host_path) for volume in volumes]) # get container volumes mapping volume_mounts = [] for container_volume in self.container_config.volume_mounts: volume_mounts.append(VolumeMount( name=container_volume['name'], host_path=host_paths[container_volume['name']], mount_path=container_volume['mountPath'], mode='rw', hidden=False, )) return volume_mounts def _get_host_project_dir(self, volume_mounts: List[VolumeMount]) -> str: """Returns the host project directory.""" host_project_dir = None for volume_mount in sorted(volume_mounts, key=lambda x: len(x.mount_path), reverse=True): if is_subdir(self.container_config.project_dir, volume_mount.mount_path): # the project directory is a subdirectory of a Volume Mount directory project_subdir = os.path.relpath(self.container_config.project_dir, volume_mount.mount_path) host_project_dir = os.path.normpath(volume_mount.host_path + '/' + project_subdir) break # this should not be the case as the volume mount for the project directory should be added automatically # if it doesn't exist in the 
configuration assert host_project_dir is not None, 'A volume mount that contains the project directory not found.' return host_project_dir ================================================ FILE: spotty/config/abstract_instance_volume.py ================================================ from abc import ABC, abstractmethod class AbstractInstanceVolume(ABC): def __init__(self, volume_config: dict): self._name = volume_config['name'] self._params = self._validate_volume_parameters(volume_config['parameters']) @abstractmethod def _validate_volume_parameters(self, params: dict) -> dict: raise NotImplementedError @property def name(self) -> str: """Unique name of the volume that will be used for the deployment.""" return self._name @property @abstractmethod def host_path(self) -> str: """A path on the host OS that will be mounted to the container.""" raise NotImplementedError @property @abstractmethod def title(self) -> str: """A title for the volume type. It will be used to display information about the volumes during the deployment. """ raise NotImplementedError @property def deletion_policy_title(self) -> str: """A title for the volume's deletion policy. It will be used to display information about the volumes during the deployment. 
""" return '' ================================================ FILE: spotty/config/config_utils.py ================================================ import os from collections import namedtuple import yaml from spotty.config.project_config import ProjectConfig from spotty.config.validation import DEFAULT_CONTAINER_NAME DEFAULT_CONFIG_FILENAME = 'spotty.yaml' OVERRIDE_CONFIG_FILENAME = 'spotty.override.yaml' def load_config(config_path: str = None) -> ProjectConfig: # get project directory if not config_path: config_path = DEFAULT_CONFIG_FILENAME if os.path.isabs(config_path): config_abs_path = config_path else: config_abs_path = os.path.abspath(os.path.join(os.getcwd(), config_path)) if not os.path.exists(config_abs_path): raise ValueError('Configuration file "%s" not found.' % config_path) # get the project directory project_dir = os.path.dirname(config_abs_path) # read the config config = _read_yaml(config_abs_path) # update the config if an override config exists if os.path.basename(config_abs_path) == DEFAULT_CONFIG_FILENAME: override_config_abs_path = os.path.join(project_dir, OVERRIDE_CONFIG_FILENAME) if os.path.isfile(override_config_abs_path): override_config = _read_yaml(override_config_abs_path) config = _merge_configs(config, override_config) # get project configuration project_config = ProjectConfig(config, project_dir) return project_config def _read_yaml(file_path: str): """Returns content of the YAML file.""" with open(file_path, 'r') as f: res = yaml.safe_load(f) return res def _merge_configs(orig_config, override_config): """Merges original config with the override config.""" MergeRule = namedtuple('MergeRule', ['key', 'merge_key', 'default_value', 'has_default_value']) merge_rules = [MergeRule( key='containers', merge_key='name', default_value=DEFAULT_CONTAINER_NAME, has_default_value=True, ), MergeRule( key='instances', merge_key='name', default_value=None, has_default_value=False, )] # validate and merge lists by keys for rule in merge_rules: if 
override_config and (rule.key in orig_config) and (rule.key in override_config): if not isinstance(orig_config[rule.key], list): raise ValueError('The "%s" key in the config must be a list.' % rule.key) if not isinstance(override_config[rule.key], list): raise ValueError('The "%s" key in the override config must be a list.' % rule.key) # convert lists to dictionaries dicts_to_merge = [] for list_to_merge in [orig_config[rule.key], override_config[rule.key]]: dict_to_merge = {} for item in list_to_merge: if not isinstance(item, dict): raise ValueError('Each item of the "%s" list must be a dictionary.' % rule.key) if rule.merge_key in item: merge_value = item[rule.merge_key] elif rule.has_default_value: merge_value = rule.default_value else: raise ValueError('Each item of the "%s" list must contain the "%s" field.' % (rule.key, rule.merge_key)) if merge_value in dict_to_merge: raise ValueError('Each item of the "%s" list must have a unique "%s" value.' % (rule.key, rule.merge_key)) dict_to_merge[merge_value] = item dicts_to_merge.append(dict_to_merge) # merge lists merged_dict = _update_dict(*dicts_to_merge) orig_config[rule.key] = [{**item, rule.merge_key: key} for key, item in merged_dict.items()] del override_config[rule.key] # merge the rest of the override config config = _update_dict(orig_config, override_config) return config def _update_dict(d, u): if not isinstance(u, dict): return d if not isinstance(d, dict): return u for k, v in u.items(): if isinstance(d, dict): if isinstance(v, dict): d[k] = _update_dict(d.get(k, {}), v) else: d[k] = u[k] else: d = {k: u[k]} return d ================================================ FILE: spotty/config/container_config.py ================================================ from typing import List from spotty.config.validation import is_subdir PROJECT_VOLUME_MOUNT_NAME = '.project' class ContainerConfig(object): def __init__(self, container_config: dict): self._config = container_config self._volume_mounts = 
class ContainerConfig(object):
    """Read-only accessor over a validated container configuration dictionary."""

    def __init__(self, container_config: dict):
        self._config = container_config
        self._volume_mounts = self._get_volume_mounts()

    @property
    def name(self) -> str:
        return self._config['name']

    @property
    def project_dir(self) -> str:
        return self._config['projectDir']

    @property
    def image(self) -> str:
        return self._config['image']

    @property
    def file(self) -> str:
        return self._config['file']

    @property
    def run_as_host_user(self) -> str:
        # NOTE(review): the config value is validated as a boolean; the "str"
        # annotation looks wrong but is kept for interface compatibility
        return self._config['runAsHostUser']

    @property
    def volume_mounts(self) -> list:
        return self._volume_mounts

    @property
    def commands(self) -> str:
        return self._config['commands']

    @property
    def working_dir(self) -> str:
        """Working directory for the Docker container.

        Falls back to the project directory when no working directory is set.
        """
        return self._config['workingDir'] or self._config['projectDir']

    @property
    def env(self) -> dict:
        return self._config['env']

    @property
    def host_network(self) -> bool:
        return self._config['hostNetwork']

    @property
    def ports(self) -> List[dict]:
        return self._config['ports']

    @property
    def runtime_parameters(self) -> list:
        return self._config['runtimeParameters']

    def _get_volume_mounts(self):
        """Returns container volume mounts from the configuration and prepends
        a dedicated project volume mount if no configured mount already covers
        the project directory.
        """
        mounts = self._config['volumeMounts']

        # is the project directory already inside one of the mount paths?
        covers_project = any(is_subdir(self.project_dir, mount['mountPath']) for mount in mounts)

        if not covers_project:
            mounts.insert(0, {
                'name': PROJECT_VOLUME_MOUNT_NAME,
                'mountPath': self.project_dir,
            })

        return mounts
class HostPathVolume(AbstractInstanceVolume):
    """A volume backed by an existing path on the host OS."""

    TYPE_NAME = 'HostPath'

    def __init__(self, volume_config: dict, base_dir: str = None):
        super().__init__(volume_config)
        self._base_dir = base_dir

    def _validate_volume_parameters(self, params: dict) -> dict:
        return validate_host_path_volume_parameters(params)

    @property
    def title(self):
        return 'HostPath volume'

    @property
    def name(self):
        return self._name

    @property
    def deletion_policy_title(self) -> str:
        # host-path volumes are never deleted by Spotty, so there is no policy
        return ''

    @property
    def host_path(self) -> str:
        """A path on the host OS that will be mounted to the container."""
        path = os.path.expanduser(self._params['path'])
        if os.path.isabs(path):
            return path

        # a relative path is allowed only when a base directory was provided
        if self._base_dir is None:
            raise ValueError('Use absolute path for the "%s" volume.' % self.name)

        return os.path.join(self._base_dir, path)


class ProjectConfig(object):
    """Validated project-level configuration (the whole config file)."""

    def __init__(self, config: dict, project_dir: str):
        # validation also fills in default values
        self._config = validate_basic_config(config)
        self._project_dir = project_dir

    @property
    def project_dir(self) -> str:
        return self._project_dir

    @property
    def project_name(self) -> str:
        return self._config['project']['name']

    @property
    def sync_filters(self) -> list:
        return self._config['project']['syncFilters']

    @property
    def containers(self) -> list:
        return self._config['containers']

    @property
    def instances(self) -> list:
        return self._config['instances']

    @property
    def scripts(self) -> dict:
        return self._config['scripts']


class TmpDirVolume(HostPathVolume):
    """A host-path volume that points at a temporary directory."""

    @property
    def title(self):
        return 'temporary directory'

    @property
    def deletion_policy_title(self) -> str:
        return ''
def validate_basic_config(data):
    """Validates the provider-independent part of a Spotty config.

    Returns the validated config with all default values filled in.
    Raises ValueError (via validate_config) when validation fails.
    """
    # schema for a single item of the "containers" list
    container = And(
        {
            Optional('name', default=DEFAULT_CONTAINER_NAME): And(str, Regex(r'^[\w-]+$')),
            'projectDir': And(str,
                              And(os.path.isabs,
                                  error='Use an absolute path when specifying the project directory'),
                              Use(lambda x: x.rstrip('/'))
                              ),
            Optional('image', default=''): And(str, len),
            Optional('file', default=''): And(str,
                                              # TODO: a proper regex that the filename is valid
                                              Regex(r'^[\w\.\/@-]*$', error='Invalid name for a Dockerfile'),
                                              And(lambda x: not x.endswith('/'), error='Invalid name for a Dockerfile'),
                                              And(lambda x: not os.path.isabs(x),
                                                  error='Path to the Dockerfile should be relative to the '
                                                        'project\'s root directory.'),
                                              ),
            Optional('runAsHostUser', default=False): bool,
            Optional('volumeMounts', default=[]): (And(
                [{
                    # integer names are converted to strings before the regex check
                    'name': And(Or(int, str), Use(str), Regex(r'^[\w-]+$')),
                    'mountPath': And(
                        str,
                        And(os.path.isabs, error='Use an absolute path when specifying a mount directory'),
                        Use(lambda x: x.rstrip('/')),
                    ),
                }],
                And(lambda x: is_unique_value(x, 'name'), error='Each volume mount must have a unique name.'),
                # trailing slashes are appended so that "/a/bc" isn't treated as nested in "/a/b"
                And(lambda x: not has_prefix([(volume['mountPath'] + '/') for volume in x]),
                    error='Volume mount paths cannot be prefixes for each other.'),
            )),
            Optional('workingDir', default=''): And(str,
                                                    And(os.path.isabs, error='Use an absolute path when specifying a '
                                                                             'working directory'),
                                                    Use(lambda x: x.rstrip('/'))
                                                    ),
            Optional('env', default={}): {
                # environment variable names must be valid shell identifiers
                And(str, Regex(r'^[a-zA-Z_]+[a-zA-Z0-9_]*$')): str,
            },
            Optional('hostNetwork', default=False): bool,
            Optional('ports', default=[]): [{
                'containerPort': And(int, lambda x: 0 < x < 65536),
                Optional('hostPort', default=None): And(int, lambda x: 0 < x < 65536),
            }],
            Optional('commands', default=''): str,
            # TODO: allow to use only certain runtime parameters
            Optional('runtimeParameters', default=[]): And([str], Use(lambda x: [p.strip() for p in x])),
        },
        # cross-field checks for a container
        And(lambda x: x['image'] or x['file'], error='Either "image" or "file" should be specified.'),
        And(lambda x: not (x['image'] and x['file']), error='"image" and "file" cannot be specified together.'),
        And(lambda x: not (x['hostNetwork'] and x['ports']),
            error='Published ports and the host network mode cannot be used together.'),
    )

    # top-level config schema
    schema = Schema({
        'project': {
            # project name: alphanumeric with dashes, 2-28 characters,
            # must start and end with an alphanumeric character
            'name': And(str, Regex(r'^[a-zA-Z0-9][a-zA-Z0-9-]{,26}[a-zA-Z0-9]$')),
            Optional('syncFilters', default=[]): And(
                [And(
                    {
                        Optional('exclude'): [And(str, len, And(lambda x: '**' not in x,
                                                                error='Use single asterisks ("*") in sync filters'))],
                        Optional('include'): [And(str, len, And(lambda x: '**' not in x,
                                                                error='Use single asterisks ("*") in sync filters'))],
                    },
                    And(lambda x: x, error='Either "exclude" or "include" filter should be specified.'),
                    And(lambda x: not ('exclude' in x and 'include' in x),
                        error='"exclude" and "include" filters should be specified as different list items.'),
                )],
                error='"project.syncFilters" field must be a list.',
            )
        },
        # give a helpful message for the common singular/plural mistake
        WrongKey('container', error='Use "containers" field instead of "container".'): object,
        Optional('containers', default=[]): And(
            [container],
            And(lambda x: is_unique_value(x, 'name'), error='Each container must have a unique name.'),
            error='"containers" field must be a list.',
        ),
        WrongKey('instance', error='Use "instances" field instead of "instance".'): object,
        'instances': And(
            [{
                'name': And(Or(int, str), Use(str), Regex(r'^[\w-]+$')),
                'provider': str,
                # provider-specific parameters are validated later by the provider
                Optional('parameters', default={}): {
                    And(str, Regex(r'^[\w]+$')): object,
                }
            }],
            And(lambda x: len(x), error='At least one instance must be specified in the configuration file.'),
            And(lambda x: is_unique_value(x, 'name'), error='Each instance must have a unique name.'),
        ),
        Optional('scripts', default={}): {
            And(str, Regex(r'^[\w-]+$')): And(str, len),
        },
    })

    return validate_config(schema, data)
def validate_host_path_volume_parameters(params: dict):
    """Validates the parameters of a HostPath volume."""
    host_path_schema = Schema({
        'path': And(str, Use(lambda x: x.rstrip('/'))),
    })

    return validate_config(host_path_schema, params)


def get_instance_parameters_schema(instance_parameters: dict, default_volume_type: str,
                                   instance_checks: list = None, volumes_checks: list = None):
    """Builds a schema for the "parameters" section of an instance config.

    :param instance_parameters: provider-specific parameter schemas merged into
        the common ones below
    :param default_volume_type: volume type used when a volume doesn't specify one
    :param instance_checks: extra checks applied to the whole parameters dict
    :param volumes_checks: extra checks applied to the "volumes" list
    """
    instance_checks = instance_checks or []
    volumes_checks = volumes_checks or []

    # schema for a single item of the "volumes" list
    volume_schema = {
        'name': And(Or(int, str), Use(str), Regex(r'^[\w-]+$')),
        Optional('type', default=default_volume_type): str,
        Optional('parameters', default={}): {
            And(str, Regex(r'^[\w]+$')): object,
        },
    }

    def _docker_root_on_volume(params):
        # "dockerDataRoot", if set, must be located inside the "mountDir"
        # of one of the volumes
        if not params['dockerDataRoot']:
            return True

        return any(
            volume['parameters'].get('mountDir')
            and is_subdir(params['dockerDataRoot'], volume['parameters']['mountDir'])
            for volume in params['volumes']
        )

    return Schema(And(
        {
            **instance_parameters,
            Optional('containerName', default=None): And(str, Regex(r'^[\w-]+$')),
            Optional('dockerDataRoot', default=''): And(
                str,
                And(os.path.isabs, error='Use an absolute path when specifying a Docker data root directory'),
                Use(lambda x: x.rstrip('/')),
            ),
            Optional('volumes', default=[]): And(
                [volume_schema],
                And(lambda x: is_unique_value(x, 'name'), error='Each instance volume must have a unique name.'),
                *volumes_checks,
            ),
            Optional('localSshPort', default=None): Or(None, And(int, lambda x: 0 < x < 65536)),
            Optional('commands', default=''): str,
        },
        And(_docker_root_on_volume,
            error='The "mountDir" of one of the volumes must be a prefix for the "dockerDataRoot" path.'),
        *instance_checks,
    ))


def is_unique_value(x: List[dict], key):
    """Returns "True" if all values of the key in the list of dictionaries are unique."""
    return len(x) == len({item[key] for item in x})


def has_prefix(x: list):
    """Returns "True" if some value in the list is a prefix for another value in this list."""
    for value in x:
        # count how many list items are prefixes of "value"; "value" always
        # matches itself, so more than one match means a real prefix exists
        if sum(1 for other in x if value.startswith(other)) > 1:
            return True

    return False
def is_subdir(subdir_path, dir_path):
    """Returns "True" if the first path parameter is the same directory as, or a
    subdirectory of, the second path parameter.
    """
    # a single trailing slash is enforced on both paths so that "/a/bc" is not
    # mistaken for a subdirectory of "/a/b"
    return (subdir_path.rstrip('/') + '/').startswith(dir_path.rstrip('/') + '/')


def validate_config(schema: Schema, config):
    """Validates a config against the schema and returns the validated config.

    Raises:
        ValueError: the config doesn't match the schema; the message carries the
            most specific error reported by the "schema" library.
    """
    try:
        validated = schema.validate(config)
    except SchemaError as e:
        # "errors" holds the custom error messages (None when a check didn't
        # provide one); fall back to the auto-generated message in that case
        raise ValueError('Validation error: ' + (e.errors[-1] if e.errors[-1] else e.autos[-1]))

    return validated


class WrongKey(Hook):
    """A schema hook that raises an error whenever a forbidden key is present.

    Used to show a helpful message for common config mistakes (e.g. "container"
    instead of "containers").
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, handler=self.raise_error)

    def raise_error(self, key, *args):
        raise SchemaForbiddenKeyError(self._error)


def get_spotty_config_dir():
    """Spotty configuration directory (created on first access)."""
    path = os.path.join(os.path.expanduser('~'), '.spotty')
    if not os.path.isdir(path):
        os.makedirs(path, mode=0o755, exist_ok=True)

    return path


def get_spotty_keys_dir(provider_name: str):
    """Spotty keys directory for the given provider (created on first access)."""
    path = os.path.join(get_spotty_config_dir(), 'keys', provider_name)
    if not os.path.isdir(path):
        os.makedirs(path, mode=0o755, exist_ok=True)

    return path


class AbstractBucketManager(ABC):
    """Manages the cloud storage bucket that belongs to a project."""

    def __init__(self, project_name: str):
        self._project_name = project_name

    @property
    def project_name(self) -> str:
        return self._project_name

    def get_bucket(self) -> AbstractBucket:
        # implementations are expected to raise BucketNotFoundError when the
        # bucket doesn't exist (callers catch that error, see
        # AbstractCloudInstanceManager.start)
        raise NotImplementedError

    def create_bucket(self) -> AbstractBucket:
        raise NotImplementedError
class AbstractCloudInstanceManager(AbstractSshInstanceManager, ABC):
    """Base instance manager for cloud providers.

    Combines three provider-specific collaborators: a bucket manager (project
    storage), a data transfer object (local <-> bucket <-> instance sync) and
    an instance deployment manager (create/delete the instance).
    """

    def __init__(self, project_config: ProjectConfig, instance_config: dict):
        super().__init__(project_config, instance_config)

        # provider-specific collaborators, created by the factory methods below
        self._bucket_manager = self._get_bucket_manager()
        self._data_transfer = self._get_data_transfer()
        self._instance_deployment = self._get_instance_deployment()

    @abstractmethod
    def _get_bucket_manager(self) -> AbstractBucketManager:
        """Returns an bucket manager."""
        raise NotImplementedError

    @abstractmethod
    def _get_data_transfer(self) -> AbstractDataTransfer:
        """Returns a data transfer object."""
        raise NotImplementedError

    @abstractmethod
    def _get_instance_deployment(self) -> AbstractInstanceDeployment:
        """Returns an instance deployment manager."""
        raise NotImplementedError

    @property
    def bucket_manager(self) -> AbstractBucketManager:
        """Returns a bucket manager."""
        return self._bucket_manager

    @property
    def data_transfer(self) -> AbstractDataTransfer:
        """Returns a data transfer object."""
        return self._data_transfer

    @property
    def instance_deployment(self) -> AbstractInstanceDeployment:
        """Returns an instance deployment manager."""
        return self._instance_deployment

    def is_running(self) -> bool:
        """Checks if the instance is running."""
        # NOTE: returns None (falsy) rather than False when the instance
        # doesn't exist
        instance = self.instance_deployment.get_instance()
        return instance and instance.is_running

    def start(self, output: AbstractOutputWriter, dry_run=False):
        """Deploys (or redeploys) the instance, creating the project bucket if needed.

        Interactively asks for confirmation before restarting an already
        running instance.
        """
        # make sure the Dockerfile exists
        self._check_dockerfile_exists()

        if not dry_run:
            # check if the instance is already running
            instance = self.instance_deployment.get_instance()
            if instance:
                if instance.is_running:
                    print('Instance is already running. Are you sure you want to restart it?')
                    res = input('Type "y" to confirm: ')
                    if res != 'y':
                        raise ValueError('The operation was cancelled.')

                    # terminating the instance to make EBS volumes available (the stack will be deleted later)
                    output.write('Terminating the instance... ', newline=False)
                    instance.terminate()
                    output.write('DONE')
                elif instance.is_stopped:
                    # TODO: restart the instance if it stopped
                    pass

        # create or get existing bucket for the project
        # NOTE: on a dry run "bucket_name" may stay None; deploy() is expected
        # to handle that -- TODO(review): confirm with provider implementations
        bucket_name = None
        try:
            bucket_name = self.bucket_manager.get_bucket().name
        except BucketNotFoundError:
            if not dry_run:
                bucket_name = self.bucket_manager.create_bucket().name
                output.write('Bucket "%s" was created.' % bucket_name)

        # deploy the instance
        self.instance_deployment.deploy(
            container_commands=self.container_commands,
            bucket_name=bucket_name,
            data_transfer=self.data_transfer,
            output=output,
            dry_run=dry_run,
        )

    def stop(self, only_shutdown: bool, output: AbstractOutputWriter):
        """Shuts the instance down or deletes it together with its stack."""
        if only_shutdown:
            output.write('Shutting down the instance... ', newline=False)
            self.instance_deployment.get_instance().stop()
            output.write('DONE')
        else:
            # delete the stack and apply deletion policies
            self.instance_deployment.delete(output)

    def clean(self, output: AbstractOutputWriter):
        # nothing to clean for cloud instances
        pass

    def sync(self, output: AbstractOutputWriter, dry_run=False):
        """Syncs the project: local -> bucket, then bucket -> instance."""
        # get the project bucket name
        bucket_name = self.bucket_manager.get_bucket().name

        # sync the project with the S3 bucket
        output.write('Syncing the project with the bucket...')
        self.data_transfer.upload_local_to_bucket(bucket_name, dry_run=dry_run)

        if not dry_run:
            # sync the S3 bucket with the instance
            output.write('Syncing the bucket with the instance...')
            remote_cmd = self.data_transfer.get_download_bucket_to_instance_command(
                bucket_name=bucket_name,
                # sudo is needed when the container doesn't run as the host user
                use_sudo=(not self.instance_config.container_config.run_as_host_user),
            )
            logging.debug('Remote sync command: ' + remote_cmd)

            # execute the command on the host OS
            exit_code = self.exec(remote_cmd)
            if exit_code != 0:
                raise ValueError('Failed to download files from the bucket to the instance')

    def download(self, download_filters: list, output: AbstractOutputWriter, dry_run=False):
        """Downloads files: instance -> bucket, then bucket -> local."""
        # get the project bucket name
        bucket_name = self.bucket_manager.get_bucket().name

        # sync files from the instance to a temporary S3 directory
        output.write('Uploading files from the instance to the bucket...')
        remote_cmd = self.data_transfer.get_upload_instance_to_bucket_command(
            bucket_name=bucket_name,
            download_filters=download_filters,
            use_sudo=(not self.instance_config.container_config.run_as_host_user),
            dry_run=dry_run,
        )
        logging.debug('Remote sync command: ' + remote_cmd)

        # execute the command on the host OS
        exit_code = self.exec(remote_cmd)
        if exit_code != 0:
            raise ValueError('Failed to upload files from the instance to the bucket')

        if not dry_run:
            # sync the project with the S3 bucket
            output.write('Downloading files from the bucket to local...')
            self.data_transfer.download_bucket_to_local(bucket_name=bucket_name, download_filters=download_filters)

    @property
    def ssh_host(self):
        """Returns an IP address that will be used for SSH connections."""
        # when a local SSH port is configured, connections go through a tunnel
        if self._instance_config.local_ssh_port:
            return '127.0.0.1'

        # get a public IP address of the running instance
        instance = self.instance_deployment.get_instance()
        if not instance or not instance.is_running:
            raise InstanceNotRunningError(self.instance_config.name)

        # fall back to the private IP address when there is no public one
        instance_ip_address = instance.public_ip_address if instance.public_ip_address else instance.private_ip_address
        if not instance_ip_address:
            raise ValueError('Instance IP address not found')

        return instance_ip_address

    @property
    def ssh_port(self) -> int:
        # the local tunnel port takes precedence over the default SSH port
        if self._instance_config.local_ssh_port:
            return self._instance_config.local_ssh_port

        return 22

    @property
    def use_tmux(self) -> bool:
        return True
class AbstractDataTransfer(ABC):
    """Moves project files between the local machine, the project bucket and
    the instance.
    """

    def __init__(self, local_project_dir: str, host_project_dir: str, sync_filters: list, instance_name: str):
        self._local_project_dir = local_project_dir
        self._host_project_dir = host_project_dir
        self._sync_filters = sync_filters
        self._instance_name = instance_name

    @property
    def instance_name(self):
        return self._instance_name

    @property
    @abstractmethod
    def scheme_name(self) -> str:
        """URL scheme used to build bucket paths."""
        raise NotImplementedError

    def _get_bucket_project_path(self, bucket_name: str) -> str:
        """A bucket path where the project files are located."""
        scheme = self.scheme_name
        return '%s://%s/project' % (scheme, bucket_name)

    def _get_bucket_downloads_path(self, bucket_name: str) -> str:
        """A bucket path where the downloaded files are located.

        The path is namespaced by the instance name.
        """
        scheme = self.scheme_name
        return '%s://%s/download/instance-%s' % (scheme, bucket_name, self.instance_name)

    @abstractmethod
    def upload_local_to_bucket(self, bucket_name: str, dry_run: bool = False):
        """Uploads files from local to the bucket."""
        raise NotImplementedError

    @abstractmethod
    def download_bucket_to_local(self, bucket_name: str, download_filters: list):
        """Downloads files from the bucket to local."""
        raise NotImplementedError

    @abstractmethod
    def get_download_bucket_to_instance_command(self, bucket_name: str, use_sudo: bool = False) -> str:
        """A remote command to download files from the bucket to the instance."""
        raise NotImplementedError

    @abstractmethod
    def get_upload_instance_to_bucket_command(self, bucket_name: str, download_filters: list,
                                              use_sudo: bool = False, dry_run: bool = False) -> str:
        """A remote command to upload files from the instance to the bucket."""
        raise NotImplementedError
class AbstractInstanceDeployment(ABC):
    """Deploys, inspects and deletes a single cloud instance."""

    def __init__(self, instance_config: AbstractInstanceConfig):
        self._instance_config = instance_config

    @property
    def instance_config(self) -> AbstractInstanceConfig:
        return self._instance_config

    @abstractmethod
    def get_instance(self) -> AbstractInstance:
        """Returns information about the instance if it exists."""
        raise NotImplementedError

    @abstractmethod
    def deploy(self, container_commands: AbstractContainerCommands, bucket_name: str,
               data_transfer: AbstractDataTransfer, output: AbstractOutputWriter, dry_run: bool = False):
        """Deploys or redeploys the instance."""
        raise NotImplementedError

    @abstractmethod
    def delete(self, output: AbstractOutputWriter):
        """Deletes the stack with the instance and applies deletion policies for the volumes."""
        raise NotImplementedError


class BucketNotFoundError(Exception):
    """Raised when the project's bucket doesn't exist."""

    def __init__(self):
        super().__init__('Bucket for the project not found.')


"""
INSTANCE FILE STRUCTURE
"""

# a base temporary directory on an instance
INSTANCE_SPOTTY_TMP_DIR = '/tmp/spotty'

# a base directory for container-related files and directories
CONTAINERS_TMP_DIR = INSTANCE_SPOTTY_TMP_DIR + '/containers'

# a base directory for instance-related files and directories
INSTANCE_DIR = INSTANCE_SPOTTY_TMP_DIR + '/instance'

# helper scripts
INSTANCE_SCRIPTS_DIR = INSTANCE_DIR + '/scripts'

# instance startup scripts
INSTANCE_STARTUP_SCRIPTS_DIR = INSTANCE_SCRIPTS_DIR + '/startup'

# a path to the script that attaches user to the container
CONTAINER_BASH_SCRIPT_PATH = INSTANCE_SCRIPTS_DIR + '/container_bash.sh'


class AbstractBucket(ABC):
    """A cloud storage bucket."""

    @property
    def name(self):
        raise NotImplementedError
class AbstractInstance(ABC):
    """Read-only state of a cloud instance plus its lifecycle operations."""

    @property
    def is_running(self):
        """Returns true if the instance is running."""
        raise NotImplementedError

    @property
    def is_stopped(self):
        """Returns true if the instance is stopped, so it can be restarted."""
        raise NotImplementedError

    @property
    def public_ip_address(self):
        raise NotImplementedError

    @property
    def private_ip_address(self):
        raise NotImplementedError

    def terminate(self, wait: bool = True):
        raise NotImplementedError

    def stop(self, wait: bool = True):
        raise NotImplementedError
class AbstractDockerInstanceManager(AbstractInstanceManager, ABC):
    """An instance manager for hosts where the project runs inside a Docker container."""

    @property
    def container_commands(self) -> DockerCommands:
        """A collection of commands to manage a container from the host OS."""
        return DockerCommands(self.instance_config)

    def is_container_running(self) -> bool:
        """Checks if the container is running."""
        check_cmd = self.container_commands.is_created(is_running=True)
        return self.exec(check_cmd) == 0

    def start_container(self, output: AbstractOutputWriter, dry_run=False):
        """Starts or restarts container on the host OS."""
        # make sure the Dockerfile exists
        self._check_dockerfile_exists()

        # sync the project with the instance; "nothing to do" is not an error here
        try:
            self.sync(output, dry_run=dry_run)
        except NothingToDoError:
            pass

        # render a script that starts the container and execute it on the host OS
        script = StartContainerScript(self.container_commands).render()
        command = get_script_command('start-container', script)
        if self.exec(command) != 0:
            raise ValueError('Failed to start the container')

    def start(self, output: AbstractOutputWriter, dry_run=False):
        # start or restart container
        self.start_container(output, dry_run=dry_run)

    def stop(self, only_shutdown: bool, output: AbstractOutputWriter):
        # render a script that stops the container and execute it on the host OS
        script = StopContainerScript(self.container_commands).render()
        command = get_script_command('stop-container', script)
        if self.exec(command) != 0:
            raise ValueError('Failed to stop the container')

    def get_status_text(self):
        status = 'Container is running.' if self.is_container_running() else 'Container is not running.'
        return render_table([(status,)])

    def _check_dockerfile_exists(self):
        """Raises an error if a Dockerfile specified in the configuration file but doesn't exist."""
        dockerfile = self.instance_config.container_config.file
        if not dockerfile:
            return

        dockerfile_path = os.path.join(self.project_config.project_dir, dockerfile)
        if not os.path.isfile(dockerfile_path):
            raise FileNotFoundError('A Dockerfile specified in the container configuration doesn\'t exist:\n '
                                    + dockerfile_path)
    @abstractmethod
    def start_container(self, output: AbstractOutputWriter, dry_run=False):
        """Starts or restarts container on the host OS."""
        raise NotImplementedError

    @abstractmethod
    def stop(self, only_shutdown: bool, output: AbstractOutputWriter):
        """Deletes the stack."""
        raise NotImplementedError

    def exec(self, command: str, tty: bool = True) -> int:
        """Executes a command on the host OS.

        Runs through the shell, so the command must already be quoted by the caller.
        Returns the command's exit code.
        """
        return subprocess.call(command, shell=True)

    @abstractmethod
    def clean(self, output: AbstractOutputWriter):
        """Deletes the stack."""
        raise NotImplementedError

    @abstractmethod
    def sync(self, output: AbstractOutputWriter, dry_run=False):
        """Synchronizes the project code with the instance."""
        raise NotImplementedError

    @abstractmethod
    def download(self, download_filters: list, output: AbstractOutputWriter, dry_run=False):
        """Downloads files from the instance."""
        raise NotImplementedError

    @abstractmethod
    def get_status_text(self) -> str:
        """Returns information about the started instance.

        It will be shown to the user once the instance is started and by using the
        "status" command.
        """
        raise NotImplementedError

    @property
    def use_tmux(self) -> bool:
        """Use tmux when running a custom script or connecting to the instance."""
        return False


================================================
FILE: spotty/deployment/abstract_ssh_instance_manager.py
================================================
import logging
import os
from abc import abstractmethod
from spotty.deployment.utils.commands import get_ssh_command
from spotty.deployment.abstract_docker_instance_manager import AbstractDockerInstanceManager


class AbstractSshInstanceManager(AbstractDockerInstanceManager):
    """Instance manager that runs host-OS commands on the instance over SSH."""

    def exec(self, command: str, tty: bool = True) -> int:
        """Executes a command on the host OS.

        Raises:
            ValueError: if the configured SSH key file doesn't exist locally.
        """
        if not os.path.isfile(self.ssh_key_path):
            raise ValueError('SSH key doesn\'t exist: ' + self.ssh_key_path)

        # wrap the command into an SSH call to the instance
        ssh_command = get_ssh_command(self.ssh_host, self.ssh_port, self.ssh_user, self.ssh_key_path, command,
                                      env_vars=self.ssh_env_vars, tty=tty)
        logging.debug('SSH command: ' + ssh_command)

        # the parent "exec" runs the SSH command in a local shell
        return super().exec(ssh_command)

    @property
    @abstractmethod
    def ssh_host(self):
        # hostname or IP address of the instance
        raise NotImplementedError

    @property
    @abstractmethod
    def ssh_port(self) -> int:
        raise NotImplementedError

    @property
    @abstractmethod
    def ssh_key_path(self) -> str:
        # local path to the private SSH key
        raise NotImplementedError

    @property
    def ssh_user(self) -> str:
        return self.instance_config.user

    @property
    def ssh_env_vars(self) -> dict:
        """Environment variables that will be exported when ssh-ing to the instance."""
        return {
            'SPOTTY_CONTAINER_NAME': self.instance_config.full_container_name,
            'SPOTTY_CONTAINER_WORKING_DIR': self.instance_config.container_config.working_dir,
        }

    @property
    def use_tmux(self) -> bool:
        """Use tmux when running a custom script or connecting to the instance."""
        return True


================================================
FILE: spotty/deployment/container/__init__.py
================================================


================================================
FILE: spotty/deployment/container/abstract_container_commands.py
================================================
from abc import ABC, abstractmethod
from spotty.config.abstract_instance_config import AbstractInstanceConfig


class AbstractContainerCommands(ABC):
    """Generates shell commands to manage a container from the host OS."""

    def __init__(self, instance_config: AbstractInstanceConfig):
        self._instance_config = instance_config

    @property
    def instance_config(self) -> AbstractInstanceConfig:
        return self._instance_config

    @abstractmethod
    def exec(self, command: str, interactive: bool = False, tty: bool = False, user: str = None,
             container_name: str = None, working_dir: str = None) -> str:
        """Returns a shell command that runs "command" inside the container."""
        raise NotImplementedError


================================================
FILE: spotty/deployment/container/abstract_container_script.py
================================================
from abc import ABC, abstractmethod
from spotty.deployment.container.abstract_container_commands import AbstractContainerCommands


class AbstractContainerScript(ABC):
    """Base class for scripts that are generated from container commands."""

    def __init__(self, container_commands: AbstractContainerCommands):
        self._commands = container_commands

    @property
    def commands(self) -> AbstractContainerCommands:
        return self._commands

    @abstractmethod
    def render(self) -> str:
        """Returns the rendered script content."""
        raise NotImplementedError


================================================
FILE: spotty/deployment/container/docker/__init__.py
================================================


================================================
FILE: spotty/deployment/container/docker/docker_commands.py
================================================
import shlex
from spotty.deployment.container.abstract_container_commands import AbstractContainerCommands
from spotty.deployment.utils.cli import shlex_join


class DockerCommands(AbstractContainerCommands):
    """Generates "docker ..." shell commands for the configured container."""

    def build(self, image_name: str) -> str:
        """Returns a command that builds the Docker image from the configured Dockerfile.

        Raises:
            ValueError: if the Dockerfile path or the Docker context path is not configured.
        """
        if not self._instance_config.dockerfile_path:
            raise ValueError('Cannot generate the "build" command as Dockerfile path is not specified')

        if not self._instance_config.docker_context_path:
            raise ValueError('Cannot generate the "build" command as Docker context path is not set')

        build_cmd = 'docker build -t %s -f %s %s' % (image_name, shlex.quote(self._instance_config.dockerfile_path),
                                                     shlex.quote(self._instance_config.docker_context_path))

        # pass the host user's UID/GID to the build so the image can create a matching user
        if self._instance_config.container_config.run_as_host_user:
            build_cmd += ' --build-arg USER_ID=$(id -u %s) --build-arg GROUP_ID=$(id -g %s)' \
                         % (self._instance_config.user, self._instance_config.user)

        return build_cmd

    def pull(self) -> str:
        """Returns a command that pulls the configured image."""
        return 'docker pull ' + self._instance_config.container_config.image

    def run(self, image_name: str = None) -> str:
        """Returns a command that starts a detached container from the given (or configured) image."""
        image_name = image_name if image_name else self._instance_config.container_config.image

        # prepare "docker run" arguments ("-td" keeps the detached container alive with a tty)
        args = ['-td'] + self._instance_config.container_config.runtime_parameters

        if self._instance_config.container_config.host_network:
            args += ['--net=host']

        for port in self._instance_config.container_config.ports:
            host_port = port['hostPort']
            container_port = port['containerPort']
            # if no host port is specified, let Docker pick one
            args += ['-p', ('%d:%d' % (host_port, container_port)) if host_port else str(container_port)]

        for volume_mount in self._instance_config.volume_mounts:
            args += ['-v', '%s:%s:%s' % (volume_mount.host_path, volume_mount.mount_path, volume_mount.mode)]

        for env_name, env_value in self._instance_config.container_config.env.items():
            args += ['-e', '%s=%s' % (env_name, env_value)]

        args += ['--name', self._instance_config.full_container_name]

        # enable GPUs only if the NVIDIA driver is available on the host
        run_cmd = 'docker run $(nvidia-smi &> /dev/null && echo "--gpus all")'

        # run the container as the host user and expose the host UID/GID inside the container
        if self._instance_config.container_config.run_as_host_user:
            run_cmd += ' -u $(id -u %s):$(id -g %s) -e HOST_USER_ID=$(id -u %s) -e HOST_GROUP_ID=$(id -g %s)' \
                       % tuple([self._instance_config.user] * 4)

        run_cmd += ' %s %s /bin/sh > /dev/null' % (shlex_join(args), image_name)

        return run_cmd

    def is_created(self, container_name: str = None, is_running: bool = False) -> str:
        """Returns a shell test that succeeds if the container exists (or is running)."""
        container_name = container_name if container_name else self._instance_config.full_container_name

        # "docker ps" lists only running containers, "docker ps -a" lists all of them
        show_all = '' if is_running else 'a'

        # non-empty "docker ps -q" output means the container was found
        # NOTE(review): the "name" filter is not an exact match — confirm container names
        # are unique enough to avoid false positives
        test_cmd = '[ $(docker ps -q%s --filter name="%s" | wc -c) -ne 0 ]' % (show_all, container_name)

        return test_cmd

    def remove(self):
        """Returns a command that force-removes the container."""
        return 'docker rm -f "%s" > /dev/null' % self._instance_config.full_container_name

    def exec(self, command: str, interactive: bool = False, tty: bool = False, user: str = None,
             container_name: str = None, working_dir: str = None) -> str:
        """Returns a command that runs "command" inside the container, failing with a
        helpful message if the container is not running."""
        container_name = container_name if container_name else self._instance_config.full_container_name
        working_dir = working_dir if working_dir else self._instance_config.container_config.working_dir

        exec_cmd = 'docker exec'
        if interactive:
            exec_cmd += ' -i'
        if tty:
            exec_cmd += ' -t'
        if user:
            exec_cmd += ' -u ' + shlex.quote(user)
        if working_dir:
            # no quoting, it can be an environment variable
            exec_cmd += ' -w ' + working_dir

        exec_cmd += ' %s %s' % (container_name, command)

        # run "exec" command only if the container is running
        test_cmd = self.is_created(container_name, is_running=True)
        error_msg = 'Container is not running.\\nUse the "spotty start -C" command to start it.\\n'
        cond_exec_cmd = 'if %s; then %s; else printf %s; exit 1; fi' % (test_cmd, exec_cmd, shlex.quote(error_msg))

        return cond_exec_cmd


================================================
FILE: spotty/deployment/container/docker/scripts/__init__.py
================================================


================================================
FILE: spotty/deployment/container/docker/scripts/abstract_docker_script.py
================================================
from abc import ABC
from spotty.deployment.container.docker.docker_commands import DockerCommands
from spotty.deployment.container.abstract_container_script import AbstractContainerScript


class AbstractDockerScript(AbstractContainerScript, ABC):
    """Base class for Docker-specific scripts."""

    @property
    def commands(self) -> DockerCommands:
        # narrows the type of the inherited "commands" property to DockerCommands
        return self._commands


================================================
FILE: spotty/deployment/container/docker/scripts/container_bash_script.py
================================================
import os
import chevron
from spotty.deployment.utils.commands
import get_bash_command
from spotty.deployment.container.docker.scripts.abstract_docker_script import AbstractDockerScript


class ContainerBashScript(AbstractDockerScript):
    """Renders a script that opens an interactive bash session inside the container."""

    def render(self) -> str:
        # read template file
        template_path = os.path.join(os.path.dirname(__file__), 'data', 'container_bash.sh.tpl')
        with open(template_path) as f:
            template = f.read()

        # render the script; the container name and working directory are resolved
        # from environment variables when the script runs on the instance
        content = chevron.render(template, data={
            'docker_exec_bash': self.commands.exec(get_bash_command(), interactive=True, tty=True,
                                                   container_name='$SPOTTY_CONTAINER_NAME',
                                                   working_dir='$SPOTTY_CONTAINER_WORKING_DIR'),
        })

        return content


================================================
FILE: spotty/deployment/container/docker/scripts/data/container_bash.sh.tpl
================================================
#!/bin/bash -e

if [ -z "$SPOTTY_CONTAINER_NAME" ]; then
  echo -e "\nSPOTTY_CONTAINER_NAME environmental variable is not set.\n"
  exit 1
fi

SPOTTY_CONTAINER_WORKING_DIR=${SPOTTY_CONTAINER_WORKING_DIR:-/}

{{{docker_exec_bash}}}


================================================
FILE: spotty/deployment/container/docker/scripts/data/start_container.sh.tpl
================================================
#!/usr/bin/env bash

{{bash_flags}}

if {{{is_created_cmd}}}; then
  printf 'Removing existing container... '
  {{{remove_cmd}}}
  echo 'DONE'
fi

{{> before_image_build}}

{{#build_image_cmd}}
echo 'Building Docker image...'
{{{build_image_cmd}}}
{{/build_image_cmd}}

{{> before_container_run}}

{{#pull_image_cmd}}
i=0
until [ "$i" -ge 3 ]
do
  if [ "$i" -ne 0 ]; then
    echo "Retrying to pull the image $i..."
  fi

  PULL_EXIT_CODE=0
  {{{pull_image_cmd}}} || PULL_EXIT_CODE=$?

  if [ "$PULL_EXIT_CODE" -ne 125 ]; then
    break
  fi

  i=$((i+1))
  sleep 10
done

if [ "$PULL_EXIT_CODE" -ne 0 ]; then
  exit $PULL_EXIT_CODE
fi
{{/pull_image_cmd}}

printf 'Starting container... '
{{{start_container_cmd}}}
echo 'DONE'

{{> before_startup_commands}}

{{#docker_exec_startup_script_cmd}}
echo 'Running startup commands...'
{{{docker_exec_startup_script_cmd}}}
{{/docker_exec_startup_script_cmd}}


================================================
FILE: spotty/deployment/container/docker/scripts/data/stop_container.sh.tpl
================================================
#!/usr/bin/env bash

set -e

if {{{is_created_cmd}}}; then
  printf 'Removing the container... '
  {{{remove_cmd}}}
  echo 'DONE'
else
  echo 'Container is not running.'
fi


================================================
FILE: spotty/deployment/container/docker/scripts/start_container_script.py
================================================
import os
import time
import chevron
from spotty.deployment.utils.commands import get_script_command
from spotty.deployment.container.docker.scripts.abstract_docker_script import AbstractDockerScript


class StartContainerScript(AbstractDockerScript):
    """Renders the script that (re)builds or pulls the image and (re)starts the container."""

    def _partials(self) -> dict:
        # hook points for subclasses (e.g. to inject CloudFormation signal commands)
        return {
            'before_image_build': '',
            'before_container_run': '',
            'before_startup_commands': '',
        }

    def render(self, print_trace: bool = False) -> str:
        # read template file
        template_path = os.path.join(os.path.dirname(__file__), 'data', 'start_container.sh.tpl')
        with open(template_path) as f:
            template = f.read()

        # generate "docker build" command if necessary; otherwise pull the configured image
        if self.commands.instance_config.dockerfile_path:
            # tag the locally-built image with the current timestamp
            image_name = '%s:%d' % (self.commands.instance_config.full_container_name, time.time())
            build_image_cmd = self.commands.build(image_name)
            pull_image_cmd = ''
        else:
            image_name = self.commands.instance_config.container_config.image
            build_image_cmd = ''
            pull_image_cmd = self.commands.pull()

        # generate a command to run the startup script (as root, inside the container)
        exec_script_cmd = ''
        if self.commands.instance_config.container_config.commands:
            startup_script_cmd = get_script_command('container-startup-commands',
                                                    self.commands.instance_config.container_config.commands)
            exec_script_cmd = self.commands.exec(startup_script_cmd, user='root')

        # render the script
        content = chevron.render(template, data={
            'bash_flags': 'set -xe' if print_trace else 'set -e',
            'is_created_cmd': self.commands.is_created(),
            'remove_cmd': self.commands.remove(),
            'build_image_cmd': build_image_cmd,
            'pull_image_cmd': pull_image_cmd,
            # NOTE(review): "tmp_container_dir" doesn't appear in start_container.sh.tpl —
            # confirm it's still needed
            'tmp_container_dir': self.commands.instance_config.host_container_dir,
            'start_container_cmd': self.commands.run(image_name),
            'docker_exec_startup_script_cmd': exec_script_cmd,
        }, partials_dict=self._partials())

        return content


================================================
FILE: spotty/deployment/container/docker/scripts/stop_container_script.py
================================================
import os
import chevron
from spotty.deployment.container.docker.scripts.abstract_docker_script import AbstractDockerScript


class StopContainerScript(AbstractDockerScript):
    """Renders the script that removes the container if it exists."""

    def render(self) -> str:
        # read template file
        template_path = os.path.join(os.path.dirname(__file__), 'data', 'stop_container.sh.tpl')
        with open(template_path) as f:
            template = f.read()

        # render the script
        content = chevron.render(template, data={
            'is_created_cmd': self.commands.is_created(),
            'remove_cmd': self.commands.remove(),
        })

        return content


================================================
FILE: spotty/deployment/utils/__init__.py
================================================


================================================
FILE: spotty/deployment/utils/cli.py
================================================
import shlex


def shlex_join(split_command: list):
    """Return a shell-escaped string from *split_command*.

    Copy-pasted from the Python 3.8 code.
""" return ' '.join(shlex.quote(arg) for arg in split_command) ================================================ FILE: spotty/deployment/utils/commands.py ================================================ import base64 import os import shlex import time from spotty.deployment.utils.cli import shlex_join def get_bash_command() -> str: return '/usr/bin/env bash' def get_script_command(script_name: str, script_content: str, script_args: list = None, logging: bool = False) -> str: """Encodes a multi-line script into base64 and returns a one-line command that unpacks the script to a temporary file and runs it.""" # encode the script content to base64 script_base64 = base64.b64encode(script_content.encode('utf-8')).decode('utf-8') # command to decode the script, save it to a temporary file and run inside the container script_args = shlex_join(script_args) if script_args else '' script_cmd = ' && '.join([ 'TMPDIR=${TMPDIR%/}', 'TMP_SCRIPT_PATH=$(mktemp ${TMPDIR:-/tmp}/spotty-%s.XXXXXXXX)' % script_name, 'chmod +x $TMP_SCRIPT_PATH', 'echo %s | base64 -d > $TMP_SCRIPT_PATH' % script_base64, '$TMP_SCRIPT_PATH ' + script_args, ]) # log the command output to a file if logging: log_file_path = '/var/log/spotty/run/%s-%d.log' % (script_name, time.time()) script_cmd = get_log_command(script_cmd, log_file_path) # execute the command with bash script_cmd = '%s -c %s' % (get_bash_command(), shlex.quote(script_cmd)) return script_cmd def get_log_command(command: str, log_file_path: str) -> str: # log the command outputs to a file on the host OS log_dir = os.path.dirname(log_file_path) log_cmd = '; '.join([ 'set -o pipefail', ' && '.join([ 'mkdir -pm 777 ' + shlex.quote(log_dir), '(%s) 2>&1 | tee %s' % (command, shlex.quote(log_file_path)), ]), ]) return log_cmd def get_tmux_session_command(command: str, session_name: str, window_name: str = None, default_command: str = None, keep_pane: bool = False) -> str: session_cmd = 'tmux new -A -s ' + session_name if window_name: session_cmd += ' 
-n ' + window_name if command: # keep the pane alive when the script is finished keep_pane_cmd = 'tmux set -w remain-on-exit on; ' if keep_pane else '' # set the default command (to automatically run bash inside the container when a new window is created) default_command_cmd = ('tmux set default-command %s; ' % shlex.quote(default_command)) \ if default_command else '' # keep the pane alive if the script is failed tmux_cmd = '%s%s(%s) || tmux set -w remain-on-exit on' % (keep_pane_cmd, default_command_cmd, command) # run the command inside the tmux session session_cmd += ' ' + shlex.quote(tmux_cmd) # use tmux only if it's installed session_cmd = 'if command -v tmux &> /dev/null; then %s; else %s; fi' % (session_cmd, command) return session_cmd def get_ssh_command(host: str, port: int, user: str, key_path: str, command: str, env_vars: dict = None, tty: bool = True, quiet: bool = False) -> str: ssh_command = 'ssh -i %s -o StrictHostKeyChecking=no -o ConnectTimeout=10' % shlex.quote(key_path) if tty: ssh_command += ' -t' if port != 22: ssh_command += ' -p %d' % port if quiet: ssh_command += ' -q' # export environmental variables if env_vars: export_cmd = '; '.join(['export %s=%s' % (name, shlex.quote(val)) for name, val in env_vars.items()]) command = '%s; %s' % (export_cmd, command) # final SSH command ssh_command += ' %s@%s %s' % (user, host, shlex.quote(command)) return ssh_command ================================================ FILE: spotty/deployment/utils/print_info.py ================================================ from typing import List from spotty.config.abstract_instance_config import VolumeMount from spotty.config.abstract_instance_volume import AbstractInstanceVolume from spotty.config.container_config import PROJECT_VOLUME_MOUNT_NAME from spotty.utils import render_table def render_volumes_info_table(volume_mounts: List[VolumeMount], volumes: List[AbstractInstanceVolume]): table = [('Name', 'Mount Path', 'Type', 'Deletion Policy')] # add volume mounts 
to the info table volumes_dict = {volume.name: volume for volume in volumes} for volume_mount in volume_mounts: if not volume_mount.hidden: # the volume will be mounted to the container volume = volumes_dict[volume_mount.name] vol_mount_name = '-' if volume_mount.name == PROJECT_VOLUME_MOUNT_NAME else volume_mount.name deletion_policy = volume.deletion_policy_title if volume.deletion_policy_title else '-' table.append((vol_mount_name, volume_mount.mount_path, volume.title, deletion_policy)) # add volumes that were not mounted to the container to the info table volume_mounts_dict = {volume_mount.name for volume_mount in volume_mounts} for volume in volumes: if volume.name not in volume_mounts_dict: deletion_policy = volume.deletion_policy_title if volume.deletion_policy_title else '-' table.append((volume.name, '-', volume.title, deletion_policy)) return render_table(table, separate_title=True) ================================================ FILE: spotty/deployment/utils/user_scripts.py ================================================ import re import chevron from spotty.deployment.utils.commands import get_bash_command def parse_script_parameters(script_params: str): """Parses script parameters.""" params = {} for param in script_params: match = re.match('(\\w+)=(.*)', param) if not match: raise ValueError('Invalid format for the script parameter: "%s" (the "PARAMETER=VALUE" format is expected).' % param) param_name, param_value = match.groups() if param_name in params: raise ValueError('Parameter "%s" was defined twice.' % param_name) params[param_name] = param_value return params def render_script(template: str, params: dict): """Renders a script template. It based on the Mustache templates, but only variables and delimiter changes are allowed. Raises an exception if one of the provided parameters doesn't exist in the template. 
""" tokens = list(chevron.tokenizer.tokenize(template)) template_keys = set() for tag, key in tokens: if tag not in ['literal', 'no escape', 'variable', 'set delimiter']: raise ValueError('Script templates support only variables and delimiter changes.') template_keys.add(key) # check that the script contains keys for all provided parameters for key in params: if key not in template_keys: raise ValueError('Parameter "%s" doesn\'t exist in the script.' % key) content = chevron.render(tokens, params) if content[:2] != '#!': content = ('#!%s\n\nset -xe\n\n' % get_bash_command()) + content return content ================================================ FILE: spotty/errors/__init__.py ================================================ ================================================ FILE: spotty/errors/instance_not_running.py ================================================ class InstanceNotRunningError(Exception): def __init__(self, instance_name: str): super().__init__('Instance "%s" is not running.\n' 'Use the "spotty start %s" command to start the instance.' 
% (instance_name, instance_name))


================================================
FILE: spotty/errors/nothing_to_do.py
================================================
class NothingToDoError(Exception):
    # raised when a command has nothing to change (e.g. nothing to sync)
    pass


================================================
FILE: spotty/providers/__init__.py
================================================


================================================
FILE: spotty/providers/aws/__init__.py
================================================


================================================
FILE: spotty/providers/aws/cfn_templates/__init__.py
================================================


================================================
FILE: spotty/providers/aws/cfn_templates/instance/__init__.py
================================================


================================================
FILE: spotty/providers/aws/cfn_templates/instance/data/files/tmux.conf
================================================
bind-key x kill-pane


================================================
FILE: spotty/providers/aws/cfn_templates/instance/data/startup_scripts/01_prepare_instance.sh
================================================
#!/bin/bash -xe

# report to CloudFormation that the instance reached this step
cfn-signal -e 0 --stack ${AWS::StackName} --region ${AWS::Region} --resource PreparingInstanceSignal

# install AWS CLI
update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8
apt-get update && apt-get install -y python3-pip
pip3 install -U awscli
aws configure set default.region ${AWS::Region}

# install jq
apt-get install -y jq

# create an alias to connect to the docker container
CONTAINER_BASH_ALIAS=container
echo "alias $CONTAINER_BASH_ALIAS=\"{{CONTAINER_BASH_SCRIPT_PATH}}\"" >> /home/ubuntu/.bashrc
echo "alias $CONTAINER_BASH_ALIAS=\"{{CONTAINER_BASH_SCRIPT_PATH}}\"" >> /root/.bashrc

# create common temporary directories
mkdir -pm 777 '{{SPOTTY_TMP_DIR}}'
mkdir -pm 777 '{{CONTAINERS_TMP_DIR}}'


================================================
FILE: spotty/providers/aws/cfn_templates/instance/data/startup_scripts/02_mount_volumes.sh
================================================
#!/bin/bash -xe

cfn-signal -e 0 --stack ${AWS::StackName} --region ${AWS::Region} --resource MountingVolumesSignal

# mount volumes
# NOTE: this script is processed by CloudFormation Fn::Sub, so literal shell
# expansions are escaped with an exclamation mark
DEVICE_LETTERS=(f g h i j k l m n o p)
MOUNT_DIRS=({{{MOUNT_DIRS}}})
for i in ${!!MOUNT_DIRS[*]}
do
  MOUNT_DIR=${!MOUNT_DIRS[$i]}
  DEVICE=/dev/xvd${!DEVICE_LETTERS[$i]}

  # NVMe EBS volume (see: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html)
  if [ ! -b $DEVICE ]; then
    VOLUME_ID=$(cfn-get-metadata --stack ${AWS::StackName} --region ${AWS::Region} --resource VolumeAttachment${!DEVICE_LETTERS[$i]^} -k VolumeId)
    DEVICE=$(lsblk -o NAME,SERIAL -dpJ | jq -rc ".blockdevices[] | select(.serial == \"${!VOLUME_ID//-}\") | .name")
    if [ -z "$DEVICE" ]; then
      echo "Device for the volume $VOLUME_ID not found"
      exit 1
    fi
  fi

  # format the volume only if it doesn't already have a filesystem
  blkid -o value -s TYPE $DEVICE || mkfs -t ext4 $DEVICE
  mkdir -p $MOUNT_DIR
  mount $DEVICE $MOUNT_DIR
  chmod 777 $MOUNT_DIR
  resize2fs $DEVICE
done

# create directories for temporary container volumes
{{#TMP_VOLUME_DIRS}}
mkdir -p {{PATH}}
chmod 777 {{PATH}}
{{/TMP_VOLUME_DIRS}}


================================================
FILE: spotty/providers/aws/cfn_templates/instance/data/startup_scripts/03_set_docker_root.sh
================================================
#!/bin/bash -xe

cfn-signal -e 0 --stack ${AWS::StackName} --region ${AWS::Region} --resource SettingDockerRootSignal

# change docker data root directory
if [ -n "${DockerDataRootDirectory}" ]; then
  jq '.
+ { "data-root": "${DockerDataRootDirectory}" }' /etc/docker/daemon.json > /tmp/docker_daemon.json \ && mv /tmp/docker_daemon.json /etc/docker/daemon.json service docker restart fi ================================================ FILE: spotty/providers/aws/cfn_templates/instance/data/startup_scripts/04_sync_project.sh ================================================ #!/bin/bash -xe cfn-signal -e 0 --stack ${AWS::StackName} --region ${AWS::Region} --resource SyncingProjectSignal # create a project directory if [ -n "${HostProjectDirectory}" ]; then mkdir -p 777 ${HostProjectDirectory} chmod 777 ${HostProjectDirectory} if [ -d '${HostProjectDirectory}/lost+found' ]; then chmod 777 '${HostProjectDirectory}/lost+found' fi fi # sync project files from S3 bucket to the instance {{{SYNC_PROJECT_CMD}}} ================================================ FILE: spotty/providers/aws/cfn_templates/instance/data/startup_scripts/05_run_instance_startup_commands.sh ================================================ #!/bin/bash -xe cfn-signal -e 0 --stack ${AWS::StackName} --region ${AWS::Region} --resource RunningInstanceStartupCommandsSignal /bin/bash -xe {{INSTANCE_STARTUP_SCRIPTS_DIR}}/instance_startup_commands.sh ================================================ FILE: spotty/providers/aws/cfn_templates/instance/data/startup_scripts/user_data.sh ================================================ #!/bin/bash -x cd /root || exit 1 # install CloudFormation tools if they are not installed yet if [ ! -e /usr/local/bin/cfn-init ]; then apt-get update apt-get install -y python-setuptools mkdir -p aws-cfn-bootstrap-latest curl https://s3.amazonaws.com/cloudformation-examples/aws-cfn-bootstrap-latest.tar.gz | tar xz -C aws-cfn-bootstrap-latest --strip-components 1 python2 -m easy_install aws-cfn-bootstrap-latest fi # prepare the instance and run Docker container cfn-init \ --stack ${AWS::StackName} \ --region ${AWS::Region} \ --resource InstanceLaunchTemplate \ -c init \ -v STACK_CREATED=$? 
# upload cfn-init logs to the bucket (typo fixed: "uplooad")
if [ $STACK_CREATED -ne 0 ]; then
  STACK_ID=${AWS::StackId}
  STACK_UUID=${!STACK_ID##*/}
  aws s3 cp /var/log/cfn-init-cmd.log ${LogsS3Path}/$STACK_UUID/cfn-init-cmd.log
fi

# send signal that the Docker container is ready or failed
cfn-signal \
    -e $STACK_CREATED \
    --stack ${AWS::StackName} \
    --region ${AWS::Region} \
    --resource DockerReadyWaitCondition


================================================
FILE: spotty/providers/aws/cfn_templates/instance/data/template.yaml
================================================
Description: Spotty EC2 Instance

Parameters:
  VpcId:
    Description: VPC ID
    Type: AWS::EC2::VPC::Id
  InstanceProfileArn:
    Description: Instance Profile ARN
    Type: String
  InstanceType:
    Description: Instance type
    Type: String
  KeyName:
    Description: EC2 Key Pair name
    Type: AWS::EC2::KeyPair::KeyName
  ImageId:
    Description: AMI ID
    Type: AWS::EC2::Image::Id
  RootVolumeSize:
    Description: Root volume size
    Type: String
  DockerDataRootDirectory:
    Description: Docker data root directory
    Type: String
    Default: ''
  InstanceNameTag:
    Description: Name for the instance
    Type: String
    Default: ''
  HostProjectDirectory:
    Description: Destination directory for the project
    Type: String
    Default: ''
  LogsS3Path:
    Description: An S3 path where logs will be uploaded in the case of a failure
    Type: String
    Default: ''

Resources:
  Instance:
    Type: AWS::EC2::Instance
    Properties:
      LaunchTemplate:
        LaunchTemplateId: !Ref InstanceLaunchTemplate
        Version: !GetAtt InstanceLaunchTemplate.LatestVersionNumber

  InstanceLaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateData:
        InstanceType: !Ref InstanceType
        ImageId: !Ref ImageId
        KeyName: !Ref KeyName
        EbsOptimized: 'false'
        TagSpecifications:
          - ResourceType: instance
            Tags:
              - Key: Name
                Value: !Ref InstanceNameTag
        IamInstanceProfile:
          Arn: !Ref InstanceProfileArn
        SecurityGroupIds:
          - !Ref InstanceSecurityGroup
        # the instance is requested as a one-time Spot Instance and terminates on interruption
        InstanceInitiatedShutdownBehavior: terminate
        InstanceMarketOptions:
          MarketType: spot
          SpotOptions:
            SpotInstanceType: one-time
            InstanceInterruptionBehavior: terminate
        BlockDeviceMappings:
          - DeviceName: /dev/sda1
            Ebs:
              VolumeSize: !Ref RootVolumeSize
              DeleteOnTermination: true
        # filled in by the deployment code before the stack is created
        UserData: ''
    Metadata:
      'AWS::CloudFormation::Init': {}

  InstanceSecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      VpcId: !Ref VpcId
      GroupDescription: Spotty security group
      SecurityGroupEgress:
        - CidrIp: 0.0.0.0/0
          IpProtocol: -1
          FromPort: 0
          ToPort: 65535
        - CidrIpv6: ::/0
          IpProtocol: -1
          FromPort: 0
          ToPort: 65535
      SecurityGroupIngress:
        # SSH access only; extra ports are added by the deployment code
        - CidrIp: 0.0.0.0/0
          IpProtocol: tcp
          FromPort: 22
          ToPort: 22
        - CidrIpv6: ::/0
          IpProtocol: tcp
          FromPort: 22
          ToPort: 22

  # wait conditions signaled by the startup scripts to report instance progress
  PreparingInstanceSignal:
    Type: AWS::CloudFormation::WaitCondition
    DependsOn: Instance
    CreationPolicy:
      ResourceSignal:
        Timeout: PT30M

  MountingVolumesSignal:
    Type: AWS::CloudFormation::WaitCondition
    DependsOn: Instance
    CreationPolicy:
      ResourceSignal:
        Timeout: PT30M

  SettingDockerRootSignal:
    Type: AWS::CloudFormation::WaitCondition
    DependsOn: Instance
    CreationPolicy:
      ResourceSignal:
        Timeout: PT60M

  SyncingProjectSignal:
    Type: AWS::CloudFormation::WaitCondition
    DependsOn: Instance
    CreationPolicy:
      ResourceSignal:
        Timeout: PT60M

  RunningInstanceStartupCommandsSignal:
    Type: AWS::CloudFormation::WaitCondition
    DependsOn: Instance
    CreationPolicy:
      ResourceSignal:
        Timeout: PT60M

  BuildingDockerImageSignal:
    Type: AWS::CloudFormation::WaitCondition
    DependsOn: Instance
    CreationPolicy:
      ResourceSignal:
        Timeout: PT60M

  StartingContainerSignal:
    Type: AWS::CloudFormation::WaitCondition
    DependsOn: Instance
    CreationPolicy:
      ResourceSignal:
        Timeout: PT60M

  RunningContainerStartupCommandsSignal:
    Type: AWS::CloudFormation::WaitCondition
    DependsOn: Instance
    CreationPolicy:
      ResourceSignal:
        Timeout: PT60M

  DockerReadyWaitCondition:
    Type: AWS::CloudFormation::WaitCondition
    DependsOn: Instance
    CreationPolicy:
      ResourceSignal:
        Timeout: PT30M

Outputs:
  InstanceId:
    Value: !Ref Instance
  AvailabilityZone:
    Value: !GetAtt Instance.AvailabilityZone
================================================
FILE: spotty/providers/aws/cfn_templates/instance/start_container_script.py
================================================
from spotty.deployment.container.docker.scripts.start_container_script import StartContainerScript


class StartContainerScriptWithCfnSignals(StartContainerScript):
    """A "start container" script that reports progress to CloudFormation.

    The rendered script is embedded into a CloudFormation template and processed by
    Fn::Sub, so every literal "${" must be escaped as "${!". Real substitutions are
    written with the temporary "$_{...}" marker and converted back after the escaping.
    """

    @staticmethod
    def _get_signal_command(resource_name: str):
        # "$_{...}" is a placeholder that becomes a real Fn::Sub substitution in render()
        return 'cfn-signal -e 0 --stack $_{AWS::StackName} --region $_{AWS::Region} --resource ' + resource_name

    def _partials(self) -> dict:
        # inject signal commands into the hook points of the base start-container template
        return {
            'before_image_build': self._get_signal_command('BuildingDockerImageSignal'),
            'before_container_run': self._get_signal_command('StartingContainerSignal'),
            'before_startup_commands': self._get_signal_command('RunningContainerStartupCommandsSignal'),
        }

    def render(self, print_trace: bool = False) -> str:
        content = super().render(print_trace=print_trace)

        # escape all literal "${" sequences for Fn::Sub, ...
        content = content.replace('${', '${!')
        # ...then turn the "$_{...}" placeholders into real substitutions
        content = content.replace('$_{', '${')

        return content


================================================
FILE: spotty/providers/aws/cfn_templates/instance/template.py
================================================
from typing import List
import os
import chevron
import yaml
from cfn_tools import CfnYamlLoader, CfnYamlDumper
from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter
from spotty.config.tmp_dir_volume import TmpDirVolume
from spotty.config.validation import is_subdir
from spotty.config.abstract_instance_volume import AbstractInstanceVolume
from spotty.deployment.container.docker.docker_commands import DockerCommands
from spotty.deployment.container.docker.scripts.container_bash_script import ContainerBashScript
from spotty.deployment.abstract_cloud_instance.file_structure import INSTANCE_SPOTTY_TMP_DIR, \
    CONTAINER_BASH_SCRIPT_PATH, \
    INSTANCE_STARTUP_SCRIPTS_DIR, CONTAINERS_TMP_DIR
from spotty.providers.aws.cfn_templates.instance.start_container_script import StartContainerScriptWithCfnSignals
from
def prepare_instance_template(ec2, instance_config: InstanceConfig, docker_commands: DockerCommands,
                              availability_zone: str, sync_project_cmd: str, output: AbstractOutputWriter):
    """Prepares CloudFormation template to run a Spot Instance.

    Loads the base template from "data/template.yaml" and customizes it in place:
    adds EBS volume/attachment resources, pins the availability zone and subnet,
    opens the configured ports, sets Spot vs. on-demand market options, injects
    the user-data script and wires up the cfn-init startup scripts.

    Args:
        ec2: EC2 boto3 client.
        instance_config: The instance section of the Spotty configuration.
        docker_commands: Generator of Docker CLI commands for the container.
        availability_zone: AZ to run in, or an empty string to let AWS choose.
        sync_project_cmd: Command that downloads the project files to the instance.
        output: Writer for user-facing progress messages.

    Returns:
        The customized CloudFormation template serialized back to YAML.
    """
    # read and update CF template
    with open(os.path.join(os.path.dirname(__file__), 'data', 'template.yaml')) as f:
        template = yaml.load(f, Loader=CfnYamlLoader)

    # get volume resources and updated availability zone
    volume_resources = _get_volume_resources(ec2, instance_config.volumes, output)

    # add volume resources to the template
    template['Resources'].update(volume_resources)

    # set availability zone
    if availability_zone:
        template['Resources']['InstanceLaunchTemplate']['Properties']['LaunchTemplateData']['Placement'] = {
            'AvailabilityZone': availability_zone,
        }
        output.write('- availability zone: %s' % availability_zone)
    else:
        output.write('- availability zone: auto')

    # set subnet
    if instance_config.subnet_id:
        # a launch template cannot have both "NetworkInterfaces" and top-level "SecurityGroupIds",
        # so the security groups are moved onto the network interface
        template['Resources']['InstanceLaunchTemplate']['Properties']['LaunchTemplateData']['NetworkInterfaces'] = [{
            'SubnetId': instance_config.subnet_id,
            'DeviceIndex': 0,
            'Groups': template['Resources']['InstanceLaunchTemplate']['Properties']['LaunchTemplateData'][
                'SecurityGroupIds'],
        }]
        del template['Resources']['InstanceLaunchTemplate']['Properties']['LaunchTemplateData']['SecurityGroupIds']

    # add ports to the security group (port 22 is already open in the base template)
    for port in instance_config.ports:
        if port != 22:
            template['Resources']['InstanceSecurityGroup']['Properties']['SecurityGroupIngress'] += [{
                'CidrIp': '0.0.0.0/0',
                'IpProtocol': 'tcp',
                'FromPort': port,
                'ToPort': port,
            }, {
                'CidrIpv6': '::/0',
                'IpProtocol': 'tcp',
                'FromPort': port,
                'ToPort': port,
            }]

    if instance_config.is_spot_instance:
        # set maximum price
        if instance_config.max_price:
            template['Resources']['InstanceLaunchTemplate']['Properties']['LaunchTemplateData'] \
                ['InstanceMarketOptions']['SpotOptions']['MaxPrice'] = instance_config.max_price

        output.write('- maximum Spot Instance price: %s'
                     % (('%.04f' % instance_config.max_price) if instance_config.max_price else 'on-demand'))
    else:
        # run on-demand instance
        del template['Resources']['InstanceLaunchTemplate']['Properties']['LaunchTemplateData'][
            'InstanceMarketOptions']
        output.write('- on-demand instance')

    # set the user data script
    template['Resources']['InstanceLaunchTemplate']['Properties']['LaunchTemplateData']['UserData'] = {
        'Fn::Base64': {
            'Fn::Sub': _read_template_file(os.path.join('startup_scripts', 'user_data.sh')),
        },
    }

    # run sync command as a non-root user
    if instance_config.container_config.run_as_host_user:
        sync_project_cmd = 'sudo -u %s %s' % (instance_config.user, sync_project_cmd)

    # get mount directories
    mount_dirs = [volume.mount_dir for volume in instance_config.volumes if isinstance(volume, EbsVolume)]

    # set CloudFormation configs: each entry installs one numbered startup script and runs it;
    # the scripts themselves signal the corresponding WaitCondition resources
    cfn_init_configs = [
        {
            'name': 'prepare_instance',
            'files': {
                INSTANCE_STARTUP_SCRIPTS_DIR + '/01_prepare_instance.sh': {
                    'owner': 'ubuntu',
                    'group': 'ubuntu',
                    'mode': '000755',
                    'content': {
                        'Fn::Sub': _read_template_file(os.path.join('startup_scripts', '01_prepare_instance.sh'), {
                            'CONTAINER_BASH_SCRIPT_PATH': CONTAINER_BASH_SCRIPT_PATH,
                            'SPOTTY_TMP_DIR': INSTANCE_SPOTTY_TMP_DIR,
                            'CONTAINERS_TMP_DIR': CONTAINERS_TMP_DIR,
                        }),
                    },
                },
                CONTAINER_BASH_SCRIPT_PATH: {
                    'owner': 'ubuntu',
                    'group': 'ubuntu',
                    'mode': '000755',
                    'content': ContainerBashScript(docker_commands).render(),
                },
                '/home/ubuntu/.tmux.conf': {
                    'owner': 'ubuntu',
                    'group': 'ubuntu',
                    'mode': '000644',
                    'content': {
                        'Fn::Sub': _read_template_file(os.path.join('files', 'tmux.conf')),
                    },
                },
            },
            'command': INSTANCE_STARTUP_SCRIPTS_DIR + '/01_prepare_instance.sh',
        },
        {
            'name': 'mount_volumes',
            'files': {
                INSTANCE_STARTUP_SCRIPTS_DIR + '/02_mount_volumes.sh': {
                    'owner': 'ubuntu',
                    'group': 'ubuntu',
                    'mode': '000755',
                    'content': {
                        'Fn::Sub': _read_template_file(os.path.join('startup_scripts', '02_mount_volumes.sh'), {
                            'MOUNT_DIRS': ('"%s"' % '" "'.join(mount_dirs)) if mount_dirs else '',
                            'TMP_VOLUME_DIRS': [{'PATH': volume.host_path} for volume in instance_config.volumes
                                                if isinstance(volume, TmpDirVolume)],
                        }),
                    },
                },
            },
            'command': INSTANCE_STARTUP_SCRIPTS_DIR + '/02_mount_volumes.sh',
        },
        {
            'name': 'set_docker_root',
            'files': {
                INSTANCE_STARTUP_SCRIPTS_DIR + '/03_set_docker_root.sh': {
                    'owner': 'ubuntu',
                    'group': 'ubuntu',
                    'mode': '000755',
                    'content': {
                        'Fn::Sub': _read_template_file(os.path.join('startup_scripts', '03_set_docker_root.sh')),
                    },
                },
            },
            'command': INSTANCE_STARTUP_SCRIPTS_DIR + '/03_set_docker_root.sh',
        },
        {
            'name': 'sync_project',
            'files': {
                INSTANCE_STARTUP_SCRIPTS_DIR + '/04_sync_project.sh': {
                    'owner': 'ubuntu',
                    'group': 'ubuntu',
                    'mode': '000755',
                    'content': {
                        'Fn::Sub': _read_template_file(os.path.join('startup_scripts', '04_sync_project.sh'), {
                            'SYNC_PROJECT_CMD': sync_project_cmd,
                        }),
                    },
                },
            },
            'command': INSTANCE_STARTUP_SCRIPTS_DIR + '/04_sync_project.sh',
        },
        {
            'name': 'run_instance_startup_commands',
            'files': {
                INSTANCE_STARTUP_SCRIPTS_DIR + '/05_run_instance_startup_commands.sh': {
                    'owner': 'ubuntu',
                    'group': 'ubuntu',
                    'mode': '000755',
                    'content': {
                        'Fn::Sub': _read_template_file(
                            os.path.join('startup_scripts', '05_run_instance_startup_commands.sh'), {
                                'INSTANCE_STARTUP_SCRIPTS_DIR': INSTANCE_STARTUP_SCRIPTS_DIR,
                            }),
                    },
                },
                INSTANCE_STARTUP_SCRIPTS_DIR + '/instance_startup_commands.sh': {
                    'owner': 'ubuntu',
                    'group': 'ubuntu',
                    'mode': '000644',
                    # "#" is a no-op placeholder when no custom commands are configured
                    'content': instance_config.commands or '#',
                },
            },
            'command': INSTANCE_STARTUP_SCRIPTS_DIR + '/05_run_instance_startup_commands.sh',
        },
        {
            'name': 'start_container',
            'files': {
                INSTANCE_STARTUP_SCRIPTS_DIR + '/06_start_container.sh': {
                    'owner': 'ubuntu',
                    'group': 'ubuntu',
                    'mode': '000755',
                    'content': {
                        'Fn::Sub': StartContainerScriptWithCfnSignals(docker_commands).render(print_trace=True),
                    },
                },
            },
            'command': INSTANCE_STARTUP_SCRIPTS_DIR + '/06_start_container.sh',
        },
    ]

    # register the configs with cfn-init: one configSet running them in order
    template['Resources']['InstanceLaunchTemplate']['Metadata']['AWS::CloudFormation::Init']['configSets'] = {
        'init': [config['name'] for config in cfn_init_configs],
    }

    for config in cfn_init_configs:
        template['Resources']['InstanceLaunchTemplate']['Metadata']['AWS::CloudFormation::Init'][config['name']] = {
            'files': config.get('files', {}),
            'commands': {
                config['name']: {
                    'command': config['command'],
                }
            },
        }

    return yaml.dump(template, Dumper=CfnYamlDumper)


def _read_template_file(filename: str, params: dict = None) -> str:
    """Reads a file from the "data" directory, optionally rendering it with Mustache (chevron)."""
    with open(os.path.join(os.path.dirname(__file__), 'data', filename)) as f:
        content = f.read()

    if params:
        content = chevron.render(content, params)

    return content


def _get_volume_attachment_resource(volume_id, device_name) -> dict:
    """Returns an AWS::EC2::VolumeAttachment resource.

    "volume_id" is either an existing EBS volume ID (str) or a {'Ref': ...}
    dict pointing at a Volume resource created in the same template.
    """
    attachment_resource = {
        'Type': 'AWS::EC2::VolumeAttachment',
        'Properties': {
            'Device': device_name,
            'InstanceId': {'Ref': 'Instance'},
            'VolumeId': volume_id if isinstance(volume_id, str) else dict(volume_id),  # avoid YAML aliases
        },
        'Metadata': {
            'Device': device_name,
            'VolumeId': volume_id if isinstance(volume_id, str) else dict(volume_id),  # avoid YAML aliases
        },
    }

    return attachment_resource


def _get_volume_resource(ec2, volume: EbsVolume, output: AbstractOutputWriter) -> dict:
    """Returns an AWS::EC2::Volume resource for a volume that doesn't exist yet.

    If a snapshot with the volume's name exists, the volume is restored from it;
    otherwise an empty volume of the configured size is created.

    Raises:
        ValueError: If the configured size is smaller than the snapshot, or no
            size is given for a brand-new volume.
    """
    # new volume will be created
    volume_resource = {
        'Type': 'AWS::EC2::Volume',
        'DeletionPolicy': 'Retain',
        'Properties': {
            'AvailabilityZone': {'Fn::GetAtt': ['Instance', 'AvailabilityZone']},
            'Tags': [{
                'Key': 'Name',
                'Value': volume.ec2_volume_name,
            }],
            'VolumeType': volume.type,
        },
    }

    # check if the snapshot exists and restore the volume from it
    snapshot = Snapshot.get_by_name(ec2, volume.ec2_volume_name)
    if snapshot:
        # volume will be restored from the snapshot
        # check size of the volume
        if volume.size and (volume.size < snapshot.size):
            raise ValueError('Specified size for the "%s" volume (%dGB) is less than size of the '
                             'snapshot (%dGB).'
                             % (volume.name, volume.size, snapshot.size))

        # set snapshot ID
        volume_resource['Properties']['SnapshotId'] = snapshot.snapshot_id

        output.write('- volume "%s" will be restored from the snapshot' % volume.ec2_volume_name)
    else:
        # empty volume will be created, check that the size is specified
        if not volume.size:
            raise ValueError('Size for the new volume is required.')

        output.write('- volume "%s" will be created' % volume.ec2_volume_name)

    # set size of the volume
    if volume.size:
        volume_resource['Properties']['Size'] = volume.size

    # set a name for the new volume
    volume_resource['Properties']['Tags'] = [{'Key': 'Name', 'Value': volume.ec2_volume_name}]

    return volume_resource
def _get_volume_resources(ec2, volumes: List[AbstractInstanceVolume], output: AbstractOutputWriter) -> dict:
    """Returns template resources for all EBS volumes of the instance.

    Existing volumes are attached by ID; missing ones get a Volume resource
    (see _get_volume_resource) plus a matching VolumeAttachment resource.
    Resource names and device names are derived from the volume's position in
    the config ("VolumeF"/"/dev/sdf", "VolumeG"/"/dev/sdg", ...).

    Raises:
        ValueError: If an existing volume is not in the "available" state or
            its size doesn't match the configured size.
    """
    resources = {}

    # ending letters for the devices (see: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/device_naming.html)
    device_letters = 'fghijklmnop'

    # create and attach volumes
    for i, volume in enumerate(volumes):
        if isinstance(volume, EbsVolume):
            device_letter = device_letters[i]
            ec2_volume = Volume.get_by_name(ec2, volume.ec2_volume_name)
            if ec2_volume:
                # check if the volume is available
                if not ec2_volume.is_available():
                    raise ValueError('EBS volume "%s" is not available (state: %s).'
                                     % (volume.ec2_volume_name, ec2_volume.state))

                # check size of the volume
                if volume.size and (volume.size != ec2_volume.size):
                    raise ValueError('Specified size for the "%s" volume (%dGB) doesn\'t match the size of the '
                                     'existing volume (%dGB).' % (volume.name, volume.size, ec2_volume.size))

                output.write('- volume "%s" (%s) will be attached' % (ec2_volume.name, ec2_volume.volume_id))

                volume_id = ec2_volume.volume_id
            else:
                # create Volume resource
                vol_resource_name = 'Volume' + device_letter.upper()
                vol_resource = _get_volume_resource(ec2, volume, output)
                resources[vol_resource_name] = vol_resource

                # the attachment references the in-template resource instead of a concrete ID
                volume_id = {'Ref': vol_resource_name}

            # create VolumeAttachment resource
            vol_attachment_resource_name = 'VolumeAttachment' + device_letter.upper()
            device_name = '/dev/sd' + device_letter
            vol_attachment_resource = _get_volume_attachment_resource(volume_id, device_name)
            resources[vol_attachment_resource_name] = vol_attachment_resource

    return resources


def get_template_parameters(ec2, instance_config: InstanceConfig, instance_profile_arn: str, bucket_name: str,
                            key_pair_name: str, output: AbstractOutputWriter) -> dict:
    """Returns the parameter values for the instance CloudFormation stack.

    Resolves the AMI, validates/derives the root volume size and reports where
    the Docker data root will live.

    Raises:
        ValueError: If the configured root volume is smaller than the AMI.
    """
    # get AMI
    ami = get_ami(ec2, instance_config.ami_id, instance_config.ami_name)
    output.write('- AMI: "%s" (%s)' % (ami.name, ami.image_id))

    # check root volume size
    root_volume_size = instance_config.root_volume_size
    if root_volume_size and root_volume_size < ami.size:
        raise ValueError('Root volume size cannot be less than the size of AMI (%dGB).' % ami.size)
    elif not root_volume_size:
        # if a root volume size is not specified, make it 5GB larger than the AMI size
        root_volume_size = ami.size + 5

    # print info about the Docker data root
    ebs_volumes = [volume for volume in instance_config.volumes if isinstance(volume, EbsVolume)]
    if instance_config.docker_data_root:
        # config validation guarantees exactly one volume contains the Docker data root
        docker_data_volume_name = [volume.name for volume in ebs_volumes
                                   if is_subdir(instance_config.docker_data_root, volume.mount_dir)][0]
        output.write('- Docker data will be stored on the "%s" volume' % docker_data_volume_name)

    # create stack
    parameters = {
        'VpcId': get_vpc_id(ec2, instance_config.subnet_id),
        'InstanceProfileArn': instance_profile_arn,
        'InstanceType': instance_config.instance_type,
        'KeyName': key_pair_name,
        'ImageId': ami.image_id,
        'RootVolumeSize': str(root_volume_size),
        'DockerDataRootDirectory': instance_config.docker_data_root,
        'InstanceNameTag': instance_config.ec2_instance_name,
        'HostProjectDirectory': instance_config.host_project_dir,
        'LogsS3Path': get_logs_s3_path(bucket_name, instance_config.name),
    }

    return parameters
def prepare_instance_profile_template(managed_policy_arns: list) -> str:
    """Renders the CloudFormation template for the EC2 instance profile.

    The "data/template.yaml" file is a Mustache template: the managed policy
    ARNs section is only rendered when at least one ARN is provided.
    """
    with open(os.path.join(os.path.dirname(__file__), 'data', 'template.yaml')) as f:
        content = f.read()

    parameters = {
        # truthy value enables the {{#HAS_MANAGED_POLICIES}} section
        'HAS_MANAGED_POLICIES': len(managed_policy_arns),
        'MANAGED_POLICY_ARNS': [{'MANAGED_POLICY_ARN': arn} for arn in managed_policy_arns]
    }

    template = chevron.render(content, parameters)

    return template


class CleanLogsCommand(AbstractCommand):
    """Deletes CloudWatch log groups that were created by Spotty."""

    name = 'clean-logs'
    description = 'Delete expired CloudFormation log groups with Spotty prefixes'

    def configure(self, parser: ArgumentParser):
        super().configure(parser)
        parser.add_argument('-r', '--region', type=str, required=True, help='AWS region')
        parser.add_argument('-a', '--delete-all', action='store_true', help='Delete all Spotty log groups, '
                                                                            'not just expired ones')

    def run(self, args: Namespace, output: AbstractOutputWriter):
        region = args.region
        logs = boto3.client('logs', region_name=region)

        # log group name prefixes that Spotty uses
        prefixes = ['spotty-', '/aws/lambda/spotty-']

        # without "--delete-all", only groups past their retention period are deleted
        only_empty = not args.delete_all

        output.write('Deleting %s Spotty log groups...' % ('empty' if only_empty else 'all'))

        # walk all pages of describe_log_groups
        res = logs.describe_log_groups()
        self._delete_log_groups(logs, res['logGroups'], prefixes, only_empty, output)
        while 'nextToken' in res:
            res = logs.describe_log_groups(nextToken=res['nextToken'])
            self._delete_log_groups(logs, res['logGroups'], prefixes, only_empty, output)

        output.write('Done')

    @staticmethod
    def _delete_log_groups(logs, log_groups: list, prefixes: list, only_empty: bool, output: AbstractOutputWriter):
        """Deletes log groups from one result page that match a Spotty prefix.

        With "only_empty" set, a group is deleted only when it has a retention
        policy and its age exceeds it (so its events have already expired).
        """
        for log_group in log_groups:
            for prefix in prefixes:
                if log_group['logGroupName'].startswith(prefix):
                    delete = True
                    if only_empty:
                        delete = False
                        # "creationTime" is in milliseconds
                        days_passed = (int(time()) - log_group['creationTime'] // 1000) // 86400
                        if ('retentionInDays' in log_group) and (days_passed >= log_group['retentionInDays']):
                            delete = True

                    if delete:
                        output.write('[x] %s' % log_group['logGroupName'])
                        logs.delete_log_group(logGroupName=log_group['logGroupName'])

                    break
class SpotPricesCommand(AbstractCommand):
    """Prints current Spot prices for an instance type, sorted from cheapest zone."""

    name = 'spot-prices'
    description = 'Get Spot Instance prices for an instance type across all AWS regions or within a specific region.'

    def configure(self, parser: ArgumentParser):
        super().configure(parser)
        parser.add_argument('-i', '--instance-type', type=str, required=True, help='Instance type')
        parser.add_argument('-r', '--region', type=str, help='AWS region')

    def run(self, args: Namespace, output: AbstractOutputWriter):
        if args.region:
            regions = [args.region]
        else:
            # no region given: collect prices across every AWS region
            res = boto3.client('ec2').describe_regions()
            regions = [row['RegionName'] for row in res['Regions']]

        instance_type = args.instance_type
        output.write('Getting spot instance prices for "%s"...\n' % instance_type)

        prices = []
        for region in regions:
            region_client = boto3.client('ec2', region_name=region)
            zone_prices = get_spot_prices(region_client, instance_type)
            prices.extend((price, zone) for zone, price in zone_prices.items())

        # cheapest availability zones first
        prices.sort(key=lambda pair: pair[0])

        if not prices:
            output.write('Spot instances of this type are not available.')
            return

        output.write('Price Zone')
        for price, zone in prices:
            output.write('%.04f %s' % (price, zone))


class EbsVolume(AbstractInstanceVolume):
    """Configuration of a single EBS volume attached to the instance."""

    TYPE_NAME = 'EBS'

    # deletion policies applied when the instance is stopped
    DP_CREATE_SNAPSHOT = 'CreateSnapshot'
    DP_UPDATE_SNAPSHOT = 'UpdateSnapshot'
    DP_RETAIN = 'Retain'
    DP_DELETE = 'Delete'

    # human-readable names for the deletion policies
    _DP_TITLES = {
        DP_CREATE_SNAPSHOT: 'Create Snapshot',
        DP_UPDATE_SNAPSHOT: 'Update Snapshot',
        DP_RETAIN: 'Retain Volume',
        DP_DELETE: 'Delete Volume',
    }

    def __init__(self, volume_config: dict, project_name: str, instance_name: str):
        super().__init__(volume_config)
        self._project_name = project_name
        self._instance_name = instance_name

    def _validate_volume_parameters(self, params: dict) -> dict:
        return validate_ebs_volume_parameters(params)

    @property
    def title(self):
        return 'EBS volume'

    @property
    def size(self) -> int:
        return self._params['size']

    @property
    def type(self) -> str:
        return self._params['type']

    @property
    def deletion_policy(self) -> str:
        return self._params['deletionPolicy']

    @property
    def deletion_policy_title(self) -> str:
        return self._DP_TITLES[self.deletion_policy]

    @property
    def ec2_volume_name(self) -> str:
        """Returns EBS volume name."""
        explicit_name = self._params['volumeName']
        if explicit_name:
            return explicit_name

        # default: derived from the project, instance and volume names
        return '%s-%s-%s' % (self._project_name.lower(), self._instance_name.lower(), self.name.lower())

    @property
    def mount_dir(self) -> str:
        """A directory where the volume will be mounted on the host OS."""
        return self._params['mountDir'] or ('/mnt/%s' % self.ec2_volume_name)

    @property
    def host_path(self) -> str:
        """A path on the host OS that will be mounted to the container."""
        return self.mount_dir
def validate_instance_parameters(params: dict):
    """Validates the AWS instance section of the Spotty configuration.

    Builds the provider-specific schema on top of the shared instance schema and
    returns the validated parameters with defaults applied.
    """
    from spotty.providers.aws.config.ebs_volume import EbsVolume

    instance_parameters = {
        'region': And(str, Regex(r'^[a-z0-9-]+$')),
        Optional('availabilityZone', default=''): And(str, Regex(r'^[a-z0-9-]+$')),
        Optional('subnetId', default=''): And(str, Regex(r'^subnet-[a-z0-9]+$')),
        'instanceType': str,
        Optional('spotInstance', default=False): bool,
        Optional('amiName', default=None): And(str, len, Regex(r'^[\w\(\)\[\]\s\.\/\'@-]{3,128}$')),
        Optional('amiId', default=None): And(str, len, Regex(r'^ami-[a-z0-9]+$')),
        # accepted as int or numeric string, normalized to a positive int
        Optional('rootVolumeSize', default=0): And(Or(int, str),
                                                   Use(str),
                                                   Regex(r'^\d+$', error='Incorrect value for "rootVolumeSize".'),
                                                   Use(int),
                                                   And(lambda x: x > 0,
                                                       error='"rootVolumeSize" should be greater than 0 or should '
                                                             'not be specified.'),
                                                   ),
        Optional('ports', default=[]): [And(int, lambda x: 0 < x < 65536)],
        # accepted as float/int/string, normalized to a positive float with up to 6 decimals
        Optional('maxPrice', default=0): And(Or(float, int, str),
                                             Use(str),
                                             Regex(r'^\d+(\.\d{1,6})?$', error='Incorrect value for "maxPrice".'),
                                             Use(float),
                                             And(lambda x: x > 0, error='"maxPrice" should be greater than 0 or '
                                                                        'should not be specified.'),
                                             ),
        Optional('managedPolicyArns', default=[]): [str],
        Optional('instanceProfileArn', default=None): str,
    }

    volumes_checks = [
        And(lambda x: len(x) < 12, error='Maximum 11 volumes are supported at the moment.'),
        # mount directories must not be nested inside each other
        And(lambda x: not has_prefix([(volume['parameters']['mountDir'] + '/') for volume in x
                                      if volume['parameters'].get('mountDir')]),
            error='Mount directories cannot be prefixes for each other.'),
    ]

    instance_checks = [
        And(lambda x: not (x['maxPrice'] and not x['spotInstance']),
            error='"maxPrice" can be specified only for spot instances.'),
        And(lambda x: not (x['amiName'] and x['amiId']),
            error='"amiName" and "amiId" parameters cannot be used together.'),
    ]

    schema = get_instance_parameters_schema(instance_parameters, EbsVolume.TYPE_NAME,
                                            instance_checks, volumes_checks)

    return validate_config(schema, params)


def validate_ebs_volume_parameters(params: dict):
    """Validates the parameters of a single EBS volume configuration."""
    from spotty.providers.aws.config.ebs_volume import EbsVolume

    # legacy snake_case policy names are still accepted and mapped to the new values
    old_deletion_policies_map = {
        'create_snapshot': EbsVolume.DP_CREATE_SNAPSHOT,
        'update_snapshot': EbsVolume.DP_UPDATE_SNAPSHOT,
        'retain': EbsVolume.DP_RETAIN,
        'delete': EbsVolume.DP_DELETE,
    }

    schema = Schema({
        Optional('volumeName', default=''): And(str, Regex(r'^[\w-]{1,255}$')),
        Optional('mountDir', default=''): And(
            str,
            And(os.path.isabs, error='Use absolute paths in the "mountDir" parameters'),
            Use(lambda x: x.rstrip('/'))  # normalize away a trailing slash
        ),
        Optional('size', default=0): And(int, lambda x: x > 0),
        # TODO: add the "iops" parameter to support the "io1" EBS volume type
        Optional('type', default='gp2'): lambda x: x in ['gp2', 'sc1', 'st1', 'standard'],
        Optional('deletionPolicy', default=EbsVolume.DP_RETAIN): And(
            str,
            lambda x: x in [EbsVolume.DP_CREATE_SNAPSHOT,
                            EbsVolume.DP_UPDATE_SNAPSHOT,
                            EbsVolume.DP_RETAIN,
                            EbsVolume.DP_DELETE] + list(old_deletion_policies_map.keys()),
            Use(lambda x: old_deletion_policies_map.get(x, x)),
            error='Incorrect value for "deletionPolicy".',
        ),
    })

    return validate_config(schema, params)
class DataTransfer(AbstractDataTransfer):
    """Moves project files between the local machine, an S3 bucket and the instance."""

    def __init__(self, local_project_dir: str, host_project_dir: str, sync_filters: list, instance_name: str,
                 region: str):
        super().__init__(local_project_dir, host_project_dir, sync_filters, instance_name)
        self._region = region

    @property
    def scheme_name(self) -> str:
        return 's3'

    @staticmethod
    def _run_local_sync(cmd: str, error_message: str):
        """Runs a sync command in a local shell, raising ValueError with "error_message" on failure."""
        logging.debug('Local sync command: ' + cmd)
        if subprocess.call(cmd, shell=True) != 0:
            raise ValueError(error_message)

    def upload_local_to_bucket(self, bucket_name: str, dry_run: bool = False):
        """Uploads files from local to the bucket."""
        # check AWS CLI is installed
        check_aws_installed()

        # sync the project with S3, deleted files will be deleted from S3
        cmd = get_s3_sync_command(self._local_project_dir, self._get_bucket_project_path(bucket_name),
                                  region=self._region, filters=self._sync_filters, delete=True, dry_run=dry_run)
        self._run_local_sync(cmd, 'Failed to upload the project files to the S3 bucket.')

    def download_bucket_to_local(self, bucket_name: str, download_filters: list):
        """Downloads files from the bucket to local."""
        # check AWS CLI is installed
        check_aws_installed()

        # download files from S3 bucket to local
        cmd = get_s3_sync_command(self._get_bucket_downloads_path(bucket_name), self._local_project_dir,
                                  region=self._region, filters=download_filters, exact_timestamp=True)
        self._run_local_sync(cmd, 'Failed to download files from the S3 bucket to local')

    def get_download_bucket_to_instance_command(self, bucket_name: str, use_sudo: bool = False) -> str:
        """A remote command to download files from the bucket to the instance."""
        remote_cmd = get_s3_sync_command(self._get_bucket_project_path(bucket_name), self._host_project_dir,
                                         region=self._region, filters=self._sync_filters, exact_timestamp=True)

        return ('sudo ' + remote_cmd) if use_sudo else remote_cmd

    def get_upload_instance_to_bucket_command(self, bucket_name: str, download_filters: list,
                                              use_sudo: bool = False, dry_run: bool = False) -> str:
        """A remote command to upload files from the instance to the bucket.

        It uses a temporary S3 directory that is unique for the instance. This
        directory keeps all downloaded from the instance files to sync only
        changed files with local.
        """
        # NOTE(original): "sudo" should be called with the "-i" flag to use the root environment,
        # so aws-cli will read the config file from the root home directory
        remote_cmd = get_s3_sync_command(self._host_project_dir, self._get_bucket_downloads_path(bucket_name),
                                         region=self._region, filters=download_filters, delete=True, dry_run=dry_run)

        return ('sudo ' + remote_cmd) if use_sudo else remote_cmd
def apply_deletion_policies(ec2, volumes: List[AbstractInstanceVolume], output: AbstractOutputWriter):
    """Applies deletion policies to the EBS volumes.

    Phase 1 walks the volumes and, per policy, retains, deletes, or kicks off a
    snapshot. Phase 2 waits for all started snapshots and only then deletes the
    source volumes (and, for "UpdateSnapshot", the renamed previous snapshot).
    Failures are reported through "output" and never abort the other volumes.
    """
    # get volumes
    ebs_volumes = [volume for volume in volumes if isinstance(volume, EbsVolume)]

    # no volumes
    if not ebs_volumes:
        output.write('- no EBS volumes configured')
        return

    # apply deletion policies
    wait_snapshots = []
    for volume in ebs_volumes:
        # get EC2 volume
        try:
            ec2_volume = Volume.get_by_name(ec2, volume.ec2_volume_name)
        except Exception as e:
            output.write('- volume "%s" not found. Error: %s' % (volume.ec2_volume_name, str(e)))
            continue

        if not ec2_volume:
            output.write('- volume "%s" not found' % volume.ec2_volume_name)
            continue

        # a volume still attached (or otherwise busy) can't be snapshotted/deleted safely
        if not ec2_volume.is_available():
            output.write('- volume "%s" is not available (state: %s)'
                         % (volume.ec2_volume_name, ec2_volume.state))
            continue

        # apply deletion policies
        if volume.deletion_policy == EbsVolume.DP_RETAIN:
            # do nothing
            output.write('- volume "%s" is retained' % ec2_volume.name)

        elif volume.deletion_policy == EbsVolume.DP_DELETE:
            # delete EBS volume
            _delete_ec2_volume(ec2_volume, output)

        elif volume.deletion_policy == EbsVolume.DP_CREATE_SNAPSHOT \
                or volume.deletion_policy == EbsVolume.DP_UPDATE_SNAPSHOT:
            try:
                # rename a previous snapshot (suffix it with its creation time to free up the name)
                prev_snapshot = Snapshot.get_by_name(ec2, volume.ec2_volume_name)
                if prev_snapshot:
                    prev_snapshot.rename('%s-%d' % (prev_snapshot.name, prev_snapshot.creation_time))

                output.write('- creating a snapshot for the volume "%s"...' % ec2_volume.name)

                # create a new snapshot
                new_snapshot = ec2_volume.create_snapshot()

                # delete the EBS volume and a previous snapshot only after a new snapshot will be created
                wait_snapshots.append({
                    'new_snapshot': new_snapshot,
                    'prev_snapshot': prev_snapshot,
                    'ec2_volume': ec2_volume,
                    'deletion_policy': volume.deletion_policy,
                })
            except Exception as e:
                output.write('- snapshot for the volume "%s" was not created. Error: %s'
                             % (volume.ec2_volume_name, str(e)))

        else:
            raise ValueError('Unsupported deletion policy: "%s".' % volume.deletion_policy)

    # wait until all snapshots will be created
    for resources in wait_snapshots:
        try:
            resources['new_snapshot'].wait_snapshot_completed()
            output.write('- snapshot for the volume "%s" was created' % resources['new_snapshot'].name)
        except Exception as e:
            output.write('- snapshot "%s" was not created. Error: %s'
                         % (resources['new_snapshot'].name, str(e)))
            continue

        # delete a previous snapshot if it's the "update_snapshot" deletion policy
        if (resources['deletion_policy'] == EbsVolume.DP_UPDATE_SNAPSHOT) and resources['prev_snapshot']:
            _delete_snapshot(resources['prev_snapshot'], output)

        # delete the EBS volume
        _delete_ec2_volume(resources['ec2_volume'], output)


def _delete_ec2_volume(ec2_volume: Volume, output: AbstractOutputWriter):
    """Deletes an EBS volume, reporting (but not raising) failures."""
    try:
        ec2_volume.delete()
        output.write('- volume "%s" was deleted' % ec2_volume.name)
    except Exception as e:
        output.write('- volume "%s" was not deleted. Error: %s' % (ec2_volume.name, str(e)))


def _delete_snapshot(snapshot: Snapshot, output: AbstractOutputWriter):
    """Deletes a (previous) snapshot, reporting (but not raising) failures."""
    try:
        snapshot.delete()
        output.write('- previous snapshot "%s" was deleted' % snapshot.name)
    except Exception as e:
        output.write('- previous snapshot "%s" was not deleted. Error: %s' % (snapshot.name, str(e)))


class VolumeNotFoundError(Exception):
    # raised when a named EBS volume can't be located
    def __init__(self, volume_name):
        super().__init__('Volume "%s" not found' % volume_name)


def get_ami(ec2, ami_id: str = None, ami_name: str = None) -> Image:
    """Returns an AMI that should be used for deployment.

    Resolution order: explicit AMI ID, explicit AMI name, the default Spotty AMI
    name, and finally the latest AWS "Deep Learning" base AMI.

    Raises:
        ValueError: If an AMI not found.
    """
    if ami_id:
        # get an AMI by ID if the "amiId" parameter is specified
        image = Image.get_by_id(ec2, ami_id)
        if not image:
            raise ValueError('AMI with ID=%s not found.' % ami_id)
    elif ami_name:
        # get an AMI by name if the "amiName" parameter is specified
        image = Image.get_by_name(ec2, ami_name)
        if not image:
            # if an AMI name was explicitly specified in the config, but the AMI was not found, raise an error
            raise ValueError('AMI with the name "%s" was not found.' % ami_name)
    else:
        # if the "amiName" parameter is not specified, try to use the default AMI name
        image = Image.get_by_name(ec2, DEFAULT_AMI_NAME)
        if not image:
            # get the latest "Deep Learning Base AMI"
            res = ec2.describe_images(
                Owners=['amazon'],
                Filters=[{'Name': 'name', 'Values': ['Deep Learning AMI (Ubuntu 16.04) Version*']}],
            )

            if not len(res['Images']):
                raise ValueError('AWS Deep Learning AMI not found.\n'
                                 'Use the "spotty aws create-ami" command to create an AMI with NVIDIA Docker.')

            # pick the most recently created image
            image_info = sorted(res['Images'], key=lambda x: x['CreationDate'], reverse=True)[0]
            image = Image(ec2, image_info)

    return image
""" if ami_id: # get an AMI by ID if the "amiId" parameter is specified image = Image.get_by_id(ec2, ami_id) if not image: raise ValueError('AMI with ID=%s not found.' % ami_id) elif ami_name: # get an AMI by name if the "amiName" parameter is specified image = Image.get_by_name(ec2, ami_name) if not image: # if an AMI name was explicitly specified in the config, but the AMI was not found, raise an error raise ValueError('AMI with the name "%s" was not found.' % ami_name) else: # if the "amiName" parameter is not specified, try to use the default AMI name image = Image.get_by_name(ec2, DEFAULT_AMI_NAME) if not image: # get the latest "Deep Learning Base AMI" res = ec2.describe_images( Owners=['amazon'], Filters=[{'Name': 'name', 'Values': ['Deep Learning AMI (Ubuntu 16.04) Version*']}], ) if not len(res['Images']): raise ValueError('AWS Deep Learning AMI not found.\n' 'Use the "spotty aws create-ami" command to create an AMI with NVIDIA Docker.') image_info = sorted(res['Images'], key=lambda x: x['CreationDate'], reverse=True)[0] image = Image(ec2, image_info) return image ================================================ FILE: spotty/providers/aws/helpers/availability_zone.py ================================================ from typing import List from spotty.config.abstract_instance_volume import AbstractInstanceVolume from spotty.providers.aws.config.ebs_volume import EbsVolume from spotty.providers.aws.resources.volume import Volume def update_availability_zone(ec2, availability_zone: str, volumes: List[AbstractInstanceVolume]): """Checks that existing volumes located in the same AZ and the AZ from the config file matches volumes AZ. Args: ec2: EC2 boto3 client availability_zone: Availability Zone from the configuration. volumes: List of volume objects. Returns: The final AZ where the instance should be run or an empty string if the instance can be run in any AZ. 
Raises: ValueError: AZ in the config file doesn't match the AZs of the volumes or AZs of the volumes are different. """ availability_zone = availability_zone for volume in volumes: if isinstance(volume, EbsVolume): ec2_volume = Volume.get_by_name(ec2, volume.ec2_volume_name) if ec2_volume: if availability_zone and (availability_zone != ec2_volume.availability_zone): raise ValueError( 'The availability zone in the configuration file doesn\'t match the availability zone ' 'of the existing volume or you have two existing volumes in different availability ' 'zones.') # update availability zone availability_zone = ec2_volume.availability_zone return availability_zone ================================================ FILE: spotty/providers/aws/helpers/instance_prices.py ================================================ import datetime import json import logging import boto3 from pkg_resources import resource_filename def get_spot_prices(ec2, instance_type: str): """Returns current Spot Instance prices for all availability zones for particular instance type and region. AWS region specified implicitly in the "ec2" object. """ tomorrow_date = datetime.datetime.today() + datetime.timedelta(days=1) res = ec2.describe_spot_price_history(InstanceTypes=[instance_type], StartTime=tomorrow_date, ProductDescriptions=['Linux/UNIX']) prices_by_zone = {} for row in res['SpotPriceHistory']: prices_by_zone[row['AvailabilityZone']] = float(row['SpotPrice']) return prices_by_zone def get_current_spot_price(ec2, instance_type, availability_zone=''): """Returns the current Spot price for an availability zone. If an availability zone is not specified, returns the minimum price for the region. """ spot_prices = get_spot_prices(ec2, instance_type) if availability_zone: if availability_zone not in spot_prices: raise ValueError('Spot price for the "%s" availability zone not found.' 
                             % availability_zone)
        current_price = spot_prices[availability_zone]
    else:
        current_price = min(spot_prices.values())

    return current_price


def get_on_demand_price(instance_type: str, region: str):
    """Returns the On-Demand Linux price for the instance type, or None if it can't be obtained."""
    client = boto3.client('pricing', region_name='us-east-1')  # the API available only in "us-east-1"
    try:
        response = client.get_products(
            ServiceCode='AmazonEC2',
            Filters=[
                {'Type': 'TERM_MATCH', 'Field': 'location', 'Value': _get_region_name(region)},
                {'Type': 'TERM_MATCH', 'Field': 'instanceType', 'Value': instance_type},
                {'Type': 'TERM_MATCH', 'Field': 'operatingSystem', 'Value': 'Linux'},
                {'Type': 'TERM_MATCH', 'Field': 'tenancy', 'Value': 'shared'},
                {'Type': 'TERM_MATCH', 'Field': 'capacitystatus', 'Value': 'Used'},
            ],
        )

        # PriceList entries are JSON strings; take the first OnDemand term's first price dimension
        prices = json.loads(response['PriceList'][0])['terms']['OnDemand']
        price = float(list(list(prices.values())[0]['priceDimensions'].values())[0]['pricePerUnit']['USD'])
    except Exception as e:
        # pricing is informational only, so any failure degrades to "unknown price"
        logging.debug('Couldn\'t find a price for the instance: ' + str(e))
        price = None

    return price


def _get_region_name(region: str):
    """Maps a region code (e.g. "us-east-1") to the human-readable name the Pricing API expects."""
    endpoint_file = resource_filename('botocore', 'data/endpoints.json')
    try:
        with open(endpoint_file, 'r') as f:
            data = json.load(f)

        region_name = data['partitions'][0]['regions'][region]['description']
    except Exception as e:
        logging.debug('Couldn\'t obtain the region name: ' + str(e))
        region_name = None

    return region_name


def check_max_spot_price(ec2, instance_type: str, is_spot_instance: bool, max_price: float,
                         availability_zone: str = ''):
    """Checks that the specified maximum Spot price is less than the current Spot price.

    Args:
        ec2: EC2 client
        instance_type (str): Instance Type
        is_spot_instance (bool): True if it's a spot instance
        max_price (float): requested maximum price for the instance
        availability_zone (str): Availability zone to check. If it's an empty string,
            checks the cheapest AZ.

    Raises:
        ValueError: Current price for the instance is higher than the maximum price
            in the configuration file.
    """
    # a zero/empty max_price means "no limit", so there is nothing to check
    if is_spot_instance and max_price:
        current_price = get_current_spot_price(ec2, instance_type, availability_zone)
        if current_price > max_price:
            raise ValueError('Current price for the instance (%.04f) is higher than the maximum price in the '
                             'configuration file (%.04f).' % (current_price, max_price))



================================================
FILE: spotty/providers/aws/helpers/logs.py
================================================
import os
import subprocess
import tempfile
from glob import glob
from spotty.providers.aws.helpers.s3_sync import get_s3_sync_command


def get_logs_s3_path(bucket_name: str, instance_name: str) -> str:
    """Returns the S3 prefix under which instance logs are stored."""
    return 's3://%s/logs/aws/%s' % (bucket_name, instance_name)


def download_logs(bucket_name: str, instance_name: str, stack_uuid: str, region: str) -> list:
    """Downloads logs from S3 bucket to temporary directory."""
    logs_s3_path = '%s/%s' % (get_logs_s3_path(bucket_name, instance_name), stack_uuid)
    local_logs_dir = tempfile.mkdtemp()

    # download logs
    # NOTE(review): shell=True with a command built by get_s3_sync_command (paths are shlex-quoted there)
    download_cmd = get_s3_sync_command(logs_s3_path, local_logs_dir, region=region,
                                       exact_timestamp=True, quiet=True)
    subprocess.call(download_cmd, shell=True)

    # get paths to the downloaded files
    log_paths = glob(os.path.join(local_logs_dir, '**', '*'), recursive=True)

    return log_paths



================================================
FILE: spotty/providers/aws/helpers/s3_sync.py
================================================
from shutil import which
from spotty.deployment.utils.cli import shlex_join


def check_aws_installed():
    """Checks that AWS CLI is installed."""
    if which('aws') is None:
        raise ValueError('AWS CLI is not installed.')


def get_s3_sync_command(from_path: str, to_path: str, profile: str = None, region: str = None,
                        filters: list = None, exact_timestamp: bool = False, delete: bool = False,
                        quiet: bool = False, dry_run: bool = False):
    """Builds an "aws s3 sync" command."""
    args = ['aws']

    if profile:
        args += ['--profile', profile]

    if region:
        args += ['--region', region]

    args
 += ['s3', 'sync', from_path, to_path]

    if filters:
        for sync_filter in filters:
            # each filter entry must contain exactly one of "exclude"/"include"
            if ('exclude' in sync_filter and 'include' in sync_filter) \
                    or ('exclude' not in sync_filter and 'include' not in sync_filter):
                raise ValueError('S3 sync filter has wrong format.')

            if 'exclude' in sync_filter:
                for path in sync_filter['exclude']:
                    args += ['--exclude', path]

            if 'include' in sync_filter:
                for path in sync_filter['include']:
                    args += ['--include', path]

    if exact_timestamp:
        args.append('--exact-timestamp')

    if delete:
        args.append('--delete')

    if quiet:
        args.append('--quiet')

    if dry_run:
        args.append('--dryrun')

    # quote each argument so the command is safe to run through a shell
    command = shlex_join(args)

    return command



================================================
FILE: spotty/providers/aws/helpers/subnet.py
================================================
from spotty.providers.aws.resources.subnet import Subnet


def check_az_and_subnet(ec2, region: str, availability_zone: str, subnet_id: str):
    """Validates the availability zone / subnet combination from the configuration.

    Raises ValueError when the AZ doesn't exist in the region, the subnet doesn't exist or
    doesn't belong to the AZ, or when required default subnets are missing.
    """
    # get all availability zones for the region
    zones = ec2.describe_availability_zones()
    zone_names = [zone['ZoneName'] for zone in zones['AvailabilityZones']]

    # check availability zone
    if availability_zone and availability_zone not in zone_names:
        raise ValueError('Availability zone "%s" doesn\'t exist in the "%s" region.'
                         % (availability_zone, region))

    if availability_zone:
        if subnet_id:
            subnet = Subnet.get_by_id(ec2, subnet_id)
            if not subnet:
                raise ValueError('Subnet "%s" not found.' % subnet_id)

            if subnet.availability_zone != availability_zone:
                raise ValueError('Availability zone of the subnet doesn\'t match the specified availability zone')
        else:
            # without an explicit subnet, the AZ must have a default subnet to launch into
            default_subnets = Subnet.get_default_subnets(ec2)
            default_subnet = [subnet for subnet in default_subnets
                              if subnet.availability_zone == availability_zone]
            if not default_subnet:
                raise ValueError('Default subnet for the "%s" availability zone not found.\n'
                                 'Use the "subnetId" parameter to specify a subnet for this availability zone.'
                                 % availability_zone)
    else:
        if subnet_id:
            raise ValueError('An availability zone should be specified if a custom subnet is used.')
        else:
            # no AZ pinned: the instance may land in any AZ, so every AZ needs a default subnet
            default_subnets = Subnet.get_default_subnets(ec2)
            default_azs = {subnet.availability_zone for subnet in default_subnets}
            zones_wo_subnet = [zone_name for zone_name in zone_names if zone_name not in default_azs]
            if zones_wo_subnet:
                raise ValueError('Default subnets for the following availability zones were not found: %s.\n'
                                 'Use "subnetId" and "availabilityZone" parameters or create missing default '
                                 'subnets.' % ', '.join(zones_wo_subnet))



================================================
FILE: spotty/providers/aws/helpers/vpc.py
================================================
from spotty.providers.aws.resources.subnet import Subnet
from spotty.providers.aws.resources.vpc import Vpc


def get_vpc_id(ec2, subnet_id: str = None) -> str:
    """Returns VPC ID that should be used for deployment."""
    if subnet_id:
        # a custom subnet determines the VPC
        vpc_id = Subnet.get_by_id(ec2, subnet_id).vpc_id
    else:
        default_vpc = Vpc.get_default_vpc(ec2)
        if not default_vpc:
            raise ValueError('Default VPC not found')

        vpc_id = default_vpc.vpc_id

    return vpc_id



================================================
FILE: spotty/providers/aws/instance_deployment.py
================================================
import boto3
from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter
from spotty.deployment.abstract_cloud_instance.abstract_instance_deployment import AbstractInstanceDeployment
from spotty.deployment.container.docker.docker_commands import DockerCommands
from spotty.providers.aws.cfn_templates.instance.template import prepare_instance_template, get_template_parameters
from spotty.providers.aws.data_transfer import DataTransfer
from spotty.providers.aws.helpers.availability_zone import update_availability_zone
from spotty.providers.aws.helpers.instance_prices import check_max_spot_price
from spotty.providers.aws.helpers.subnet import check_az_and_subnet
from
 spotty.providers.aws.resource_managers.key_pair_manager import KeyPairManager
from spotty.deployment.utils.print_info import render_volumes_info_table
from spotty.providers.aws.resources.instance import Instance
from spotty.providers.aws.config.instance_config import InstanceConfig
from spotty.providers.aws.deletion_policies import apply_deletion_policies
from spotty.providers.aws.resource_managers.instance_profile_stack_manager import InstanceProfileStackManager
from spotty.providers.aws.helpers.logs import download_logs
from spotty.providers.aws.resource_managers.instance_stack_manager import InstanceStackManager


class InstanceDeployment(AbstractInstanceDeployment):
    """Deploys and deletes a Spotty instance on AWS using a CloudFormation stack."""

    instance_config: InstanceConfig

    def __init__(self, instance_config: InstanceConfig):
        super().__init__(instance_config)

        self._project_name = instance_config.project_config.project_name
        self._ec2 = boto3.client('ec2', region_name=instance_config.region)

    @property
    def stack_manager(self) -> InstanceStackManager:
        # manager for the per-instance CloudFormation stack
        return InstanceStackManager(self._project_name, self.instance_config.name, self.instance_config.region)

    @property
    def key_pair_manager(self) -> KeyPairManager:
        return KeyPairManager(self._ec2, self._project_name, self.instance_config.region)

    def get_instance(self) -> Instance:
        """Returns the running EC2 instance for this stack, or None if it's not running."""
        return Instance.get_by_stack_name(self._ec2, self.stack_manager.name)

    def deploy(self, container_commands: DockerCommands, bucket_name: str, data_transfer: DataTransfer,
               output: AbstractOutputWriter, dry_run: bool = False):
        """Validates the configuration, uploads the project and creates the instance stack.

        With dry_run=True only the validation and template preparation steps run; no AWS
        resources are created.
        """
        # get deployment availability zone
        availability_zone = update_availability_zone(self._ec2, self.instance_config.availability_zone,
                                                     self.instance_config.volumes)

        # check availability zone and subnet configuration
        check_az_and_subnet(self._ec2, self.instance_config.region, availability_zone,
                            self.instance_config.subnet_id)

        # check the maximum price for a spot instance
        check_max_spot_price(self._ec2, self.instance_config.instance_type,
                             self.instance_config.is_spot_instance, self.instance_config.max_price,
                             availability_zone)

        # sync the project with the S3 bucket
        if bucket_name is not None:
            output.write('Syncing the project with the S3 bucket...')
            data_transfer.upload_local_to_bucket(bucket_name, dry_run=dry_run)

        # create or update instance profile
        if not dry_run:
            instance_profile_stack_manager = InstanceProfileStackManager(
                self._project_name, self.instance_config.name, self.instance_config.region)
            if not self.instance_config.instance_profile_arn:
                # no profile ARN in the config: manage one through a separate stack
                instance_profile_arn = instance_profile_stack_manager.create_or_update_stack(
                    self.instance_config.managed_policy_arns, output=output)
            else:
                instance_profile_arn = self.instance_config.instance_profile_arn
        else:
            instance_profile_arn = None

        # create a key pair if it doesn't exist
        if not dry_run:
            self.key_pair_manager.maybe_create_key()

        output.write('Preparing CloudFormation template...')

        # prepare CloudFormation template
        with output.prefix('  '):
            template = prepare_instance_template(
                ec2=self._ec2,
                instance_config=self.instance_config,
                docker_commands=container_commands,
                availability_zone=availability_zone,
                sync_project_cmd=data_transfer.get_download_bucket_to_instance_command(bucket_name=bucket_name),
                output=output,
            )

            # get parameters for the template
            parameters = get_template_parameters(
                ec2=self._ec2,
                instance_config=self.instance_config,
                instance_profile_arn=instance_profile_arn,
                bucket_name=bucket_name,
                key_pair_name=self.key_pair_manager.key_name,
                output=output,
            )

        # print information about the volumes
        output.write('\nVolumes:\n%s\n' % render_volumes_info_table(self.instance_config.volume_mounts,
                                                                    self.instance_config.volumes))

        # create stack
        if not dry_run:
            stack = self.stack_manager.create_or_update_stack(template, parameters, self.instance_config,
                                                              output)

            if stack.status != 'CREATE_COMPLETE':
                logs_str = 'Please, see CloudFormation logs for the details.'

                # download CloudFormation logs from the instance if it was created
                if self.get_instance():
                    log_paths = download_logs(
                        bucket_name=bucket_name,
                        instance_name=self.instance_config.name,
                        stack_uuid=stack.stack_uuid,
                        region=self.instance_config.region,
                    )

                    logs_str = 'Please, see the logs for the details:\n  '
                    logs_str += '\n  '.join(log_paths)

                raise ValueError('Stack "%s" was not created.\n%s' % (stack.name, logs_str))

    def delete(self, output: AbstractOutputWriter):
        """Terminates the instance, deletes its stack and applies volume deletion policies."""
        # terminate the instance
        instance = self.get_instance()
        if instance:
            output.write('Terminating the instance... ', newline=False)
            instance.terminate()
            output.write('DONE')
        else:
            output.write('The instance was already terminated.')

        # delete the stack in background if it exists
        self.stack_manager.delete_stack(output, no_wait=True)

        output.write('Applying deletion policies for the volumes...')

        # apply deletion policies for the volumes
        with output.prefix('  '):
            apply_deletion_policies(self._ec2, self.instance_config.volumes, output)



================================================
FILE: spotty/providers/aws/instance_manager.py
================================================
from spotty.errors.instance_not_running import InstanceNotRunningError
from spotty.deployment.abstract_cloud_instance.abstract_cloud_instance_manager import AbstractCloudInstanceManager
from spotty.providers.aws.resource_managers.bucket_manager import BucketManager
from spotty.providers.aws.config.instance_config import InstanceConfig
from spotty.providers.aws.data_transfer import DataTransfer
from spotty.providers.aws.instance_deployment import InstanceDeployment
from spotty.utils import render_table


class InstanceManager(AbstractCloudInstanceManager):
    """AWS implementation of the cloud instance manager (factories + status rendering)."""

    instance_config: InstanceConfig
    bucket_manager: BucketManager
    data_transfer: DataTransfer
    instance_deployment: InstanceDeployment

    def _get_instance_config(self, instance_config: dict) -> InstanceConfig:
        """Validates the instance config and returns an InstanceConfig object."""
        return
 InstanceConfig(instance_config, self.project_config)

    def _get_bucket_manager(self) -> BucketManager:
        """Returns an bucket manager."""
        return BucketManager(self.instance_config.project_config.project_name, self.instance_config.region)

    def _get_data_transfer(self) -> DataTransfer:
        """Returns a data transfer object."""
        return DataTransfer(
            local_project_dir=self.project_config.project_dir,
            host_project_dir=self.instance_config.host_project_dir,
            sync_filters=self.project_config.sync_filters,
            instance_name=self.instance_config.name,
            region=self.instance_config.region,
        )

    def _get_instance_deployment(self) -> InstanceDeployment:
        """Returns an instance deployment manager."""
        return InstanceDeployment(self.instance_config)

    def get_status_text(self):
        """Renders a table describing the running instance; raises if it isn't running."""
        instance = self.instance_deployment.get_instance()
        if not instance:
            raise InstanceNotRunningError(self.instance_config.name)

        table = [
            ('Instance State', instance.state),
            ('Instance Type', instance.instance_type),
            ('Availability Zone', instance.availability_zone),
        ]

        # show whichever IP address is available, preferring the public one
        if instance.public_ip_address:
            table.append(('Public IP Address', instance.public_ip_address))
        elif instance.private_ip_address:
            table.append(('Private IP Address', instance.private_ip_address))

        if instance.lifecycle == 'spot':
            spot_price = instance.get_spot_price()
            table.append(('Purchasing Option', 'Spot Instance'))
            table.append(('Spot Instance Price', '$%.04f' % spot_price))
        else:
            # on-demand price lookup is best-effort (us-east-1 only) and may return None
            on_demand_price = instance.get_on_demand_price()
            table.append(('Purchasing Option', 'On-Demand Instance'))
            table.append(('Instance Price', ('$%.04f (us-east-1)' % on_demand_price)
                          if on_demand_price else 'Unknown'))

        return render_table(table)

    @property
    def ssh_key_path(self):
        # private SSH key used to connect to the instance
        return self.instance_deployment.key_pair_manager.key_path



================================================
FILE: spotty/providers/aws/resource_managers/__init__.py
================================================



================================================
FILE: spotty/providers/aws/resource_managers/bucket_manager.py
================================================
import boto3
import re
from spotty.deployment.abstract_cloud_instance.abstract_bucket_manager import AbstractBucketManager
from spotty.deployment.abstract_cloud_instance.errors.bucket_not_found import BucketNotFoundError
from spotty.providers.aws.resources.bucket import Bucket
from spotty.utils import random_string


class BucketManager(AbstractBucketManager):
    """Finds and creates the per-project S3 bucket ("spotty-<project>-<random>-<region>")."""

    def __init__(self, project_name: str, region: str):
        super().__init__(project_name)

        self._s3 = boto3.client('s3', region_name=region)
        self._region = region
        self._bucket_prefix = 'spotty-%s' % project_name.lower()

    def get_bucket(self) -> Bucket:
        """Returns the project's bucket.

        Raises:
            BucketNotFoundError: no matching bucket exists.
            ValueError: more than one matching bucket exists in the region.
        """
        res = self._s3.list_buckets()

        # NOTE(review): re.match anchors only at the start, so a name with a matching
        # prefix but extra trailing characters would also be accepted
        regex = re.compile('-'.join([self._bucket_prefix, '[a-z0-9]{12}', self._region]))
        buckets = [bucket for bucket in res['Buckets'] if regex.match(bucket['Name']) is not None]

        if len(buckets) > 1:
            raise ValueError('Found several buckets in the same region: %s.'
                             % ', '.join(bucket['Name'] for bucket in buckets))

        if not len(buckets):
            raise BucketNotFoundError

        bucket = Bucket(buckets[0])

        return bucket

    def create_bucket(self) -> Bucket:
        """Creates a new private bucket with a randomized name and returns it."""
        bucket_name = '-'.join([self._bucket_prefix, random_string(12), self._region])

        # a fix for the boto3 issue: https://github.com/boto/boto3/issues/125
        # (us-east-1 rejects an explicit LocationConstraint)
        if self._region == 'us-east-1':
            self._s3.create_bucket(ACL='private', Bucket=bucket_name)
        else:
            self._s3.create_bucket(ACL='private', Bucket=bucket_name,
                                   CreateBucketConfiguration={'LocationConstraint': self._region})

        return Bucket({'Name': bucket_name})

    def delete_bucket(self):
        # bucket deletion is intentionally not implemented for AWS
        pass



================================================
FILE: spotty/providers/aws/resource_managers/instance_profile_stack_manager.py
================================================
import boto3
from botocore.exceptions import ClientError, WaiterError
from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter
from
 spotty.providers.aws.cfn_templates.instance_profile.template import prepare_instance_profile_template
from spotty.providers.aws.resources.stack import Stack


class InstanceProfileStackManager(object):
    """Manages the CloudFormation stack that holds the instance's IAM profile."""

    def __init__(self, project_name: str, instance_name: str, region: str):
        self._cf = boto3.client('cloudformation', region_name=region)
        self._region = region
        self._stack_name = 'spotty-instance-profile-%s-%s' % (project_name.lower(), instance_name.lower())

    def create_or_update_stack(self, managed_policy_arns: list, output: AbstractOutputWriter):
        """Creates or updates an instance profile.
        It was moved to a separate stack because creating of an instance profile resource
        takes 2 minutes.
        """
        # check that policies exist
        iam = boto3.client('iam', region_name=self._region)
        for policy_arn in managed_policy_arns:
            # if the policy doesn't exist, an error will be raised
            iam.get_policy(PolicyArn=policy_arn)

        template = prepare_instance_profile_template(managed_policy_arns)

        stack = Stack.get_by_name(self._cf, self._stack_name)
        try:
            if stack:
                # update the stack and wait until it will be updated
                self._update_stack(template, output)
            else:
                # create the stack and wait until it will be created
                self._create_stack(template, output)

            # re-read the stack to get its final status and outputs
            stack = Stack.get_by_name(self._cf, self._stack_name)
        except WaiterError:
            stack = None

        if not stack or stack.status not in ['CREATE_COMPLETE', 'UPDATE_COMPLETE']:
            raise ValueError('Stack "%s" was not created.\n'
                             'Please, see CloudFormation logs for the details.' % self._stack_name)

        # the profile ARN is exposed through the stack's "ProfileArn" output
        profile_arn = [row['OutputValue'] for row in stack.outputs if row['OutputKey'] == 'ProfileArn'][0]

        return profile_arn

    def _create_stack(self, template: str, output: AbstractOutputWriter):
        """Creates the stack and waits until it will be created."""
        output.write('Creating IAM role for the instance...')
        stack = Stack.create_stack(
            cf=self._cf,
            StackName=self._stack_name,
            TemplateBody=template,
            Capabilities=['CAPABILITY_IAM'],
            OnFailure='DELETE',
        )

        # wait for the stack to be created
        stack.wait_stack_created(delay_secs=15)

    def _update_stack(self, template: str, output: AbstractOutputWriter):
        """Updates the stack and waits until it will be updated."""
        try:
            updated_stack = Stack.update_stack(
                cf=self._cf,
                StackName=self._stack_name,
                TemplateBody=template,
                Capabilities=['CAPABILITY_IAM'],
            )
        except ClientError as e:
            # the stack was not updated because there are no changes
            # NOTE(review): any ValidationError is treated as "no changes" — a genuine
            # template validation failure would also be swallowed here
            updated_stack = None
            error_code = e.response.get('Error', {}).get('Code', 'Unknown')
            if error_code != 'ValidationError':
                raise e

        if updated_stack:
            # wait for the stack to be updated
            output.write('Updating IAM role for the instance...')
            updated_stack.wait_stack_updated(delay=15)



================================================
FILE: spotty/providers/aws/resource_managers/instance_stack_manager.py
================================================
import boto3
from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter
from spotty.providers.aws.resources.stack import Stack, Task
from spotty.providers.aws.config.instance_config import InstanceConfig


class InstanceStackManager(object):
    """Manages the CloudFormation stack that launches and provisions the instance."""

    def __init__(self, project_name: str, instance_name: str, region: str):
        self._cf = boto3.client('cloudformation', region_name=region)
        self._ec2 = boto3.client('ec2', region_name=region)
        self._region = region
        self._stack_name = 'spotty-instance-%s-%s' % (project_name.lower(), instance_name.lower())

    @property
    def name(self):
        return self._stack_name

    def create_or_update_stack(self, template:
 str, parameters: dict, instance_config: InstanceConfig,
                               output: AbstractOutputWriter):
        """Runs CloudFormation template."""
        # delete the stack if it exists
        # (the instance stack is always recreated from scratch, never updated in place)
        stack = Stack.get_by_name(self._cf, self._stack_name)
        if stack:
            self.delete_stack(output)

        # create new stack
        stack = Stack.create_stack(
            cf=self._cf,
            StackName=self._stack_name,
            TemplateBody=template,
            Parameters=[{'ParameterKey': key, 'ParameterValue': value} for key, value in parameters.items()],
            Capabilities=['CAPABILITY_IAM'],
            OnFailure='DO_NOTHING',
        )

        output.write('Waiting for the stack to be created...')

        # each Task maps a provisioning phase to the CFN signal resources that bracket it;
        # disabled tasks correspond to phases skipped for this configuration
        tasks = [
            Task(
                message='launching the instance',
                start_resource=None,
                finish_resource='Instance',
                enabled=True,
            ),
            Task(
                message='preparing the instance',
                start_resource='Instance',
                finish_resource='MountingVolumesSignal',
                enabled=True,
            ),
            Task(
                message='mounting volumes',
                start_resource='MountingVolumesSignal',
                finish_resource='SettingDockerRootSignal',
                enabled=bool(instance_config.volumes),
            ),
            Task(
                message='setting Docker data root',
                start_resource='SettingDockerRootSignal',
                finish_resource='SyncingProjectSignal',
                enabled=bool(instance_config.docker_data_root),
            ),
            Task(
                message='syncing project files',
                start_resource='SyncingProjectSignal',
                finish_resource='RunningInstanceStartupCommandsSignal',
                enabled=True,
            ),
            Task(
                message='running instance startup commands',
                start_resource='RunningInstanceStartupCommandsSignal',
                finish_resource='BuildingDockerImageSignal',
                enabled=bool(instance_config.commands),
            ),
            Task(
                message='building Docker image',
                start_resource='BuildingDockerImageSignal',
                finish_resource='StartingContainerSignal',
                enabled=bool(instance_config.dockerfile_path),
            ),
            Task(
                message='starting container',
                start_resource='StartingContainerSignal',
                finish_resource='RunningContainerStartupCommandsSignal',
                enabled=True,
            ),
            Task(
                message='running container startup commands',
                start_resource='RunningContainerStartupCommandsSignal',
                finish_resource='DockerReadyWaitCondition',
                enabled=bool(instance_config.container_config.commands),
            ),
        ]

        # wait for the stack to be created
        with output.prefix('  '):
            stack.wait_tasks(tasks, resource_success_status='CREATE_COMPLETE',
                             resource_fail_status='CREATE_FAILED', output=output)
            stack = stack.wait_status_changed(stack_waiting_status='CREATE_IN_PROGRESS', output=output)

        return stack

    def delete_stack(self, output: AbstractOutputWriter, no_wait=False):
        """Deletes the instance stack; with no_wait=True returns without waiting for completion."""
        stack = Stack.get_by_name(self._cf, self._stack_name)
        if not stack:
            return

        if not no_wait:
            output.write('Waiting for the stack to be deleted...')

        # delete the stack
        try:
            stack.delete()
            if not no_wait:
                stack.wait_stack_deleted()
        except Exception as e:
            raise ValueError('Stack "%s" was not deleted. Error: %s\n'
                             'See CloudFormation logs for details.' % (self._stack_name, str(e)))



================================================
FILE: spotty/providers/aws/resource_managers/key_pair_manager.py
================================================
import os
from spotty.configuration import get_spotty_keys_dir
from spotty.providers.instance_manager_factory import PROVIDER_AWS


class KeyPairManager(object):
    """Keeps the EC2 key pair and the local private key file for a project in sync."""

    def __init__(self, ec2, project_name: str, region: str):
        self._ec2 = ec2
        self._key_name = 'spotty-key-%s-%s' % (project_name.lower(), region)
        self._key_path = os.path.join(get_spotty_keys_dir(PROVIDER_AWS), self._key_name)

    @property
    def key_name(self):
        return self._key_name

    @property
    def key_path(self):
        return self._key_path

    def maybe_create_key(self):
        """(Re)creates the key pair unless both the EC2 key and the local file already exist."""
        key_file_exists = os.path.isfile(self.key_path)
        ec2_key_exists = self._ec2_key_exists()

        # if either side is missing, the pair is unusable — recreate both
        if not ec2_key_exists or not key_file_exists:
            # remove key from AWS (key file not found)
            if ec2_key_exists:
                self._ec2.delete_key_pair(KeyName=self._key_name)

            # remove the key file (in case it was the old path)
            if key_file_exists:
                os.unlink(self.key_path)

            # create new key
            res = self._ec2.create_key_pair(KeyName=self._key_name)

            # create a provider subdirectory
            keys_dir = os.path.dirname(self.key_path)
            if not os.path.isdir(keys_dir):
os.makedirs(keys_dir, mode=0o755, exist_ok=True) # save the key to the new path with open(self.key_path, 'w') as f: f.write(res['KeyMaterial']) os.chmod(self.key_path, 0o600) def delete_key(self): # delete EC2 Key Pair if self._ec2_key_exists(): self._ec2.delete_key_pair(KeyName=self._key_name) # delete the key file if os.path.isfile(self.key_path): os.unlink(self.key_path) def _ec2_key_exists(self): res = self._ec2.describe_key_pairs(Filters=[{'Name': 'key-name', 'Values': [self._key_name]}]) if 'KeyPairs' not in res: return False if len(res['KeyPairs']) > 1: raise ValueError('Several keys with the name "%s" found.' % self._key_name) return bool(res['KeyPairs']) ================================================ FILE: spotty/providers/aws/resources/__init__.py ================================================ ================================================ FILE: spotty/providers/aws/resources/bucket.py ================================================ from spotty.deployment.abstract_cloud_instance.resources.abstract_bucket import AbstractBucket class Bucket(AbstractBucket): def __init__(self, data: dict): self._data = data @property def name(self) -> str: return self._data['Name'] ================================================ FILE: spotty/providers/aws/resources/image.py ================================================ class Image(object): def __init__(self, ec2, ami_info): self._ec2 = ec2 self._ami_info = ami_info @staticmethod def get_by_name(ec2, ami_name: str): """Returns a AMI by its name.""" res = ec2.describe_images(Owners=['self'], Filters=[ {'Name': 'name', 'Values': [ami_name]}, ]) if len(res['Images']) > 1: raise ValueError('Several AMIs use the same name: "%s".' 
% ami_name) if not len(res['Images']): return None return Image(ec2, res['Images'][0]) @staticmethod def get_by_id(ec2, ami_id: str): """Returns a AMI by its ID.""" res = ec2.describe_images(Filters=[{'Name': 'image-id', 'Values': [ami_id]}]) if not len(res['Images']): return None return Image(ec2, res['Images'][0]) @property def image_id(self) -> str: return self._ami_info['ImageId'] @property def name(self) -> str: return self._ami_info['Name'] @property def size(self) -> int: return self._ami_info['BlockDeviceMappings'][0]['Ebs']['VolumeSize'] def get_tag_value(self, tag_name): tag_values = [tag['Value'] for tag in self._ami_info['Tags'] if tag['Key'] == tag_name] if not tag_values: return None return tag_values[0] ================================================ FILE: spotty/providers/aws/resources/instance.py ================================================ from datetime import datetime from spotty.deployment.abstract_cloud_instance.resources.abstract_instance import AbstractInstance from spotty.providers.aws.helpers.instance_prices import get_current_spot_price, get_on_demand_price class Instance(AbstractInstance): def __init__(self, ec2, data: dict): self._ec2 = ec2 self._data = data @staticmethod def get_by_stack_name(ec2, stack_name): """Returns the running instance by its stack name or None if the instance is not running. """ res = ec2.describe_instances(Filters=[ {'Name': 'tag:aws:cloudformation:stack-name', 'Values': [stack_name]}, {'Name': 'instance-state-name', 'Values': ['running']}, ]) if len(res['Reservations']) > 1: raise ValueError('Several running instances for the stack "%s" are found.' 
% stack_name)
        if not len(res['Reservations']):
            return None

        return Instance(ec2, res['Reservations'][0]['Instances'][0])

    @property
    def instance_id(self):
        return self._data['InstanceId']

    @property
    def public_ip_address(self) -> str:
        # None if the instance has no public IP address
        return self._data.get('PublicIpAddress', None)

    @property
    def private_ip_address(self) -> str:
        return self._data.get('PrivateIpAddress', None)

    @property
    def state(self) -> str:
        return self._data['State']['Name']

    @property
    def instance_type(self) -> str:
        return self._data['InstanceType']

    @property
    def availability_zone(self) -> str:
        return self._data['Placement']['AvailabilityZone']

    @property
    def launch_time(self) -> datetime:
        return self._data['LaunchTime']

    @property
    def lifecycle(self) -> str:
        # None when the field is absent from the instance description
        return self._data.get('InstanceLifecycle')

    @property
    def is_running(self):
        return self.state == 'running'

    @property
    def is_stopped(self):
        return self.state == 'stopped'

    def get_spot_price(self):
        """Get current Spot Instance price for this instance."""
        return get_current_spot_price(self._ec2, self.instance_type, self.availability_zone)

    def get_on_demand_price(self):
        """Get On-demand Instance price for the same instance in the us-east-1 region."""
        return get_on_demand_price(self.instance_type, 'us-east-1')

    def terminate(self, wait: bool = True):
        """Terminate the instance, optionally blocking until it's terminated."""
        self._ec2.terminate_instances(InstanceIds=[self.instance_id])
        if wait:
            waiter = self._ec2.get_waiter('instance_terminated')
            waiter.wait(InstanceIds=[self.instance_id])

    def stop(self, wait: bool = True):
        """Stop the instance, optionally blocking until it's stopped."""
        self._ec2.stop_instances(InstanceIds=[self.instance_id])
        if wait:
            waiter = self._ec2.get_waiter('instance_stopped')
            waiter.wait(InstanceIds=[self.instance_id])


================================================
FILE: spotty/providers/aws/resources/snapshot.py
================================================
import time


class Snapshot(object):
    """A wrapper around an EBS snapshot description (as returned by "describe_snapshots")."""

    def __init__(self, ec2, snapshot_info):
        self._ec2 = ec2
        self._snapshot_info = snapshot_info

    @staticmethod
    def get_by_name(ec2, snapshot_name: str):
        """Returns a snapshot by
its name."""
        res = ec2.describe_snapshots(Filters=[
            {'Name': 'tag:Name', 'Values': [snapshot_name]},
        ])
        if len(res['Snapshots']) > 1:
            raise ValueError('Several snapshots with Name=%s found.' % snapshot_name)

        if not len(res['Snapshots']):
            return None

        return Snapshot(ec2, res['Snapshots'][0])

    @property
    def name(self) -> str:
        # value of the "Name" tag, or an empty string if the tag is missing
        snapshot_name = [tag['Value'] for tag in self._snapshot_info['Tags'] if tag['Key'] == 'Name']
        if not snapshot_name:
            return ''

        return snapshot_name[0]

    @property
    def snapshot_id(self):
        return self._snapshot_info['SnapshotId']

    @property
    def size(self) -> int:
        # size of the snapshotted volume, in GB
        return self._snapshot_info['VolumeSize']

    @property
    def creation_time(self) -> int:
        # snapshot start time converted to a Unix timestamp
        return int(time.mktime(self._snapshot_info['StartTime'].timetuple()))

    def rename(self, new_name):
        """Change the value of the snapshot's "Name" tag."""
        return self._ec2.create_tags(Resources=[self.snapshot_id], Tags=[{'Key': 'Name', 'Value': new_name}])

    def delete(self):
        return self._ec2.delete_snapshot(SnapshotId=self.snapshot_id)

    def wait_snapshot_completed(self):
        """Block until the snapshot reaches the "completed" state."""
        waiter = self._ec2.get_waiter('snapshot_completed')
        waiter.wait(SnapshotIds=[self.snapshot_id])


================================================
FILE: spotty/providers/aws/resources/stack.py
================================================
from collections import namedtuple
from time import sleep
from typing import List, Dict
from botocore.exceptions import EndpointConnectionError, ClientError
from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter
import logging

# a deployment progress task: "message" is printed once "start_resource" succeeds
# and "DONE" is printed once "finish_resource" succeeds (see Stack.wait_tasks)
Task = namedtuple('Task', ['message', 'start_resource', 'finish_resource', 'enabled'])


class Stack(object):
    """A wrapper around a CloudFormation stack description."""

    def __init__(self, cf, stack_info):
        self._cf = cf
        self._stack_info = stack_info

    @staticmethod
    def get_by_name(cf, stack_name: str):
        """Returns a Stack by its name."""
        try:
            res = cf.describe_stacks(StackName=stack_name)
        except ClientError as e:
            # ignore an exception if it raised because the stack doesn't exist
            error_code = e.response.get('Error', {}).get('Code')
            if error_code != 'ValidationError':
                raise e

            res =
{'Stacks': []}

        if not len(res['Stacks']):
            return None

        return Stack(cf, res['Stacks'][0])

    @staticmethod
    def create_stack(cf, *args, **kwargs):
        # NOTE(review): "create_stack" responses contain the stack ID but not the
        # full description, so "name"/"status" may not be available — confirm usage
        res = cf.create_stack(*args, **kwargs)
        return Stack(cf, res)

    @staticmethod
    def update_stack(cf, *args, **kwargs):
        res = cf.update_stack(*args, **kwargs)
        return Stack(cf, res)

    @property
    def stack_id(self) -> str:
        return self._stack_info['StackId']

    @property
    def stack_uuid(self) -> str:
        # the unique segment of the stack ID (part after the last "/")
        return self.stack_id.rsplit('/', 1)[-1]

    @property
    def name(self) -> str:
        return self._stack_info['StackName']

    @property
    def status(self) -> str:
        return self._stack_info['StackStatus']

    @property
    def outputs(self) -> str:
        return self._stack_info['Outputs']

    def delete(self):
        return self._cf.delete_stack(StackName=self.stack_id)

    def wait_stack_created(self, delay_secs: int = 30):
        """Block until the stack reaches CREATE_COMPLETE."""
        waiter = self._cf.get_waiter('stack_create_complete')
        waiter.wait(StackName=self.stack_id, WaiterConfig={'Delay': delay_secs})

    def wait_stack_updated(self, delay_secs: int = 30):
        """Block until the stack reaches UPDATE_COMPLETE."""
        waiter = self._cf.get_waiter('stack_update_complete')
        waiter.wait(StackName=self.stack_id, WaiterConfig={'Delay': delay_secs})

    def wait_stack_deleted(self, delay_secs: int = 30):
        """Block until the stack is deleted."""
        waiter = self._cf.get_waiter('stack_delete_complete')
        waiter.wait(StackName=self.stack_id, WaiterConfig={'Delay': delay_secs})

    def wait_status_changed(self, stack_waiting_status: str, output: AbstractOutputWriter, delay_secs: int = 5):
        """Poll the stack until its status differs from "stack_waiting_status".

        Returns the refreshed Stack object.
        """
        stack = None
        while True:
            # get the latest status of the stack
            try:
                stack = self.get_by_name(self._cf, self.stack_id)
            except EndpointConnectionError as e:
                # connection problem: report it and retry
                # NOTE(review): retries immediately without a delay — confirm intended
                output.write(str(e))
                continue

            if stack.status != stack_waiting_status:
                break

            sleep(delay_secs)

        return stack

    def wait_tasks(self, tasks: List[Task], resource_success_status: str, resource_fail_status: str,
                   output: AbstractOutputWriter, delay_secs: int = 5):
        """Report progress of the deployment tasks while the stack is being created.

        For each enabled task, its message is printed once the "start_resource"
        reaches "resource_success_status", and "DONE" is printed once the
        "finish_resource" does. Returns early if any resource reaches
        "resource_fail_status".
        """
        resource_statuses = self._get_resource_statuses()
        for task in tasks:
            if not task.enabled:
                continue

            task_started = task_finished = False
            while not task_finished:
                start_status = resource_statuses.get(task.start_resource)
                finish_status = resource_statuses.get(task.finish_resource)

                # a task with no "start_resource" starts unconditionally
                if not task_started and (not task.start_resource or (start_status == resource_success_status)):
                    task_started = True
                    output.write('- %s... ' % task.message, newline=False)
                elif task_started and (finish_status == resource_success_status):
                    task_finished = True
                    output.write('DONE')
                else:
                    sleep(delay_secs)
                    resource_statuses = self._get_resource_statuses()

                # check that the stack is not failed
                for status in resource_statuses.values():
                    if status == resource_fail_status:
                        # finish the unfinished progress line before aborting
                        if task_started and not task_finished:
                            output.write('')
                        return

    def _get_resource_statuses(self) -> Dict[str, str]:
        """Return a mapping from logical resource IDs to their current statuses.

        Any API error is logged and an empty mapping is returned.
        """
        stack_resources = None
        try:
            stack_resources = self._cf.list_stack_resources(StackName=self.stack_id)
        except Exception as e:
            logging.warning(str(e))

        resource_statuses = {}
        if stack_resources:
            resource_statuses = {row['LogicalResourceId']: row['ResourceStatus']
                                 for row in stack_resources['StackResourceSummaries']}

        return resource_statuses


================================================
FILE: spotty/providers/aws/resources/subnet.py
================================================
class Subnet(object):
    """A wrapper around an EC2 subnet description."""

    def __init__(self, ec2, subnet_info):
        self._ec2 = ec2
        self._subnet_info = subnet_info

    @staticmethod
    def get_by_id(ec2, subnet_id: str):
        """Returns a subnet by its ID."""
        res = ec2.describe_subnets(Filters=[
            {'Name': 'subnet-id', 'Values': [subnet_id]},
        ])
        if not len(res['Subnets']):
            return None

        return Subnet(ec2, res['Subnets'][0])

    @staticmethod
    def get_default_subnets(ec2):
        """Returns the default subnets of the region."""
        res = ec2.describe_subnets(Filters=[
            {'Name': 'defaultForAz', 'Values': ['true']},
        ])
        subnets = [Subnet(ec2, subnet_info) for subnet_info in res['Subnets']]

        return subnets

    @property
    def availability_zone(self) -> str:
        return self._subnet_info['AvailabilityZone']

    @property
    def vpc_id(self) -> str:
        return self._subnet_info['VpcId']


================================================
FILE: spotty/providers/aws/resources/volume.py
================================================
from spotty.providers.aws.resources.snapshot import Snapshot


class Volume(object):
    """A wrapper around an EBS volume description (as returned by "describe_volumes")."""

    def __init__(self, ec2, volume_info):
        self._ec2 = ec2
        self._volume_info = volume_info

    @staticmethod
    def get_by_name(ec2, volume_name: str):
        """Returns a volume by its name.

        Returns None if no volume has a matching "Name" tag; raises
        ValueError if the name is ambiguous.
        """
        res = ec2.describe_volumes(Filters=[
            {'Name': 'tag:Name', 'Values': [volume_name]},
        ])
        if len(res['Volumes']) > 1:
            raise ValueError('Several volumes with Name=%s found.' % volume_name)

        if not len(res['Volumes']):
            return None

        return Volume(ec2, res['Volumes'][0])

    @property
    def name(self) -> str:
        # value of the "Name" tag, or an empty string if the tag is missing
        volume_name = [tag['Value'] for tag in self._volume_info['Tags'] if tag['Key'] == 'Name']
        if not volume_name:
            return ''

        return volume_name[0]

    @property
    def volume_id(self) -> str:
        return self._volume_info['VolumeId']

    @property
    def size(self) -> int:
        # volume size in GB
        return self._volume_info['Size']

    @property
    def availability_zone(self) -> str:
        return self._volume_info['AvailabilityZone']

    @property
    def state(self) -> str:
        return self._volume_info['State']

    def is_available(self):
        return self.state == 'available'

    def create_snapshot(self) -> Snapshot:
        """Create a snapshot of the volume, tagged with the volume's name."""
        snapshot_info = self._ec2.create_snapshot(
            VolumeId=self._volume_info['VolumeId'],
            TagSpecifications=[{
                'ResourceType': 'snapshot',
                'Tags': [{
                    'Key': 'Name',
                    'Value': self.name,
                }],
            }],
        )

        return Snapshot(self._ec2, snapshot_info)

    def delete(self):
        return self._ec2.delete_volume(VolumeId=self._volume_info['VolumeId'])


================================================
FILE: spotty/providers/aws/resources/vpc.py
================================================
class Vpc(object):
    """A wrapper around an EC2 VPC description."""

    def __init__(self, ec2, vpc_info):
        self._ec2 = ec2
        self._vpc_info = vpc_info

    @staticmethod
    def get_default_vpc(ec2):
        """Returns a default VPC."""
        res = ec2.describe_vpcs(Filters=[{'Name': 'isDefault', 'Values': ['true']}])
        if not len(res['Vpcs']):
            return None

        return Vpc(ec2, res['Vpcs'][0])

    @property
    def vpc_id(self) -> str:
        return self._vpc_info['VpcId']
================================================
FILE: spotty/providers/gcp/__init__.py
================================================



================================================
FILE: spotty/providers/gcp/config/__init__.py
================================================



================================================
FILE: spotty/providers/gcp/config/disk_volume.py
================================================
from spotty.config.abstract_instance_volume import AbstractInstanceVolume
from spotty.providers.gcp.config.validation import validate_disk_volume_parameters


class DiskVolume(AbstractInstanceVolume):
    """Configuration of a GCP persistent-disk volume."""

    TYPE_NAME = 'Disk'

    # deletion policies
    DP_CREATE_SNAPSHOT = 'CreateSnapshot'
    DP_UPDATE_SNAPSHOT = 'UpdateSnapshot'
    DP_RETAIN = 'Retain'
    DP_DELETE = 'Delete'

    def __init__(self, volume_config: dict, project_name: str, instance_name: str):
        super().__init__(volume_config)

        self._project_name = project_name
        self._instance_name = instance_name

    def _validate_volume_parameters(self, params: dict) -> dict:
        return validate_disk_volume_parameters(params)

    @property
    def title(self):
        return 'Disk'

    @property
    def size(self) -> int:
        # disk size in GB
        return self._params['size']

    @property
    def deletion_policy(self) -> str:
        return self._params['deletionPolicy']

    @property
    def deletion_policy_title(self) -> str:
        # human-readable name of the deletion policy
        return {
            DiskVolume.DP_CREATE_SNAPSHOT: 'Create Snapshot',
            DiskVolume.DP_UPDATE_SNAPSHOT: 'Update Snapshot',
            DiskVolume.DP_RETAIN: 'Retain Volume',
            DiskVolume.DP_DELETE: 'Delete Volume',
        }[self.deletion_policy]

    @property
    def disk_name(self) -> str:
        """Returns the disk name."""
        disk_name = self._params['diskName']
        if not disk_name:
            # default name: "<project>-<instance>-<volume>"
            disk_name = '%s-%s-%s' % (self._project_name.lower(), self._instance_name.lower(), self.name.lower())

        return disk_name

    @property
    def mount_dir(self) -> str:
        """A directory where the volume will be mounted on the host OS."""
        if self._params['mountDir']:
            mount_dir = self._params['mountDir']
        else:
            # mounted to "/mnt/<disk name>" by default
            mount_dir = '/mnt/%s' % self.disk_name

        return mount_dir

    @property
    def
host_path(self) -> str:
        """A path on the host OS that will be mounted to the container."""
        return self.mount_dir


================================================
FILE: spotty/providers/gcp/config/image_uri.py
================================================
import re

# matches Compute Engine image URIs, with an optional API prefix, an optional
# project part and an optional "family/" prefix, e.g.:
#   projects/<project>/global/images/<name>
#   global/images/family/<family-name>
IMAGE_URI_REGEX = '^(?:(?:https://compute.googleapis.com/compute/v1/)?projects/([a-z](?:[-a-z0-9]*[a-z0-9])?)/)?' \
                  'global/images/(family/)?([a-z](?:[-a-z0-9]*[a-z0-9])?)$'


class ImageUri(object):
    """Parses a Compute Engine image URI into a project ID, a "family" flag and a name."""

    def __init__(self, image_uri: str):
        res = re.match(IMAGE_URI_REGEX, image_uri)
        if not res:
            raise ValueError('Image URI has a wrong format')

        self._project_id, self._is_family, self._name = res.groups()

    @property
    def project_id(self) -> str:
        # None if the URI doesn't contain a project part
        return self._project_id

    @property
    def is_family(self):
        # True if the URI references an image family rather than a particular image
        return bool(self._is_family)

    @property
    def name(self):
        """Image name or image family name."""
        return self._name


================================================
FILE: spotty/providers/gcp/config/instance_config.py
================================================
from typing import List
from spotty.config.abstract_instance_config import AbstractInstanceConfig, VolumeMount
from spotty.config.abstract_instance_volume import AbstractInstanceVolume
from spotty.providers.gcp.config.disk_volume import DiskVolume
from spotty.providers.gcp.config.validation import validate_instance_parameters

VOLUME_TYPE_DISK = 'Disk'
DEFAULT_IMAGE_NAME = 'spotty'


class InstanceConfig(AbstractInstanceConfig):
    """Configuration of a GCP instance read from the project's configuration file."""

    def _validate_instance_params(self, params: dict) -> dict:
        return validate_instance_parameters(params)

    def _get_instance_volumes(self) -> List[AbstractInstanceVolume]:
        # instantiate volume objects for all configured volumes
        volumes = []
        for volume_config in self._params['volumes']:
            volume_type = volume_config['type']
            if volume_type == DiskVolume.TYPE_NAME:
                volumes.append(DiskVolume(volume_config, self.project_config.project_name, self.name))
            else:
                raise ValueError('GCP volume type "%s" not supported.'
% volume_type)

        return volumes

    @property
    def user(self):
        # user name used on the instance
        return 'spotty'

    @property
    def machine_name(self) -> str:
        """Name of the Compute Engine instance."""
        return '%s-%s' % (self.project_config.project_name.lower(), self.name.lower())

    @property
    def project_id(self) -> str:
        return self._params['projectId']

    @property
    def zone(self) -> str:
        return self._params['zone']

    @property
    def machine_type(self) -> str:
        return self._params['machineType']

    @property
    def gpu(self) -> dict:
        return self._params['gpu']

    @property
    def is_preemptible_instance(self) -> bool:
        return self._params['preemptibleInstance']

    @property
    def boot_disk_size(self) -> int:
        return self._params['bootDiskSize']

    @property
    def ports(self) -> List[int]:
        # de-duplicated list of ports to open on the instance
        return list(set(self._params['ports']))

    @property
    def image_name(self) -> str:
        return self._params['imageName']

    @property
    def has_image_name(self) -> bool:
        return bool(self._params['imageName'])

    @property
    def image_uri(self) -> str:
        return self._params['imageUri']


================================================
FILE: spotty/providers/gcp/config/validation.py
================================================
import os
from schema import Schema, Optional, And, Regex, Or, Use
from spotty.config.validation import validate_config, get_instance_parameters_schema, has_prefix
from spotty.providers.gcp.config.image_uri import IMAGE_URI_REGEX


def validate_instance_parameters(params: dict):
    """Validate the parameters of a GCP instance configuration."""
    # imported locally to avoid a circular import with disk_volume.py
    from spotty.providers.gcp.config.disk_volume import DiskVolume

    instance_parameters = {
        'zone': And(str, Regex(r'^[a-z0-9-]+$')),
        'machineType': str,
        Optional('gpu', default=None): {
            'type': str,
            Optional('count', default=1): int,
        },
        Optional('preemptibleInstance', default=False): bool,
        Optional('imageName', default=None): And(str, len, Regex(r'^[\w-]+$')),
        Optional('imageUri', default=None): And(str, len, Regex(IMAGE_URI_REGEX)),
        Optional('bootDiskSize', default=0): And(Or(int, str),
                                                 Use(str),
                                                 Regex(r'^\d+$', error='Incorrect value for "bootDiskSize".'),
                                                 Use(int),
                                                 And(lambda x: x > 0,
                                                     error='"rootVolumeSize" should be greater than 0 or should '
                                                           'not be specified.'),
                                                 ),
        Optional('ports', default=[]): [And(int, lambda x: 0 < x < 65536)],
    }

    instance_checks = [
        # "imageName" and "imageUri" are mutually exclusive
        And(lambda x: not (x['imageName'] and x['imageUri']),
            error='"imageName" and "imageUri" parameters cannot be used together.'),
    ]

    volume_checks = [
        # no mount directory may be a prefix of another one
        And(lambda x: not has_prefix([(volume['parameters']['mountDir'] + '/') for volume in x
                                      if volume['parameters'].get('mountDir')]),
            error='Mount directories cannot be prefixes for each other.'),
    ]

    schema = get_instance_parameters_schema(instance_parameters, DiskVolume.TYPE_NAME,
                                            instance_checks, volume_checks)

    return validate_config(schema, params)


def validate_disk_volume_parameters(params: dict):
    """Validate the parameters of a GCP "Disk" volume configuration."""
    # imported locally to avoid a circular import with disk_volume.py
    from spotty.providers.gcp.config.disk_volume import DiskVolume

    schema = Schema({
        Optional('diskName', default=''): And(str, Regex(r'^[\w-]{1,255}$')),
        Optional('mountDir', default=''): And(
            str,
            And(os.path.isabs, error='Use absolute paths in the "mountDir" parameters'),
            Use(lambda x: x.rstrip('/'))
        ),
        Optional('size', default=0): And(int, lambda x: x > 0),
        Optional('deletionPolicy', default=DiskVolume.DP_RETAIN): And(
            str,
            lambda x: x in [DiskVolume.DP_CREATE_SNAPSHOT,
                            DiskVolume.DP_UPDATE_SNAPSHOT,
                            DiskVolume.DP_RETAIN,
                            DiskVolume.DP_DELETE],
            error='Incorrect value for "deletionPolicy".'
        ),
    })

    return validate_config(schema, params)


================================================
FILE: spotty/providers/gcp/data_transfer.py
================================================
import logging
import subprocess
from spotty.deployment.abstract_cloud_instance.abstract_data_transfer import AbstractDataTransfer
from spotty.providers.gcp.helpers.gsutil_rsync import check_gsutil_installed, get_rsync_command


class DataTransfer(AbstractDataTransfer):
    """Syncs project files between the local machine, a GS bucket and the instance."""

    @property
    def scheme_name(self) -> str:
        return 'gs'

    def upload_local_to_bucket(self, bucket_name: str, dry_run: bool = False):
        """Uploads files from local to the bucket."""
        # check gsutil is installed
        check_gsutil_installed()

        # sync the project with the bucket, deleted files will be deleted from the bucket
        local_cmd = get_rsync_command(self._local_project_dir, self._get_bucket_project_path(bucket_name),
                                      filters=self._sync_filters, delete=True, dry_run=dry_run)

        # execute the command locally
        logging.debug('Local sync command: ' + local_cmd)
        exit_code = subprocess.call(local_cmd, shell=True)
        if exit_code != 0:
            raise ValueError('Failed to upload the project files to the GS bucket.')

    def download_bucket_to_local(self, bucket_name: str, download_filters: list):
        """Downloads files from the bucket to local."""
        raise NotImplementedError

    def get_download_bucket_to_instance_command(self, bucket_name: str, use_sudo: bool = False) -> str:
        """A remote command to download files from the bucket to the instance."""
        remote_cmd = get_rsync_command(self._get_bucket_project_path(bucket_name), self._host_project_dir,
                                       filters=self._sync_filters)
        if use_sudo:
            remote_cmd = 'sudo ' + remote_cmd

        return remote_cmd

    def get_upload_instance_to_bucket_command(self, bucket_name: str, download_filters: list,
                                              use_sudo: bool = False, dry_run: bool = False) -> str:
        """A remote command to upload files from the instance to the bucket.

        It uses a temporary directory on the bucket that is unique for the instance.
This directory keeps all downloaded from the instance files to sync only changed files with local.
        """
        raise NotImplementedError


================================================
FILE: spotty/providers/gcp/dm_templates/__init__.py
================================================



================================================
FILE: spotty/providers/gcp/dm_templates/instance/__init__.py
================================================



================================================
FILE: spotty/providers/gcp/dm_templates/instance/data/startup_script.sh.tpl
================================================
#!/usr/bin/env bash
set -x

mkdir -p "{{INSTANCE_STARTUP_SCRIPTS_DIR}}"

# NOTE(review): the "{(unknown)}" placeholders below look like extraction garbling
# of a mustache tag (presumably the script's filename) — restore from the original file

# create startup scripts
{{#STARTUP_SCRIPTS}}
cat <<'EOF' > {{INSTANCE_STARTUP_SCRIPTS_DIR}}/{(unknown)}
{{{content}}}
EOF
chmod +x {{INSTANCE_STARTUP_SCRIPTS_DIR}}/{(unknown)}
{{/STARTUP_SCRIPTS}}

# run startup scripts
{{#STARTUP_SCRIPTS}}
{{INSTANCE_STARTUP_SCRIPTS_DIR}}/{(unknown)} && \
{{/STARTUP_SCRIPTS}}
true

# send signal that the Docker container is ready or failed
EXIT_CODE=$?
if [ $EXIT_CODE -eq 0 ]; then
  gcloud beta runtime-config configs variables set /success/1 1 --config-name {{MACHINE_NAME}}-docker-status --is-text
else
  gcloud beta runtime-config configs variables set /failure/1 1 --config-name {{MACHINE_NAME}}-docker-status --is-text
  exit $EXIT_CODE
fi


================================================
FILE: spotty/providers/gcp/dm_templates/instance/data/startup_scripts/01_prepare_instance.sh
================================================
#!/bin/bash -xe

# install jq
apt-get install -y jq

# create tmux config
echo "bind-key x kill-pane" > /home/{{SSH_USERNAME}}/.tmux.conf

# create the "container bash" script
mkdir -p "$(dirname '{{CONTAINER_BASH_SCRIPT_PATH}}')"
cat > "{{CONTAINER_BASH_SCRIPT_PATH}}" <<'EOF2'
{{{CONTAINER_BASH_SCRIPT}}}
EOF2
chmod +x "{{CONTAINER_BASH_SCRIPT_PATH}}"

# create an alias to connect to the docker container
CONTAINER_BASH_ALIAS=container
echo "alias $CONTAINER_BASH_ALIAS=\"{{CONTAINER_BASH_SCRIPT_PATH}}\"" >> /home/{{SSH_USERNAME}}/.bashrc
echo "alias $CONTAINER_BASH_ALIAS=\"{{CONTAINER_BASH_SCRIPT_PATH}}\"" >> /root/.bashrc

{{#IS_GPU_INSTANCE}}
# install NVIDIA driver
if ! command -v nvidia-smi &> /dev/null; then
  DRIVER_INSTALLER_PATH=/opt/deeplearning/install-driver.sh
  if [ -f "$DRIVER_INSTALLER_PATH" ]; then
    $DRIVER_INSTALLER_PATH
  fi
fi
{{/IS_GPU_INSTANCE}}

# create common temporary directories
mkdir -pm 777 '{{SPOTTY_TMP_DIR}}'
mkdir -pm 777 '{{CONTAINERS_TMP_DIR}}'


================================================
FILE: spotty/providers/gcp/dm_templates/instance/data/startup_scripts/02_mount_volumes.sh
================================================
#!/bin/bash -xe

DEVICE_NAMES=({{{DISK_DEVICE_NAMES}}})
MOUNT_DIRS=({{{DISK_MOUNT_DIRS}}})

for i in ${!DEVICE_NAMES[*]}
do
  DEVICE=/dev/disk/by-id/google-${DEVICE_NAMES[$i]}
  MOUNT_DIR=${MOUNT_DIRS[$i]}

  # format the disk if it doesn't have a filesystem yet, then mount and grow it
  blkid -o value -s TYPE $DEVICE || mkfs -t ext4 $DEVICE
  mkdir -p $MOUNT_DIR
  mount $DEVICE $MOUNT_DIR
  chmod 777 $MOUNT_DIR
  resize2fs $DEVICE
done

# create directories for temporary container volumes
{{#TMP_VOLUME_DIRS}}
mkdir -p {{PATH}}
chmod 777 {{PATH}}
{{/TMP_VOLUME_DIRS}}


================================================
FILE: spotty/providers/gcp/dm_templates/instance/data/startup_scripts/03_set_docker_root.sh
================================================
#!/bin/bash -xe

# change docker data root directory
if [ -n "{{DOCKER_DATA_ROOT_DIR}}" ]; then
  jq '. + { "data-root": "{{DOCKER_DATA_ROOT_DIR}}" }' /etc/docker/daemon.json > /tmp/docker_daemon.json \
    && mv /tmp/docker_daemon.json /etc/docker/daemon.json
  service docker restart
fi


================================================
FILE: spotty/providers/gcp/dm_templates/instance/data/startup_scripts/04_sync_project.sh
================================================
#!/bin/bash -xe

# create a project directory
if [ -n "{{HOST_PROJECT_DIR}}" ]; then
  mkdir -p "{{HOST_PROJECT_DIR}}"
  chmod 777 "{{HOST_PROJECT_DIR}}"
fi

{{{SYNC_PROJECT_CMD}}}


================================================
FILE: spotty/providers/gcp/dm_templates/instance/data/startup_scripts/05_run_instance_startup_commands.sh
================================================
#!/bin/bash -xe

mkdir -p "{{INSTANCE_STARTUP_SCRIPTS_DIR}}"

cat > "{{INSTANCE_STARTUP_SCRIPTS_DIR}}/instance_startup_commands.sh" <<'EOF2'
{{{INSTANCE_STARTUP_COMMANDS}}}
EOF2

/bin/bash -xe "{{INSTANCE_STARTUP_SCRIPTS_DIR}}/instance_startup_commands.sh"


================================================
FILE: spotty/providers/gcp/dm_templates/instance/data/template.yaml
================================================
resources:
  - name: {{MACHINE_NAME}}
    type: compute.v1.instance
    properties:
      zone: {{ZONE}}
      machineType: zones/{{ZONE}}/machineTypes/{{MACHINE_TYPE}}
      scheduling:
        {{#GPU_TYPE}}
        onHostMaintenance: TERMINATE
        automaticRestart: false
        {{/GPU_TYPE}}
        preemptible: {{PREEMPTIBLE}}
      serviceAccounts:
        - email: {{SERVICE_ACCOUNT_EMAIL}}
          scopes: ['https://www.googleapis.com/auth/cloud-platform']
      tags:
        items:
          - {{MACHINE_NAME}}
      disks:
        - deviceName: boot
          type: PERSISTENT
          boot: true
          autoDelete: true
          initializeParams:
            sourceImage: {{SOURCE_IMAGE}}
            {{#BOOT_DISK_SIZE}}
            diskSizeGb: {{BOOT_DISK_SIZE}}
            {{/BOOT_DISK_SIZE}}
        {{#DISK_ATTACHMENTS}}
        - source: {{DISK_LINK}}
          deviceName: {{DEVICE_NAME}}
          type: PERSISTENT
          mode: READ_WRITE
          boot: false
          autoDelete: false
        {{/DISK_ATTACHMENTS}}
      networkInterfaces:
        - network: global/networks/default
          accessConfigs:
            - name: External NAT
              type: ONE_TO_ONE_NAT
      {{#GPU_TYPE}}
      guestAccelerators:
        - acceleratorType: zones/{{ZONE}}/acceleratorTypes/{{GPU_TYPE}}
          acceleratorCount: {{GPU_COUNT}}
      {{/GPU_TYPE}}
      metadata:
        items:
          - key: 'ssh-keys'
            value: |
              {{SSH_USERNAME}}:ssh-rsa {{{PUB_KEY_VALUE}}} {{SSH_USERNAME}}
          - key: 'startup-script'
            value: |
              {{> STARTUP_SCRIPT}}

  - name: {{MACHINE_NAME}}-firewall-rule
    type: compute.v1.firewall
    properties:
      network: global/networks/default
      sourceRanges:
        - 0.0.0.0/0
      targetTags:
        - {{MACHINE_NAME}}
      allowed:
        - IPProtocol: tcp
          ports: [{{PORTS}}]

  - name: {{MACHINE_NAME}}-docker-status
    type: runtimeconfig.v1beta1.config
    properties:
      config: {{MACHINE_NAME}}-docker-status
      description: Docker status

  - name: {{MACHINE_NAME}}-docker-waiter
    type: runtimeconfig.v1beta1.waiter
    metadata:
      dependsOn:
        - {{MACHINE_NAME}}
    properties:
      parent: $(ref.{{MACHINE_NAME}}-docker-status.name)
      waiter: {{MACHINE_NAME}}-docker-waiter
      timeout: 1800s
      success:
        cardinality:
          path: /success
          number: 1
      failure:
        cardinality:
          path: /failure
          number: 1


================================================
FILE: spotty/providers/gcp/dm_templates/instance/instance_template.py
================================================
import os
from typing import List
import chevron
from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter
from spotty.config.tmp_dir_volume import TmpDirVolume
from spotty.config.validation import is_subdir
from spotty.config.abstract_instance_volume import AbstractInstanceVolume
from spotty.deployment.container.docker.docker_commands import DockerCommands
from spotty.deployment.container.docker.scripts.container_bash_script import ContainerBashScript
from spotty.deployment.container.docker.scripts.start_container_script import StartContainerScript
from spotty.deployment.abstract_cloud_instance.file_structure import CONTAINER_BASH_SCRIPT_PATH, \
    INSTANCE_STARTUP_SCRIPTS_DIR, CONTAINERS_TMP_DIR, INSTANCE_SPOTTY_TMP_DIR
from spotty.providers.gcp.config.disk_volume
import DiskVolume
from spotty.providers.gcp.config.instance_config import InstanceConfig


def prepare_instance_template(instance_config: InstanceConfig, docker_commands: DockerCommands, image_link: str,
                              bucket_name: str, sync_project_cmd: str, public_key_value: str,
                              service_account_email: str, output: AbstractOutputWriter):
    """Prepares deployment template to run an instance.

    Renders the startup scripts and the Deployment Manager YAML template and
    prints a short summary of the deployment to "output". Returns the rendered
    template as a string.
    """
    # get disk attachments
    disk_attachments, disk_device_names, disk_mount_dirs = \
        _get_disk_attachments(instance_config.volumes, instance_config.zone)

    # run sync command as a non-root user
    if instance_config.container_config.run_as_host_user:
        sync_project_cmd = 'sudo -u %s %s' % (instance_config.user, sync_project_cmd)

    # parameters for each numbered startup-script template in data/startup_scripts
    startup_scripts_templates = [
        {
            'filename': '01_prepare_instance.sh',
            'params': {
                'CONTAINER_BASH_SCRIPT_PATH': CONTAINER_BASH_SCRIPT_PATH,
                'CONTAINER_BASH_SCRIPT': ContainerBashScript(docker_commands).render(),
                'IS_GPU_INSTANCE': bool(instance_config.gpu),
                'SSH_USERNAME': instance_config.user,
                'SPOTTY_TMP_DIR': INSTANCE_SPOTTY_TMP_DIR,
                'CONTAINERS_TMP_DIR': CONTAINERS_TMP_DIR,
            },
        },
        {
            'filename': '02_mount_volumes.sh',
            'params': {
                'DISK_DEVICE_NAMES': ('"%s"' % '" "'.join(disk_device_names)) if disk_device_names else '',
                'DISK_MOUNT_DIRS': ('"%s"' % '" "'.join(disk_mount_dirs)) if disk_mount_dirs else '',
                'TMP_VOLUME_DIRS': [{'PATH': volume.host_path} for volume in instance_config.volumes
                                    if isinstance(volume, TmpDirVolume)],
            },
        },
        {
            'filename': '03_set_docker_root.sh',
            'params': {
                'DOCKER_DATA_ROOT_DIR': instance_config.docker_data_root,
            },
        },
        {
            'filename': '04_sync_project.sh',
            'params': {
                'HOST_PROJECT_DIR': instance_config.host_project_dir,
                'SYNC_PROJECT_CMD': sync_project_cmd,
            },
        },
        {
            'filename': '05_run_instance_startup_commands.sh',
            'params': {
                'INSTANCE_STARTUP_SCRIPTS_DIR': INSTANCE_STARTUP_SCRIPTS_DIR,
                'INSTANCE_STARTUP_COMMANDS': instance_config.commands,
            },
        },
    ]

    # render startup scripts
    startup_scripts_content = []
    for template in startup_scripts_templates:
        with open(os.path.join(os.path.dirname(__file__), 'data', 'startup_scripts', template['filename'])) as f:
            content = f.read()

        startup_scripts_content.append({
            'filename': template['filename'],
            'content': chevron.render(content, template['params'])
        })

    # the container-start script is generated in code, not from a file template
    startup_scripts_content.append({
        'filename': '06_start_container.sh',
        'content': StartContainerScript(docker_commands).render(print_trace=True),
    })

    # render the main startup script
    with open(os.path.join(os.path.dirname(__file__), 'data', 'startup_script.sh.tpl')) as f:
        startup_script = f.read()

    startup_script = chevron.render(startup_script, {
        'MACHINE_NAME': instance_config.machine_name,
        'INSTANCE_STARTUP_SCRIPTS_DIR': INSTANCE_STARTUP_SCRIPTS_DIR,
        'STARTUP_SCRIPTS': startup_scripts_content,
    })

    # render the template
    with open(os.path.join(os.path.dirname(__file__), 'data', 'template.yaml')) as f:
        template = f.read()

    template = chevron.render(template, {
        'SERVICE_ACCOUNT_EMAIL': service_account_email,
        'ZONE': instance_config.zone,
        'MACHINE_TYPE': instance_config.machine_type,
        'SOURCE_IMAGE': image_link,
        'BOOT_DISK_SIZE': instance_config.boot_disk_size,
        'MACHINE_NAME': instance_config.machine_name,
        'PREEMPTIBLE': 'true' if instance_config.is_preemptible_instance else 'false',
        'GPU_TYPE': instance_config.gpu['type'] if instance_config.gpu else '',
        'GPU_COUNT': instance_config.gpu['count'] if instance_config.gpu else 0,
        'DISK_ATTACHMENTS': disk_attachments,
        'SSH_USERNAME': instance_config.user,
        'PUB_KEY_VALUE': public_key_value,
        # port 22 (SSH) is always open
        'PORTS': ', '.join([str(port) for port in set([22] + instance_config.ports)]),
    }, partials_dict={
        'STARTUP_SCRIPT': startup_script,
    })

    # print some information about the deployment
    output.write('- image URL: ' + '/'.join(image_link.split('/')[-5:]))
    output.write('- zone: ' + instance_config.zone)
    output.write('- preemptible VM' if instance_config.is_preemptible_instance else '- on-demand VM')
    output.write(('- GPUs: %d x %s' % (instance_config.gpu['count'], instance_config.gpu['type']))
                 if instance_config.gpu else '- no GPUs')

    # print name of the volume where Docker data will be stored
    if instance_config.docker_data_root:
        docker_data_volume_name = [volume.name for volume in instance_config.volumes
                                   if is_subdir(instance_config.docker_data_root, volume.host_path)][0]
        output.write('- Docker data will be stored on the "%s" volume' % docker_data_volume_name)

    return template


def _get_disk_attachments(volumes: List[AbstractInstanceVolume], zone: str):
    """Collect disk attachment parameters for the DM template.

    Returns a tuple of (attachment dicts, device names, mount directories),
    aligned by index; non-Disk volumes are skipped.
    """
    disk_attachments = []
    disk_device_names = []
    disk_mount_dirs = []

    for i, volume in enumerate(volumes):
        if isinstance(volume, DiskVolume):
            device_name = 'disk-%d' % (i + 1)
            disk_device_names.append(device_name)
            disk_mount_dirs.append(volume.mount_dir)
            disk_attachments.append({
                'DISK_LINK': 'zones/%s/disks/%s' % (zone, volume.disk_name),
                'DEVICE_NAME': device_name,
            })

    return disk_attachments, disk_device_names, disk_mount_dirs


================================================
FILE: spotty/providers/gcp/errors/__init__.py
================================================



================================================
FILE: spotty/providers/gcp/errors/image_not_found.py
================================================
class ImageNotFoundError(Exception):
    """Raised when the requested Spotty image doesn't exist."""

    def __init__(self, image_name):
        super().__init__('The image "%s" was not found.\n'
                         'Use the "spotty gcp create-image" command to create an image with NVIDIA Docker.'
% image_name)


================================================
FILE: spotty/providers/gcp/helpers/__init__.py
================================================



================================================
FILE: spotty/providers/gcp/helpers/ce_client.py
================================================
from collections import OrderedDict
from time import sleep
import googleapiclient.discovery


class CEClient(object):
    """Compute Engine client."""

    def __init__(self, project_id: str, zone: str):
        self._project_id = project_id
        self._zone = zone
        self._client = googleapiclient.discovery.build('compute', 'v1', cache_discovery=False)

    @property
    def zone(self):
        return self._zone

    def list_images(self, image_name: str = None, project_id: str = None):
        """Returns a list of images that satisfy the name.

        This method is used instead of the "get" because it doesn't raise
        an exception if an image doesn't exist.
        """
        # default to the client's project
        if not project_id:
            project_id = self._project_id

        filter_str = ('name=%s' % image_name) if image_name else None
        res = self._client.images().list(project=project_id, filter=filter_str).execute()
        if not res.get('items'):
            return []

        return res['items']

    def get_image_from_family(self, family_name: str, project_id: str = None):
        """Return an image from the family (see the Compute Engine "getFromFamily" API)."""
        if not project_id:
            project_id = self._project_id

        res = self._client.images().getFromFamily(project=project_id, family=family_name).execute()

        return res

    def list_instances(self, machine_name=None):
        """Return the instances in the client's zone, optionally filtered by name."""
        filter_str = ('name=%s' % machine_name) if machine_name else None
        res = self._client.instances().list(project=self._project_id, zone=self._zone,
                                            filter=filter_str).execute()
        if not res.get('items'):
            return []

        return res['items']

    def list_disks(self, disk_name=None):
        """Return the disks in the client's zone, optionally filtered by name."""
        filter_str = ('name=%s' % disk_name) if disk_name else None
        res = self._client.disks().list(project=self._project_id, zone=self._zone,
                                        filter=filter_str).execute()
        if not res.get('items'):
            return []

        return res['items']

    def list_snapshots(self, snapshot_name=None):
        """Return the project's snapshots, optionally filtered by name."""
        filter_str = ('name=%s' % snapshot_name) if snapshot_name else None
        res = self._client.snapshots().list(project=self._project_id, filter=filter_str).execute()
        if not res.get('items'):
            return []

        return res['items']

    def get_accelerator_types(self) -> OrderedDict:
        """Map accelerator type names to the maximum number of cards per instance."""
        res = self._client.acceleratorTypes().list(project=self._project_id, zone=self._zone).execute()
        accelerator_types = OrderedDict([(item['name'], item['maximumCardsPerInstance'])
                                         for item in res.get('items', [])])

        return accelerator_types

    def create_disk(self, name: str, size: int = None, snapshot_link: str = None) -> str:
        """Create a standard persistent disk, optionally from a snapshot.

        Returns the link to the created disk.
        """
        params = {
            'name': name,
            'type': 'zones/%s/diskTypes/pd-standard' % self._zone,
            'physicalBlockSizeBytes': 4096,
        }

        if size:
            params['sizeGb'] = size

        if snapshot_link:
            params['sourceSnapshot'] = snapshot_link

        res = self._client.disks().insert(project=self._project_id, zone=self._zone, body=params).execute()

        return res['targetLink']

    def get_machine_types(self, machine_type: str = None):
        """Returns a list of machine types that satisfy the name.

        This method is used instead of the "get" because it doesn't raise
        an exception if a machine type doesn't exist.
        """
        filter_str = ('name=%s' % machine_type) if machine_type else None
        res = self._client.machineTypes().list(project=self._project_id, zone=self._zone,
                                               filter=filter_str).execute()
        if not res.get('items'):
            return []

        return res['items']

    def stop_instance(self, machine_name: str, wait: bool = True) -> str:
        """Stops the instance."""
        operation = self._client.instances().stop(project=self._project_id, zone=self._zone,
                                                  instance=machine_name).execute()
        if wait:
            operation = self._wait_operation(operation)

        return operation['targetLink']

    def delete_instance(self, machine_name: str, wait: bool = True) -> str:
        """Deletes the instance."""
        operation = self._client.instances().delete(project=self._project_id, zone=self._zone,
                                                    instance=machine_name).execute()
        if wait:
            operation = self._wait_operation(operation)

        return operation['targetLink']

    def _wait_operation(self, operation: dict):
        """Waits until the operation is finished."""
        # poll the zone operation every 5 seconds until it's done
        while operation['status'] != 'DONE':
            sleep(5)
            operation = self._client.zoneOperations().wait(project=self._project_id, zone=self._zone,
                                                           operation=operation['name']).execute()

        return operation


================================================
FILE: spotty/providers/gcp/helpers/deployment.py
================================================
import logging
from collections import OrderedDict
from time import sleep
from httplib2 import ServerNotFoundError
from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter
from spotty.providers.gcp.resources.instance import Instance
from spotty.providers.gcp.resources.stack import Stack
from spotty.providers.gcp.helpers.ce_client import CEClient
from spotty.providers.gcp.helpers.dm_client import DMClient
from spotty.providers.gcp.helpers.dm_resource import DMResource


def wait_resources(dm: DMClient, ce: CEClient, deployment_name: str, resource_messages: OrderedDict,
                   instance_resource_name: str, machine_name: str, output: AbstractOutputWriter,
                   delay: int = 5):
    # make sure that the instance resource is
in the messages list assert any(resource_name == instance_resource_name for resource_name, _ in resource_messages.items()) created_resources = set() for resource_name, message in resource_messages.items(): output.write('- %s...' % message) is_created = False while not is_created: sleep(delay) # get the resource info try: # check that the deployment is not failed stack = Stack.get_by_name(dm, deployment_name) if stack.error: raise ValueError('Deployment "%s" failed.\n' 'Error: %s' % (deployment_name, stack.error['message'])) # check if the instance was preempted, terminated or deleted right after creation if instance_resource_name in created_resources: instance = Instance.get_by_name(ce, machine_name) if not instance or instance.is_stopped: raise ValueError('Error: the instance was unexpectedly terminated. Please, check out the ' 'instance logs to find out the reason.\n') # get resource resource = DMResource.get_by_name(dm, deployment_name, resource_name) except (ConnectionResetError, ServerNotFoundError): logging.warning('Connection problem') continue # resource doesn't exist yet if not resource: continue # resource failed if resource.is_failed: error_msg = ('Error: ' + resource.error_message) if resource.error_message \ else 'Please, see Deployment Manager logs for the details.' % deployment_name raise ValueError('Deployment "%s" failed.\n%s' % (deployment_name, error_msg)) # resource was successfully created is_created = resource.is_created created_resources.add(resource_name) def check_gpu_configuration(ce: CEClient, gpu_parameters: dict): if not gpu_parameters: return # check GPU type accelerator_types = ce.get_accelerator_types() gpu_type = gpu_parameters['type'] if gpu_type not in accelerator_types: if accelerator_types: error_msg = 'GPU type "%s" is not supported in the "%s" zone.\nAvailable GPU types are: %s.' \ % (gpu_type, ce.zone, ', '.join(accelerator_types.keys())) else: error_msg = 'The "%s" zone doesn\'t support any GPU accelerators.' 
% ce.zone raise ValueError(error_msg) # check the number of GPUs is not exceed the maximum max_cards_per_instance = accelerator_types[gpu_parameters['type']] if gpu_parameters['count'] > max_cards_per_instance: raise ValueError('Maximum allowed number of cards per instance for the "%s" type is %d.' % (gpu_parameters['type'], max_cards_per_instance)) ================================================ FILE: spotty/providers/gcp/helpers/dm_client.py ================================================ import json import googleapiclient.discovery from googleapiclient.errors import HttpError class DMClient(object): """Deployment Manager client.""" def __init__(self, project_id: str, zone: str): self._project_id = project_id self._zone = zone self._client = googleapiclient.discovery.build('deploymentmanager', 'v2', cache_discovery=False) def get(self, deployment_name: str): try: res = self._client.deployments().get(project=self._project_id, deployment=deployment_name).execute() except HttpError as e: data = json.loads(e.content.decode('utf-8')) if data['error']['code'] != 404: raise e res = None return res def deploy(self, deployment_name: str, template: str, dry_run: bool = False): res = self._client.deployments().insert(project=self._project_id, body={ 'name': deployment_name, 'target': { 'config': { 'content': template, }, }, }, preview=dry_run).execute() return res def stop(self, deployment_name: str, fingerprint: str): res = self._client.deployments().stop(project=self._project_id, deployment=deployment_name, body={ 'fingerprint': fingerprint, }).execute() return res def delete(self, deployment_name: str): """Deletes a deployment and all of the resources in the deployment.""" res = self._client.deployments().delete(project=self._project_id, deployment=deployment_name).execute() return res def get_resource(self, deployment_name: str, resource_name: str) -> dict: try: res = self._client.resources().get(project=self._project_id, deployment=deployment_name, 
resource=resource_name).execute() except HttpError as e: data = json.loads(e.content.decode('utf-8')) if data['error']['code'] != 404: raise e res = None return res ================================================ FILE: spotty/providers/gcp/helpers/dm_resource.py ================================================ from spotty.providers.gcp.helpers.dm_client import DMClient class DMResource(object): def __init__(self, dm: DMClient, data: dict): """ Args: dm (DMClient): Deployment Manager client data (dict): Stack info. Example #1: {'id': '1760655646875625396', 'insertTime': '2019-08-25T16:27:23.544-07:00', 'name': 'x11-test-i2-docker-waiter', 'type': 'runtimeconfig.v1beta1.waiter', 'update': {'finalProperties': 'failure:\n' ' cardinality:\n' ' number: 1\n' ' path: /failure\n' 'parent: ' 'projects/spotty-221422/configs/x11-test-i2-docker-status\n' 'success:\n' ' cardinality:\n' ' number: 1\n' ' path: /success\n' 'timeout: 1800s\n' 'waiter: x11-test-i2-docker-waiter\n', 'intent': 'CREATE_OR_ACQUIRE', 'manifest': 'https://www.googleapis.com/deploymentmanager/v2/projects/spotty-221422/global/deployments/spotty-instance-x11-test-i2/manifests/manifest-1566775635906', 'properties': 'failure:\n' ' cardinality:\n' ' number: 1\n' ' path: /failure\n' 'parent: $(ref.x11-test-i2-docker-status.name)\n' 'success:\n' ' cardinality:\n' ' number: 1\n' ' path: /success\n' 'timeout: 1800s\n' 'waiter: x11-test-i2-docker-waiter\n', 'state': 'IN_PROGRESS'}, 'updateTime': '2019-08-25T16:27:23.544-07:00'} Example #2: {'finalProperties': 'config: x11-test-i2-docker-status\n' 'description: Docker status\n', 'id': '314866945194106123', 'insertTime': '2019-08-25T17:12:20.140-07:00', 'manifest': 'https://www.googleapis.com/deploymentmanager/v2/projects/spotty-221422/global/deployments/spotty-instance-x11-test-i2/manifests/manifest-1566778333272', 'name': 'x11-test-i2-docker-status', 'properties': 'config: x11-test-i2-docker-status\n' 'description: Docker status\n', 'type': 
'runtimeconfig.v1beta1.config', 'updateTime': '2019-08-25T17:12:30.254-07:00', 'url': 'https://runtimeconfig.googleapis.com/v1beta1/projects/spotty-221422/configs/x11-test-i2-docker-status'} """ self._dm = dm self._data = data @staticmethod def get_by_name(dm: DMClient, deployment_name: str, resource_name: str): """Returns an instance by its stack name.""" res = dm.get_resource(deployment_name, resource_name) if not res: return None return DMResource(dm, res) @property def is_created(self) -> bool: return 'finalProperties' in self._data @property def error_message(self) -> str: if 'error' not in self._data.get('update', {}): return None return self._data['update']['error']['errors'][0]['message'] @property def state(self) -> str: return self._data['update']['state'] if 'state' in self._data.get('update', {}) else None @property def is_in_progress(self) -> bool: return self.state == 'IN_PROGRESS' @property def is_failed(self) -> bool: # an error occurred or the resource is in an unexpected status return self.error_message or (self.state is not None and self.state not in ['PENDING', 'IN_PROGRESS', 'COMPLETED', 'IN_PREVIEW']) ================================================ FILE: spotty/providers/gcp/helpers/gcp_credentials.py ================================================ from google.auth import default class GcpCredentials(object): def __init__(self): credentials, effective_project_id = default() self._credentials = credentials self._project_id = effective_project_id @property def project_id(self): return self._project_id @property def service_account_email(self): return self._credentials.service_account_email ================================================ FILE: spotty/providers/gcp/helpers/gs_client.py ================================================ from typing import List from google.cloud import storage from google.cloud.storage import Bucket class GSClient(object): """Google Storage client.""" def __init__(self): self._client = storage.Client() def 
list_buckets(self) -> List[Bucket]: res = list(self._client.list_buckets()) return res def create_bucket(self, bucket_name: str, region: str) -> Bucket: bucket = Bucket(self._client, name=bucket_name) bucket.create(location=region) return bucket def create_dir(self, bucket_name: str, path: str): bucket = Bucket(self._client, name=bucket_name) blob = bucket.blob(path.rstrip('/') + '/') blob.upload_from_string('') ================================================ FILE: spotty/providers/gcp/helpers/gsutil_rsync.py ================================================ import fnmatch from shutil import which import os from typing import List from spotty.deployment.utils.cli import shlex_join def check_gsutil_installed(): """Checks that gsutil is installed.""" if which('gsutil') is None: raise ValueError('gsutil is not installed.') def get_rsync_command(from_path: str, to_path: str, filters: List[dict] = None, delete: bool = False, quiet: bool = False, dry_run: bool = False): args = ['gsutil', '-m'] if quiet: args.append('-q') args += ['rsync', '-r'] if filters: if (len(filters) > 1) or (len(filters[0]) > 1) or ('include' in filters[0]): raise ValueError('At the moment GCP provider supports only one list of exclude filters.') path_regs = [] for path in filters[0]['exclude']: path = path.replace('/', os.sep) # fix for Windows machines path_regs.append(fnmatch.translate(path)[4:-3]) filter_regex = '^(%s)$' % '|'.join(path_regs) args += ['-x', filter_regex] if delete: args.append('-d') if dry_run: args.append('-n') args += [from_path, to_path] return shlex_join(args) ================================================ FILE: spotty/providers/gcp/helpers/image.py ================================================ from spotty.providers.gcp.config.instance_config import DEFAULT_IMAGE_NAME from spotty.providers.gcp.helpers.ce_client import CEClient from spotty.providers.gcp.resources.image import Image def get_image(ce: CEClient, image_uri: str = None, image_name: str = None) -> Image: 
"""Returns an image that should be used for deployment. Raises: ValueError: If an image not found. """ if image_uri: # get an image by its URL if the "imageUri" parameter is specified image = Image.get_by_uri(ce, image_uri) if not image: raise ValueError('Image "%s" not found.' % image_uri) elif image_name: # get an image by name if the "imageName" parameter is specified image = Image.get_by_name(ce, image_name) if not image: # if an image name was explicitly specified, but the image was not found, raise an error raise ValueError('Image with the name "%s" was not found.' % image_name) else: # if the "imageName" parameter is not specified, try to use the default image name image = Image.get_by_name(ce, DEFAULT_IMAGE_NAME) if not image: # get the latest "common-gce-gpu-image" image image_family_url = 'projects/ml-images/global/images/family/common-gce-gpu-image' image = Image.get_by_uri(ce, image_family_url) if not image: raise ValueError('The "common-gce-gpu-image" image was not found.') return image ================================================ FILE: spotty/providers/gcp/helpers/rtc_client.py ================================================ import googleapiclient.discovery class RtcClient(object): def __init__(self, project_id: str, zone: str): self._project_id = project_id self._zone = zone self._rtc = googleapiclient.discovery.build('runtimeconfig', 'v1beta1', cache_discovery=False) def get_value(self, config_name, template): config_name = 'projects/%s/configs/%s' % (self._project_id, config_name) fields = ['/failure'] res = self._rtc.projects().get(name=config_name, fields=fields).execute() return res def set_value(self, config_name: str, variable_name: str, value: str): config_name = 'projects/%s/configs/%s' % (self._project_id, config_name) res = self._rtc.projects().configs().variables().create(parent=config_name, body={ 'name': '%s/variables/%s' % (config_name, variable_name), 'text': str(value), }).execute() return res 
================================================ FILE: spotty/providers/gcp/helpers/volumes.py ================================================ from typing import List from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter from spotty.config.abstract_instance_volume import AbstractInstanceVolume from spotty.providers.gcp.config.disk_volume import DiskVolume from spotty.providers.gcp.helpers.ce_client import CEClient from spotty.providers.gcp.resources.disk import Disk from spotty.providers.gcp.resources.snapshot import Snapshot def create_disks(ce: CEClient, volumes: List[AbstractInstanceVolume], output: AbstractOutputWriter, dry_run: bool = False): disks_to_create = [] # do some checks and prepare disk parameters for i, volume in enumerate(volumes): if isinstance(volume, DiskVolume): # check if the disk already exists disk = Disk.get_by_name(ce, volume.disk_name) if disk: # check if the volume is available if not disk.is_available(): raise ValueError('Disk "%s" is not available (status: %s).' % (volume.disk_name, disk.status)) # check size of the volume if volume.size and (volume.size != disk.size): raise ValueError('Specified size for the "%s" volume (%dGB) doesn\'t match the size of the ' 'existing disk (%dGB).' % (volume.name, volume.size, disk.size)) output.write('- disk "%s" will be attached' % disk.name) else: # check if the snapshot exists snapshot = Snapshot.get_by_name(ce, volume.disk_name) if snapshot: # disk will be restored from the snapshot # check size of the volume if volume.size and (volume.size < snapshot.size): raise ValueError('Specified size for the "%s" volume (%dGB) is less than size of the ' 'snapshot (%dGB).' 
% (volume.name, volume.size, snapshot.size)) output.write('- disk "%s" will be restored from the snapshot' % volume.disk_name) disks_to_create.append((volume.disk_name, volume.size, snapshot.self_link)) else: # empty volume will be created, check that the size is specified if not volume.size: raise ValueError('Size for the new disk is required.') if volume.size < 10: raise ValueError('Size of a disk cannot be less than 10GB.') disks_to_create.append((volume.disk_name, volume.size, None)) # create disks for disk_name, disk_size, snapshot_link in disks_to_create: if not dry_run: ce.create_disk(disk_name, disk_size, snapshot_link) output.write('- disk "%s" was created' % disk_name) ================================================ FILE: spotty/providers/gcp/instance_deployment.py ================================================ from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter from spotty.deployment.abstract_cloud_instance.abstract_instance_deployment import AbstractInstanceDeployment from spotty.deployment.container.docker.docker_commands import DockerCommands from spotty.deployment.utils.print_info import render_volumes_info_table from spotty.providers.gcp.config.instance_config import InstanceConfig from spotty.providers.gcp.data_transfer import DataTransfer from spotty.providers.gcp.dm_templates.instance.instance_template import prepare_instance_template from spotty.providers.gcp.helpers.image import get_image from spotty.providers.gcp.helpers.volumes import create_disks from spotty.providers.gcp.resource_managers.instance_stack_manager import InstanceStackManager from spotty.providers.gcp.helpers.ce_client import CEClient from spotty.providers.gcp.helpers.gcp_credentials import GcpCredentials from spotty.providers.gcp.resource_managers.ssh_key_manager import SshKeyManager from spotty.providers.gcp.resources.instance import Instance from spotty.providers.gcp.helpers.deployment import check_gpu_configuration class 
InstanceDeployment(AbstractInstanceDeployment): instance_config: InstanceConfig def __init__(self, instance_config: InstanceConfig): super().__init__(instance_config) self._project_name = instance_config.project_config.project_name self._credentials = GcpCredentials() self._ce = CEClient(self._credentials.project_id, instance_config.zone) @property def stack_manager(self) -> InstanceStackManager: return InstanceStackManager(self.instance_config.machine_name, self._credentials.project_id, self.instance_config.zone) @property def ssh_key_manager(self) -> SshKeyManager: return SshKeyManager(self._project_name, self.instance_config.zone) def get_instance(self) -> Instance: return Instance.get_by_name(self._ce, self.instance_config.machine_name) def deploy(self, container_commands: DockerCommands, bucket_name: str, data_transfer: DataTransfer, output: AbstractOutputWriter, dry_run: bool = False): # check machine type if not self._ce.get_machine_types(self.instance_config.machine_type): raise ValueError('"%s" machine type is not available in the "%s" zone.' 
% (self.instance_config.machine_type, self.instance_config.zone)) # check GPU configuration check_gpu_configuration(self._ce, self.instance_config.gpu) # remove the stack it it exists to make all the disks available stack_manager = self.stack_manager stack_manager.delete_stack(output=output) # sync the project with the S3 bucket if bucket_name is not None: output.write('Syncing the project with the bucket...') data_transfer.upload_local_to_bucket(bucket_name, dry_run=dry_run) # create volumes if self.instance_config.volumes: # create disks output.write('\nCreating disks...') with output.prefix(' '): create_disks(self._ce, self.instance_config.volumes, output=output, dry_run=dry_run) output.write('') # prepare Deployment Manager template output.write('Preparing the deployment template...') with output.prefix(' '): # get an image image_link = get_image(self._ce, self.instance_config.image_uri, self.instance_config.image_name).self_link # get or create an SSH key public_key_value = self.ssh_key_manager.get_public_key_value() # prepare the deployment template sync_project_cmd = data_transfer.get_download_bucket_to_instance_command(bucket_name=bucket_name) template = prepare_instance_template( instance_config=self.instance_config, docker_commands=container_commands, image_link=image_link, bucket_name=bucket_name, sync_project_cmd=sync_project_cmd, public_key_value=public_key_value, service_account_email=self._credentials.service_account_email, output=output, ) output.write('') # print information about the volumes output.write('Volumes:\n%s\n' % render_volumes_info_table(self.instance_config.volume_mounts, self.instance_config.volumes)) # create stack if not dry_run: stack_manager.create_stack(template, output=output) def delete(self, output: AbstractOutputWriter): self.stack_manager.delete_stack(output) # TODO: apply deletion policies ================================================ FILE: spotty/providers/gcp/instance_manager.py 
================================================ from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter from spotty.errors.instance_not_running import InstanceNotRunningError from spotty.deployment.abstract_cloud_instance.abstract_cloud_instance_manager import AbstractCloudInstanceManager from spotty.providers.gcp.config.instance_config import InstanceConfig from spotty.providers.gcp.data_transfer import DataTransfer from spotty.providers.gcp.instance_deployment import InstanceDeployment from spotty.providers.gcp.resource_managers.bucket_manager import BucketManager from spotty.utils import render_table class InstanceManager(AbstractCloudInstanceManager): instance_config: InstanceConfig bucket_manager: BucketManager data_transfer: DataTransfer instance_deployment: InstanceDeployment def _get_instance_config(self, instance_config: dict) -> InstanceConfig: """Validates the instance config and returns an InstanceConfig object.""" return InstanceConfig(instance_config, self.project_config) def _get_bucket_manager(self) -> BucketManager: region = '-'.join(self.instance_config.zone.split('-')[:-1]) return BucketManager(self.instance_config.project_config.project_name, region) def _get_data_transfer(self) -> DataTransfer: """Returns a data transfer object.""" return DataTransfer( local_project_dir=self.project_config.project_dir, host_project_dir=self.instance_config.host_project_dir, sync_filters=self.project_config.sync_filters, instance_name=self.instance_config.name, ) def _get_instance_deployment(self) -> InstanceDeployment: """Returns an instance deployment manager.""" return InstanceDeployment(self.instance_config) def download(self, download_filters: list, output: AbstractOutputWriter, dry_run=False): raise NotImplementedError('GCP provider doesn\'t have an implementation of the "download" command yet.') def get_status_text(self) -> str: instance = self.instance_deployment.get_instance() if not instance: raise 
InstanceNotRunningError(self.instance_config.name) table = [ ('Instance Status', instance.status), ('Machine Type', instance.machine_type), ('Zone', instance.zone), ] if instance.public_ip_address: table.append(('Public IP Address', instance.public_ip_address)) table.append(('Purchasing Option', 'Preemtible VM' if instance.is_preemtible else 'On-demand VM')) return render_table(table) @property def ssh_key_path(self): return self.instance_deployment.ssh_key_manager.private_key_file ================================================ FILE: spotty/providers/gcp/resource_managers/__init__.py ================================================ ================================================ FILE: spotty/providers/gcp/resource_managers/bucket_manager.py ================================================ import re from spotty.deployment.abstract_cloud_instance.abstract_bucket_manager import AbstractBucketManager from spotty.deployment.abstract_cloud_instance.errors.bucket_not_found import BucketNotFoundError from spotty.providers.gcp.helpers.gs_client import GSClient from spotty.providers.gcp.resources.bucket import Bucket from spotty.utils import random_string class BucketManager(AbstractBucketManager): def __init__(self, project_name: str, region: str): super().__init__(project_name) self._gs = GSClient() self._region = region self._bucket_prefix = 'spotty-%s' % project_name.lower() def get_bucket(self) -> Bucket: buckets = self._gs.list_buckets() regex = re.compile('-'.join([self._bucket_prefix, '[a-z0-9]{12}', self._region])) buckets = [bucket for bucket in buckets if regex.match(bucket.name) is not None] if len(buckets) > 1: raise ValueError('Found several project buckets in the same region: %s.' 
% ', '.join(bucket.name for bucket in buckets)) if not len(buckets): raise BucketNotFoundError bucket = Bucket(buckets[0]) return bucket def create_bucket(self) -> Bucket: bucket_name = '-'.join([self._bucket_prefix, random_string(12), self._region]) bucket = self._gs.create_bucket(bucket_name, self._region) return Bucket(bucket) ================================================ FILE: spotty/providers/gcp/resource_managers/instance_stack_manager.py ================================================ from collections import OrderedDict from spotty.commands.writers.abstract_output_writrer import AbstractOutputWriter from spotty.providers.gcp.resources.stack import Stack from spotty.providers.gcp.helpers.ce_client import CEClient from spotty.providers.gcp.helpers.deployment import wait_resources from spotty.providers.gcp.helpers.dm_client import DMClient from spotty.providers.gcp.helpers.dm_resource import DMResource from spotty.providers.gcp.helpers.rtc_client import RtcClient class InstanceStackManager(object): def __init__(self, machine_name: str, project_id: str, zone: str): self._dm = DMClient(project_id, zone) self._ce = CEClient(project_id, zone) self._rtc = RtcClient(project_id, zone) self._machine_name = machine_name self._stack_name = 'spotty-instance-' + machine_name # resource names self._INSTANCE_RESOURCE_NAME = machine_name self._DOCKER_WAITER_RESOURCE_NAME = machine_name + '-docker-waiter' self._DOCKER_STATUS_CONFIG_RESOURCE_NAME = machine_name + '-docker-status' @property def name(self): return self._stack_name def create_stack(self, template: str, output: AbstractOutputWriter): """Deploys a Deployment Manager template.""" # create a stack res = Stack.create(self._dm, self._stack_name, template) # print(res) # exit() output.write('Waiting for the stack to be created...') resource_messages = OrderedDict([ (self._INSTANCE_RESOURCE_NAME, 'launching the instance'), (self._DOCKER_WAITER_RESOURCE_NAME, 'running the Docker container'), ]) # wait for the stack to 
be created with output.prefix(' '): wait_resources(self._dm, self._ce, self._stack_name, resource_messages, instance_resource_name=self._INSTANCE_RESOURCE_NAME, machine_name=self._machine_name, output=output) def delete_stack(self, output: AbstractOutputWriter): stack = Stack.get_by_name(self._dm, self._stack_name) if not stack: return output.write('Waiting for the stack to be deleted...') # delete the stack try: if stack.is_running: # stop an ongoing operation first to make sure the delete method # won't raise an error "Resource '...' has an ongoing conflicting operation" stack.stop() # if the docker-waiter resource is still waiting for a signal, send a failure signal # to be able to delete the stack resource = DMResource.get_by_name(self._dm, self._stack_name, self._DOCKER_WAITER_RESOURCE_NAME) if resource.is_in_progress: self._rtc.set_value(self._DOCKER_STATUS_CONFIG_RESOURCE_NAME, '/failure/1', '1') # wait until the stack will be created or will fail stack.wait_stack_done() stack.delete() stack.wait_stack_deleted() except Exception as e: raise ValueError('Stack "%s" was not deleted. Error: %s\n' 'See Deployment Manager logs for details.' 
% (self._stack_name, str(e))) ================================================ FILE: spotty/providers/gcp/resource_managers/ssh_key_manager.py ================================================ import os import subprocess from spotty.configuration import get_spotty_keys_dir from shutil import which from spotty.providers.instance_manager_factory import PROVIDER_GCP class SshKeyManager(object): def __init__(self, project_name: str, zone: str): self._key_name = 'spotty-key-%s-%s' % (project_name.lower(), zone) self._keys_dir = get_spotty_keys_dir(PROVIDER_GCP) @property def private_key_file(self): return os.path.join(self._keys_dir, self._key_name) @property def public_key_file(self): return os.path.join(self._keys_dir, self._key_name + '.pub') def get_public_key_value(self): # generate a key if it doesn't exist if not os.path.isfile(self.private_key_file) or not os.path.isfile(self.public_key_file): self._generate_ssh_key() # read the public key value with open(self.public_key_file, 'r') as f: public_key_value = f.read().split()[1] return public_key_value def _generate_ssh_key(self): # delete the private key file if it already exists if os.path.isfile(self.private_key_file): os.unlink(self.private_key_file) # create a provider subdirectory if not os.path.isdir(self._keys_dir): os.makedirs(self._keys_dir, mode=0o755, exist_ok=True) # check that the "ssh-keygen" tool is installed ssh_keygen_cmd = 'ssh-keygen' if which(ssh_keygen_cmd) is None: raise ValueError('"ssh-keygen" command not found.') generate_key_cmd = [ssh_keygen_cmd, '-t', 'rsa', '-N', '', '-f', self.private_key_file, '-q'] # generate a key pair res = subprocess.run(generate_key_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if res.returncode: raise subprocess.CalledProcessError(res.returncode, generate_key_cmd) ================================================ FILE: spotty/providers/gcp/resources/__init__.py ================================================ 
class Bucket(AbstractBucket):
    """Wrapper around a google-cloud-storage bucket exposing the generic bucket interface."""

    def __init__(self, bucket: GSBucket):
        self._bucket = bucket

    @property
    def name(self) -> str:
        return self._bucket.name


class Disk(object):
    """A Compute Engine persistent disk.

    Args:
        data: a disk resource dict as returned by the CE API, for example:
            {'name': 'instance-1', 'sizeGb': '10', 'status': 'READY',
             'users': ['https://.../instances/instance-1'], ...}
    """

    def __init__(self, ce: CEClient, data: dict):
        self._ce = ce
        self._data = data

    @staticmethod
    def get_by_name(ce: CEClient, disk_name: str):
        """Returns a disk by its name or None if it doesn't exist."""
        res = ce.list_disks(disk_name)
        if not res:
            return None

        return Disk(ce, res[0])

    @property
    def name(self) -> str:
        return self._data['name']

    @property
    def status(self) -> str:
        return self._data['status']

    @property
    def size(self) -> int:
        # the API returns the size as a string
        return int(self._data['sizeGb'])

    @property
    def users(self) -> list:
        # self-links of the instances that the disk is attached to
        return self._data.get('users', [])

    def is_available(self):
        """Returns True if the disk is ready and not attached to any instance."""
        return (self.status == 'READY') and not self.users


class Image(object):
    """A Compute Engine image.

    Args:
        data: an image resource dict as returned by the CE API, for example:
            {'id': '7541350343606791231', 'name': 'spotty-ami', 'diskSizeGb': '10',
             'selfLink': 'https://.../global/images/spotty-ami',
             'sourceDisk': 'https://.../zones/us-central1-a/disks/spotty-ami', ...}
    """

    def __init__(self, data: dict):
        self._data = data

    @staticmethod
    def get_by_name(ce: CEClient, image_name: str):
        """Returns an image by its name or None if it doesn't exist."""
        res = ce.list_images(image_name)
        if not res:
            return None

        return Image(res[0])

    @staticmethod
    def get_by_uri(ce: CEClient, image_uri: str):
        """Returns an image by its URI (either an image family or a concrete image name)."""
        image_uri = ImageUri(image_uri)

        if image_uri.is_family:
            image_data = ce.get_image_from_family(family_name=image_uri.name, project_id=image_uri.project_id)
        else:
            res = ce.list_images(image_name=image_uri.name, project_id=image_uri.project_id)
            image_data = res[0] if res else None

        if not image_data:
            return None

        return Image(image_data)

    @property
    def image_id(self) -> str:
        return self._data['id']

    @property
    def name(self) -> str:
        return self._data['name']

    @property
    def size(self) -> int:
        # fixed: the API returns the size as a string; convert it to match the declared type
        return int(self._data['diskSizeGb'])

    @property
    def self_link(self) -> str:
        return self._data['selfLink']

    @property
    def source_disk(self):
        return self._data['sourceDisk']


class Instance(AbstractInstance):
    """A Compute Engine VM instance.

    Args:
        data: an instance resource dict as returned by the CE API, for example:
            {'name': 'instance-1', 'status': 'RUNNING',
             'machineType': 'https://.../machineTypes/n1-standard-1',
             'networkInterfaces': [{'accessConfigs': [{'natIP': '34.73.140.188', ...}], ...}],
             'scheduling': {'preemptible': True, ...},
             'zone': 'https://.../zones/us-east1-b', ...}
    """

    def __init__(self, ce: CEClient, data: dict):
        self._ce = ce
        self._data = data

    @staticmethod
    def get_by_name(ce: CEClient, machine_name: str):
        """Returns an instance by its machine name or None if it doesn't exist."""
        res = ce.list_instances(machine_name)
        if not res:
            return None

        return Instance(ce, res[0])

    @property
    def name(self) -> str:
        return self._data['name']

    @property
    def is_running(self) -> bool:
        return self.status == 'RUNNING'

    @property
    def is_stopped(self) -> bool:
        # see Instance Life Cycle: https://cloud.google.com/compute/docs/instances/instance-life-cycle
        return self.status == 'TERMINATED'

    @property
    def public_ip_address(self) -> str:
        # may be None when the instance has no external IP assigned
        return self._data['networkInterfaces'][0]['accessConfigs'][0].get('natIP')

    @property
    def status(self) -> str:
        return self._data['status']

    @property
    def machine_type(self) -> str:
        # the API returns a full URL; keep only the last path segment
        return self._data['machineType'].split('/')[-1]

    @property
    def zone(self) -> str:
        # the API returns a full URL; keep only the last path segment
        return self._data['zone'].split('/')[-1]

    @property
    def creation_timestamp(self) -> datetime:
        # strptime's "%z" doesn't accept a colon inside the UTC offset, so convert
        # '2019-04-20T16:21:49.536-07:00' -> '2019-04-20T16:21:49-0700'
        # (microseconds are dropped on purpose)
        ts = self._data['creationTimestamp']
        time_str = ts[:-10] + ts[-6:-3] + ts[-2:]
        return datetime.strptime(time_str, '%Y-%m-%dT%H:%M:%S%z')

    @property
    def is_preemptible(self) -> bool:
        return self._data['scheduling']['preemptible']

    @property
    def is_preemtible(self) -> bool:
        # kept for backward compatibility with the original (misspelled) property name
        return self.is_preemptible

    def terminate(self, wait: bool = True):
        self._ce.delete_instance(self.name, wait)

    def stop(self, wait: bool = True):
        self._ce.stop_instance(self.name, wait)


class Snapshot(object):
    """A Compute Engine disk snapshot.

    Args:
        data: a snapshot resource dict as returned by the CE API, for example:
            {'name': 'snapshot-test', 'diskSizeGb': '10', 'status': 'READY',
             'selfLink': 'https://.../global/snapshots/snapshot-test', ...}
    """

    def __init__(self, data: dict):
        self._data = data

    @staticmethod
    def get_by_name(ce: CEClient, snapshot_name: str):
        """Returns a snapshot by its name or None if it doesn't exist."""
        res = ce.list_snapshots(snapshot_name)
        if not res:
            return None

        return Snapshot(res[0])

    @property
    def name(self) -> str:
        return self._data['name']

    @property
    def size(self) -> int:
        # fixed: the API returns the size as a string; convert it to match the declared type
        return int(self._data['diskSizeGb'])

    @property
    def self_link(self) -> str:
        return self._data['selfLink']
class Stack(object):
    """A Deployment Manager deployment.

    Args:
        dm: Deployment Manager client.
        data: deployment info dict as returned by the DM API, for example:
            {'name': 'spotty-instance-x', 'fingerprint': 'vvtbwT7F953T0YC9tQ9CUg==',
             'operation': {'status': 'DONE', 'error': {'errors': [...]}, ...}, ...}
    """

    def __init__(self, dm: 'DMClient', data: dict):
        self._dm = dm
        self._data = data

    @staticmethod
    def get_by_name(dm: 'DMClient', deployment_name: str):
        """Returns a deployment by its name or None if it doesn't exist."""
        res = dm.get(deployment_name)
        if not res:
            return None

        return Stack(dm, res)

    @staticmethod
    def create(dm: 'DMClient', deployment_name: str, template: str):
        """Creates a new deployment from the given template."""
        return dm.deploy(deployment_name, template)

    @property
    def name(self) -> str:
        return self._data['name']

    @property
    def status(self) -> str:
        # status of the latest operation; None if there is no operation info
        return self._data.get('operation', {}).get('status')

    @property
    def is_running(self):
        return self.status == 'RUNNING'

    @property
    def is_done(self):
        """A deployment has the "DONE" status when it's successfully created or failed."""
        return self.status == 'DONE'

    @property
    def error(self) -> dict:
        """Returns the first error in the format {'code': '...', 'message': '...'}, or None.

        (Fixed the return annotation: the value is a dict, not a string.)
        """
        return self._data.get('operation', {}).get('error', {}).get('errors', [None])[0]

    @property
    def fingerprint(self) -> str:
        return self._data['fingerprint']

    def stop(self):
        self._dm.stop(self.name, self.fingerprint)

    def delete(self):
        self._dm.delete(self.name)

    def wait_stack_deleted(self, delay=15):
        """Polls the DM API until the deployment no longer exists.

        Fixed: the original "continue" on connection errors skipped the sleep,
        retrying in a tight loop; now every iteration waits "delay" seconds.
        """
        while True:
            try:
                if not self.get_by_name(self._dm, self.name):
                    break
            except (ConnectionResetError, ServerNotFoundError):
                logging.warning('Connection problem')

            sleep(delay)

    def wait_stack_done(self, delay=5):
        """Polls the DM API until the deployment reaches the "DONE" status.

        Fixed: same busy-retry issue on connection errors as wait_stack_deleted.
        """
        while True:
            try:
                if self.get_by_name(self._dm, self.name).is_done:
                    break
            except (ConnectionResetError, ServerNotFoundError):
                logging.warning('Connection problem')

            sleep(delay)
class InstanceManagerFactory(object):
    """Resolves the provider name from an instance config to its InstanceManager class."""

    SUPPORTED_PROVIDERS = [
        PROVIDER_AWS,
        PROVIDER_GCP,
        PROVIDER_LOCAL,
        PROVIDER_REMOTE,
    ]

    @classmethod
    def get_instance(cls, project_config: ProjectConfig, instance_config: dict) -> AbstractInstanceManager:
        """Instantiates the provider-specific InstanceManager for the given config.

        Raises:
            ValueError: if the provider is not one of SUPPORTED_PROVIDERS.
        """
        provider_name = instance_config['provider']
        if provider_name not in cls.SUPPORTED_PROVIDERS:
            raise ValueError('Provider "%s" is not supported' % provider_name)

        # each provider package exposes an "InstanceManager" class in its "instance_manager" module
        provider_module = import_module('spotty.providers.%s.instance_manager' % provider_name)
        manager_class = getattr(provider_module, 'InstanceManager')

        return manager_class(project_config, instance_config)


class InstanceConfig(AbstractInstanceConfig):
    """Configuration of a "local" provider instance (containers run on this machine)."""

    def __init__(self, instance_config: dict, project_config: ProjectConfig):
        super().__init__(instance_config, project_config)

    def _validate_instance_params(self, params: dict):
        # validates the config and fills missing parameters with the default values
        return validate_instance_parameters(params)

    def _get_instance_volumes(self) -> List[AbstractInstanceVolume]:
        """Builds volume objects; only host-path volumes are supported locally."""
        volumes = []
        for volume_config in self._params['volumes']:
            volume_type = volume_config['type']
            if volume_type != HostPathVolume.TYPE_NAME:
                raise ValueError('Volume type "%s" is not supported.' % volume_type)

            volumes.append(HostPathVolume(volume_config, self.project_config.project_dir))

        return volumes

    def _get_volume_mounts(self, volumes: List[AbstractInstanceVolume]) -> List[VolumeMount]:
        """Replaces any mount at the container project directory with a bind mount
        of the local project directory."""
        container_project_dir = self.container_config.project_dir

        # drop a volume mount that points exactly at the container project directory
        volume_mounts = [
            mount for mount in super()._get_volume_mounts(volumes)
            if os.path.relpath(container_project_dir, mount.mount_path) != '.'
        ]

        # mount the local project directory into the container instead
        project_mount = VolumeMount(
            name=PROJECT_VOLUME_MOUNT_NAME,
            host_path=self.project_config.project_dir,
            mount_path=container_project_dir,
            mode='rw',
            hidden=True,
        )
        volume_mounts.append(project_mount)

        return volume_mounts

    @property
    def user(self) -> str:
        # no remote user for the local provider
        return ''


def validate_instance_parameters(params: dict):
    """Validates "local" instance parameters against the shared volumes schema."""
    from spotty.config.host_path_volume import HostPathVolume

    schema = get_instance_parameters_schema({}, HostPathVolume.TYPE_NAME)

    return validate_config(schema, params)
class InstanceManager(AbstractDockerInstanceManager):
    """Instance manager for the "local" provider: the container runs on this machine."""

    instance_config: InstanceConfig

    def _get_instance_config(self, instance_config: dict) -> InstanceConfig:
        """Validates the instance config and returns an InstanceConfig object."""
        return InstanceConfig(instance_config, self.project_config)

    def is_running(self):
        # the "instance" is the local machine itself, so it's always running
        return True

    def clean(self, output: AbstractOutputWriter):
        # nothing to clean up for the local provider
        pass

    def sync(self, output: AbstractOutputWriter, dry_run=False):
        # the project directory is bind-mounted into the container, nothing to copy
        raise NothingToDoError('Nothing to do. The project directory is mounted to the container.')

    def download(self, download_filters: list, output: AbstractOutputWriter, dry_run=False):
        # the project directory is bind-mounted into the container, nothing to copy
        raise NothingToDoError('Nothing to do. The project directory is mounted to the container.')


class InstanceConfig(AbstractInstanceConfig):
    """Configuration of a "remote" provider instance (a machine reachable over SSH)."""

    def __init__(self, instance_config: dict, project_config: ProjectConfig):
        super().__init__(instance_config, project_config)

    def _validate_instance_params(self, params: dict):
        # validates the config and fills missing parameters with the default values
        return validate_instance_parameters(params)

    @property
    def user(self) -> str:
        return self._params['user']

    @property
    def host(self) -> str:
        return self._params['host']

    @property
    def port(self) -> int:
        return self._params['port']

    @property
    def key_path(self) -> str:
        """Normalized path to the SSH private key; a relative path is resolved
        against the project directory."""
        path = os.path.expanduser(self._params['keyPath'])
        if not os.path.isabs(path):
            path = os.path.join(self.project_config.project_dir, path)

        return os.path.normpath(path)

    def _get_instance_volumes(self) -> List[AbstractInstanceVolume]:
        """Builds volume objects; only host-path volumes are supported remotely."""
        volumes = []
        for volume_config in self._params['volumes']:
            volume_type = volume_config['type']
            if volume_type != HostPathVolume.TYPE_NAME:
                raise ValueError('Volume type "%s" is not supported.' % volume_type)

            volumes.append(HostPathVolume(volume_config))

        return volumes


def validate_instance_parameters(params: dict):
    """Validates "remote" instance parameters and fills in the defaults."""
    from spotty.config.host_path_volume import HostPathVolume

    instance_parameters = {
        'user': str,
        'host': str,
        Optional('port', default=22): And(int, lambda x: 0 < x < 65536),
        'keyPath': str,
    }

    schema = get_instance_parameters_schema(instance_parameters, HostPathVolume.TYPE_NAME)

    return validate_config(schema, params)


def check_rsync_installed():
    """Checks that rsync is installed."""
    if which('rsync') is None:
        raise ValueError('rsync is not installed.')
def get_upload_command(local_dir: str, remote_dir: str, ssh_user: str, ssh_host: str, ssh_port: int,
                       ssh_key_path: str, filters: List[dict] = None, use_sudo: bool = False,
                       dry_run: bool = False):
    """Builds an rsync command that uploads a local directory to the instance.

    Raises:
        ValueError: if anything other than a single exclude-only list of filters is given.
    """
    # make sure there is only one list of exclude filters
    # (fixed: guard against filters being None or empty before indexing filters[0])
    if filters and ((len(filters) > 1) or (len(filters[0]) > 1) or ('include' in filters[0])):
        raise ValueError('At the moment "remote" provider supports only one list of exclude filters.')

    remote_path = '%s@%s:%s' % (ssh_user, ssh_host, remote_dir)

    return _get_rsync_command(local_dir, remote_path, ssh_port, ssh_key_path, filters, mkdir=remote_dir,
                              use_sudo=use_sudo, dry_run=dry_run)


def get_download_command(remote_dir: str, local_dir: str, ssh_user: str, ssh_host: str, ssh_port: int,
                         ssh_key_path: str, filters: List[dict] = None, use_sudo: bool = False,
                         dry_run: bool = False):
    """Builds an rsync command that downloads files from the instance."""
    # apply the filters in reverse order for downloads
    # (fixed: don't reverse when filters is None)
    filters = filters[::-1] if filters else filters

    remote_path = '%s@%s:%s' % (ssh_user, ssh_host, remote_dir)

    return _get_rsync_command(remote_path, local_dir, ssh_port, ssh_key_path, filters,
                              use_sudo=use_sudo, dry_run=dry_run)


def _get_rsync_command(src_path: str, dst_path: str, ssh_port: int, ssh_key_path: str, filters: List[dict] = None,
                       mkdir: str = None, use_sudo: bool = False, dry_run: bool = False):
    """Builds the actual rsync command line.

    Args:
        mkdir: if set, the remote directory is created before rsync runs
            (injected through the --rsync-path option).
    """
    sudo_str = 'sudo ' if use_sudo else ''
    remote_rsync_cmd = sudo_str + 'rsync'
    if mkdir:
        remote_rsync_cmd = '%smkdir -p \'%s\' && %s' % (sudo_str, mkdir, remote_rsync_cmd)

    rsync_cmd = 'rsync -av ' \
                '--no-owner ' \
                '--no-group ' \
                '--prune-empty-dirs ' \
                '-e "ssh -i \'%s\' -p %d -o StrictHostKeyChecking=no -o ConnectTimeout=10" ' \
                '--rsync-path="%s"' \
                % (ssh_key_path, ssh_port, remote_rsync_cmd)

    if dry_run:
        rsync_cmd += ' --dry-run'

    if filters:
        args = []
        for sync_filter in filters:
            if 'exclude' in sync_filter:
                for path in sync_filter['exclude']:
                    args += ['--exclude', _fix_filter_path(path)]
            if 'include' in sync_filter:
                for path in sync_filter['include']:
                    args += ['--include', _fix_filter_path(path)]

        rsync_cmd += ' ' + shlex_join(args)

    rsync_cmd += ' %s/ %s' % (src_path.rstrip('/'), dst_path)

    return rsync_cmd


def _fix_filter_path(path: str) -> str:
    """Converts a project sync filter path to an rsync filter pattern:
    anchored at the root and with "*" widened to "**"."""
    return '/' + path.replace('*', '**').lstrip('/')
class InstanceManager(AbstractSshInstanceManager):
    """Instance manager for the "remote" provider: a machine reachable over SSH."""

    instance_config: InstanceConfig

    def _get_instance_config(self, instance_config: dict) -> InstanceConfig:
        """Validates the instance config and returns an InstanceConfig object."""
        return InstanceConfig(instance_config, self.project_config)

    def is_running(self):
        """Assuming the remote instance is running."""
        return True

    def clean(self, output: AbstractOutputWriter):
        # nothing to clean up for the remote provider
        pass

    def sync(self, output: AbstractOutputWriter, dry_run=False):
        """Uploads the project files to the instance using rsync."""
        output.write('Syncing files with the instance...')

        # check rsync is installed
        check_rsync_installed()

        # sync the project with the instance
        rsync_cmd = get_upload_command(
            local_dir=self.project_config.project_dir,
            remote_dir=self.instance_config.host_project_dir,
            ssh_user=self.ssh_user,
            ssh_host=self.ssh_host,
            ssh_key_path=self.ssh_key_path,
            ssh_port=self.ssh_port,
            filters=self.project_config.sync_filters,
            use_sudo=(not self.instance_config.container_config.run_as_host_user),
            dry_run=dry_run,
        )

        self._run_rsync(rsync_cmd, 'Failed to upload files to the instance.')

    def download(self, download_filters: list, output: AbstractOutputWriter, dry_run=False):
        """Downloads files matching the filters from the instance using rsync."""
        output.write('Downloading files from the instance...')

        # check rsync is installed
        check_rsync_installed()

        # sync the project with the instance
        rsync_cmd = get_download_command(
            local_dir=self.project_config.project_dir,
            remote_dir=self.instance_config.host_project_dir,
            ssh_user=self.ssh_user,
            ssh_host=self.ssh_host,
            ssh_key_path=self.ssh_key_path,
            ssh_port=self.ssh_port,
            filters=download_filters,
            use_sudo=(not self.instance_config.container_config.run_as_host_user),
            dry_run=dry_run,
        )

        self._run_rsync(rsync_cmd, 'Failed to download files from the instance.')

    @staticmethod
    def _run_rsync(rsync_cmd: str, error_message: str):
        """Runs an rsync command locally; raises ValueError with the given message on failure.

        Factored out of sync() and download(), which duplicated this sequence.
        """
        logging.debug('rsync command: ' + rsync_cmd)
        exit_code = subprocess.call(rsync_cmd, shell=True)
        if exit_code != 0:
            raise ValueError(error_message)

    @property
    def ssh_host(self) -> str:
        return self.instance_config.host

    @property
    def ssh_key_path(self) -> str:
        return self.instance_config.key_path

    @property
    def ssh_port(self) -> int:
        return self.instance_config.port
""" res_path = os.path.dirname(os.path.abspath(__file__)) if path: res_path = os.path.join(res_path, path) return res_path def check_path(path): """Creates a directory if it doesn't exist.""" if not os.path.exists(path): try: os.makedirs(path) except OSError as exception: if exception.errno != errno.EEXIST: raise def random_string(length: int, chars: str = string.ascii_lowercase + string.digits): return ''.join(random.choice(chars) for _ in range(length)) def filter_list(list_of_dicts, key_name, value): return [row for row in list_of_dicts if row[key_name] == value] def render_table(table: list, separate_title=False): column_lengths = [max([len(str(row[i])) for row in table]) for i in range(len(table[0]))] row_separator = '+-%s-+' % '-+-'.join(['-' * col_length for col_length in column_lengths]) title_separator = '+=%s=+' % '=+='.join(['=' * col_length for col_length in column_lengths]) lines = [row_separator] for i, row in enumerate(table): line = '| %s |' % ' | '.join([str(val).ljust(col_length) for val, col_length in zip(row, column_lengths)]) lines.append(line) lines.append(title_separator if separate_title and not i else row_separator) return '\n'.join(lines) ================================================ FILE: tests/__init__.py ================================================ ================================================ FILE: tests/container_config.py ================================================ import unittest from spotty.config.container_config import ContainerConfig class TestContainerConfig(unittest.TestCase): def test_working_dir(self): container_config = ContainerConfig({ 'projectDir': '/workspace/project', 'workingDir': '', }) self.assertEqual(container_config.project_dir, '/workspace/project') self.assertEqual(container_config.working_dir, '/workspace/project') container_config = ContainerConfig({ 'projectDir': '/workspace/project', 'workingDir': '/working-dir', }) self.assertEqual(container_config.project_dir, '/workspace/project') 
def run(command: str, capture_output: bool = False, assert_zero_code: bool = True) -> (int, str):
    """Runs a shell command and returns its exit code and (optionally) its stdout.

    Args:
        command: the shell command to run.
        capture_output: if True, stdout is captured and returned as a string.
        assert_zero_code: if True, a non-zero exit code raises an AssertionError.
    """
    # run the command
    stdout = subprocess.PIPE if capture_output else None
    res = subprocess.run(command, stdout=stdout, shell=True)

    # make sure the command succeeded
    if assert_zero_code:
        assert res.returncode == 0, 'Command "%s" is failed' % command

    # decode output
    output = res.stdout.decode('utf-8') if capture_output else None

    return res.returncode, output


def touch_file(file_path: str):
    """Creates the file (or updates its mtime) on the local machine."""
    run('touch ' + shlex.quote(file_path))


class SpottyCli:
    """Thin wrapper around the "spotty" command-line tool used by the integration tests."""

    def __init__(self, instance_name: str):
        self._instance_name = instance_name

    def is_instance_running(self) -> bool:
        """Checks whether the instance is running or not."""
        exit_code, _ = run('spotty status ' + self._instance_name, capture_output=True, assert_zero_code=False)
        return exit_code == 0

    def start_instance(self):
        """Starts an instance."""
        if not self.is_instance_running():
            # start instance
            run('spotty start ' + self._instance_name)

    def list_remote_files(self) -> List[str]:
        """Returns a list of files in the project directory on the remote machine."""
        output = self.exec('find . -type f -print')
        remote_files = [os.path.normpath(file_path) for file_path in output.splitlines()]

        return remote_files

    def touch_file(self, file_path: str):
        """Creates the file (or updates its mtime) in the remote project directory.

        (Fixed docstring: the original was a copy-paste of list_remote_files'.)
        """
        self.exec('touch ' + shlex.quote(file_path))

    @staticmethod
    def _parse_transfer_lines(output: str) -> (list, list):
        """Extracts (uploaded_files, downloaded_files) from "spotty sync/download" output.

        Factored out of sync() and download(), which contained two identical
        copies of this parsing loop.
        """
        uploaded_files = []
        downloaded_files = []
        for line in output.splitlines():
            if line.startswith('upload:'):
                uploaded_files.append(os.path.normpath(line.split()[1]))
            elif line.startswith('download:'):
                downloaded_files.append(line.split()[3].rsplit('.project/')[-1])

        return uploaded_files, downloaded_files

    def sync(self):
        """Syncs files with the remote instance."""
        _, output = run('spotty sync ' + self._instance_name, capture_output=True)

        return self._parse_transfer_lines(output)

    def download(self, filter_pattern: str):
        """Downloads files matching the pattern from the remote instance."""
        _, output = run('spotty download %s -i %s' % (self._instance_name, shlex.quote(filter_pattern)),
                        capture_output=True)

        print('---')
        print(output)
        print('---')

        return self._parse_transfer_lines(output)

    def exec(self, container_command: str):
        """Execs a custom command in the container."""
        _, output = run('spotty exec %s --no-sync -- %s' % (self._instance_name, container_command),
                        capture_output=True)
        return output
class TestInstanceDownload(unittest.TestCase):
    """End-to-end tests for "spotty download" against a running AWS instance.

    NOTE(review): these are integration tests — they appear to require the
    "aws-1" instance to be started and the "spotty" CLI on PATH; confirm
    before running in CI.
    """

    # CLI wrapper bound to the test instance defined in data/test-project/spotty.yaml
    spotty = SpottyCli('aws-1')

    @classmethod
    def setUpClass(cls):
        """Switches to the test project directory and verifies the instance state."""
        # set local project directory
        project_dir = os.path.join(os.path.dirname(__file__), 'data', 'test-project')
        os.chdir(project_dir)

        # make sure the instance is running
        assert cls.spotty.is_instance_running()

        # make sure all files are synced
        local_files = sorted([
            'ignored-dir/included-file',
            'local-file',
            'spotty.yaml',
        ])
        remote_files = cls.spotty.list_remote_files()
        assert set(local_files).issubset(set(remote_files))

    def test_download_file(self):
        """Downloads a single file; repeats should be no-ops unless the remote copy is newer."""
        # touch the remote file
        self.spotty.touch_file('local-file')

        # download the file
        uploaded_files, downloaded_files = self.spotty.download('local-file')

        # only 1 file should be uploaded and downloaded
        self.assertEqual(len(uploaded_files), 1)
        self.assertEqual(len(downloaded_files), 1)

        # the updated file uploaded
        self.assertIn('local-file', uploaded_files)
        self.assertIn('local-file', downloaded_files)

        # download the file again
        uploaded_files, downloaded_files = self.spotty.download('local-file')

        # no files should be uploaded or downloaded
        self.assertFalse(uploaded_files)
        self.assertFalse(downloaded_files)

        # touch the remote file, then touch the local file
        self.spotty.touch_file('local-file')
        touch_file('local-file')

        # download the remote file again
        uploaded_files, downloaded_files = self.spotty.download('local-file')

        # the file should be uploaded, but not downloaded as the local file is newer than the remote one
        self.assertEqual(len(uploaded_files), 1)
        self.assertFalse(downloaded_files)
        self.assertIn('local-file', uploaded_files)

    def test_wildcard(self):
        """Downloads files by a wildcard pattern, including files excluded from sync."""
        # touch remote files
        self.spotty.touch_file('ignored-dir/ignored-file')
        self.spotty.touch_file('ignored-dir/included-file')

        # download the files
        uploaded_files, downloaded_files = self.spotty.download('ignored-dir/*')

        # 2 files should be uploaded and downloaded
        self.assertEqual(len(uploaded_files), 2)
        self.assertEqual(len(downloaded_files), 2)
        self.assertIn('ignored-dir/ignored-file', uploaded_files)
        self.assertIn('ignored-dir/ignored-file', downloaded_files)
        self.assertIn('ignored-dir/included-file', uploaded_files)
        self.assertIn('ignored-dir/included-file', downloaded_files)

        # touch one of the remote files
        self.spotty.touch_file('ignored-dir/ignored-file')

        # download the files
        uploaded_files, downloaded_files = self.spotty.download('ignored-dir/*')

        # 1 file should be uploaded and downloaded
        self.assertEqual(len(uploaded_files), 1)
        self.assertEqual(len(downloaded_files), 1)
        self.assertIn('ignored-dir/ignored-file', uploaded_files)
        self.assertIn('ignored-dir/ignored-file', downloaded_files)


if __name__ == '__main__':
    unittest.main()
tests.helpers.cli import touch_file from tests.helpers.spotty_cli import SpottyCli class TestInstanceSync(unittest.TestCase): spotty = SpottyCli('aws-1') @classmethod def setUpClass(cls): # set local project directory project_dir = os.path.join(os.path.dirname(__file__), 'data', 'test-project') os.chdir(project_dir) # start AWS instance cls.spotty.start_instance() # make sure all files are synced local_files = sorted([ 'ignored-dir/included-file', 'local-file', 'spotty.yaml', ]) remote_files = cls.spotty.list_remote_files() assert set(local_files).issubset(set(remote_files)) def test_update_local_file(self): # touch local file touch_file('ignored-dir/included-file') touch_file('ignored-dir/ignored-file') # sync files with the remote instance uploaded_files, downloaded_files = self.spotty.sync() # only 1 file should be uploaded and downloaded self.assertEqual(len(uploaded_files), 1) self.assertEqual(len(downloaded_files), 1) # the updated file uploaded self.assertIn('ignored-dir/included-file', uploaded_files) self.assertIn('ignored-dir/included-file', downloaded_files) # the untouched file not uploaded self.assertNotIn('local-file', uploaded_files) self.assertNotIn('local-file', downloaded_files) # the ignored file not uploaded self.assertNotIn('ignored-dir/ignored-file', uploaded_files) self.assertNotIn('ignored-dir/ignored-file', downloaded_files) def test_update_remote_file(self): # touch remote files self.spotty.touch_file('local-file') self.spotty.touch_file('ignored-dir/ignored-file') # sync files with the remote instance uploaded_files, downloaded_files = self.spotty.sync() # local files were not changed, so should not be uploaded self.assertFalse(uploaded_files, 'No files should be uploaded') # the remote file that is newer still will be overwritten with the older file # from the bucket (this is the current "aws s3 sync" behaviour) self.assertIn('local-file', downloaded_files) self.assertEqual(len(downloaded_files), 1) # the ignored file should not be 
overwritten self.assertNotIn('ignored-dir/ignored-file', downloaded_files) def test_new_remote_file(self): # create new remote file self.spotty.touch_file('new-remote-file') # make sure file was created remote_files = self.spotty.list_remote_files() self.assertIn('new-remote-file', remote_files) # make sure the file with this name doesn't exist locally self.assertFalse(os.path.isfile('new-remote-file')) # sync files self.spotty.sync() # make sure the file wasn't delete on the remote machine remote_files = self.spotty.list_remote_files() self.assertIn('new-remote-file', remote_files) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/providers/aws/config/__init__.py ================================================ ================================================ FILE: tests/providers/aws/config/container_deployment.py ================================================ import os import unittest from spotty.config.abstract_instance_config import VolumeMount from spotty.config.config_utils import _read_yaml from spotty.config.project_config import ProjectConfig from spotty.providers.aws.config.instance_config import InstanceConfig class TestContainerDeployment(unittest.TestCase): def test_instance_volume(self): local_project_dir = os.path.join(os.path.dirname(__file__), 'data') config = _read_yaml(os.path.join(local_project_dir, 'config1.yaml')) project_config = ProjectConfig(config, local_project_dir) instance_config = InstanceConfig(project_config.instances[0], project_config) self.assertEqual(instance_config.host_project_dir, '/mnt/test/project') self.assertEqual(instance_config.dockerfile_path, '/mnt/test/project/docker/Dockerfile') self.assertEqual(instance_config.docker_context_path, '/mnt/test/project/docker') self.assertEqual(len(instance_config.volume_mounts), 2) self.assertEqual(instance_config.volume_mounts[0], VolumeMount(name='workspace', host_path='/mnt/test', mount_path='/workspace', mode='rw', 
hidden=False)) self.assertEqual(instance_config.volume_mounts[1], VolumeMount(name=None, host_path='/root/.aws', mount_path='/root/.aws', mode='ro', hidden=True,)) def test_tmp_project_volume(self): local_project_dir = os.path.join(os.path.dirname(__file__), 'data') config = _read_yaml(os.path.join(local_project_dir, 'config-wo-mounts.yaml')) project_config = ProjectConfig(config, local_project_dir) instance_config = InstanceConfig(project_config.instances[0], project_config) host_project_dir = '/tmp/spotty/containers/spotty-my-project-aws-1-default/volumes/.project' self.assertEqual(instance_config.host_project_dir, host_project_dir) self.assertEqual(instance_config.dockerfile_path, os.path.join(host_project_dir, 'docker', 'Dockerfile')) self.assertEqual(instance_config.docker_context_path, os.path.join(host_project_dir, 'docker')) self.assertEqual(len(instance_config.volume_mounts), 1) self.assertEqual(instance_config.volume_mounts[0], VolumeMount(name=None, host_path=host_project_dir, mount_path='/workspace/project', mode='rw', hidden=True)) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/providers/aws/config/data/config-wo-mounts.yaml ================================================ project: name: my-project containers: - projectDir: /workspace/project file: docker/Dockerfile instances: - name: aws-1 provider: aws parameters: region: us-east-2 instanceType: t2.small ================================================ FILE: tests/providers/aws/config/data/config1.yaml ================================================ project: name: my-project syncFilters: - exclude: - .git/* - .idea/* - '*/__pycache__/*' container: projectDir: /workspace/project file: docker/Dockerfile volumeMounts: - name: workspace mountPath: /workspace commands: | echo test instances: - name: aws-1 provider: aws parameters: region: us-east-2 instanceType: t2.small volumes: - name: workspace parameters: size: 10 deletionPolicy: Delete 
mountDir: /mnt/test ================================================ FILE: tests/providers/aws/config/instance_config_validation.py ================================================ import unittest from spotty.providers.aws.config.validation import validate_instance_parameters class TestBucketResource(unittest.TestCase): def test_default_configuration(self): """Checks the default values for an instance configuration are set correctly.""" required_params = { 'region': 'eu-west-1', 'instanceType': 'p2.xlarge', } expected_params = { **required_params, 'amiId': None, 'amiName': None, 'availabilityZone': '', 'commands': '', 'containerName': None, 'dockerDataRoot': '', 'instanceProfileArn': None, 'localSshPort': None, 'managedPolicyArns': [], 'maxPrice': 0, 'ports': [], 'rootVolumeSize': 0, 'spotInstance': False, 'subnetId': '', 'volumes': [], } self.assertEqual(expected_params, validate_instance_parameters(required_params)) def test_failed_validation(self): # no params with self.assertRaises(ValueError): validate_instance_parameters({}) # wrong case for the region with self.assertRaises(ValueError): validate_instance_parameters({ 'region': 'EU-WEST-1', 'instanceType': 'p2.xlarge', }) # unknown parameter with self.assertRaises(ValueError): validate_instance_parameters({ 'region': 'eu-west-1', 'instanceType': 'p2.xlarge', 'unknownParameter': 'test', }) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/providers/aws/project_resources/__init__.py ================================================ ================================================ FILE: tests/providers/aws/project_resources/bucket.py ================================================ import unittest from spotty.deployment.abstract_cloud_instance.errors.bucket_not_found import BucketNotFoundError from spotty.providers.aws.resource_managers.bucket_manager import BucketManager from moto import mock_s3 class TestBucketResource(unittest.TestCase): @mock_s3 def 
test_create_and_find_bucket(self): region = 'eu-central-1' project_name = 'TEST_PROJECT' bucket_resource = BucketManager(project_name, region) # bucket not found with self.assertRaises(BucketNotFoundError): bucket_resource.get_bucket() # bucket found bucket_name = bucket_resource.create_bucket().name self.assertEqual(bucket_name, bucket_resource.get_bucket().name) # several buckets found bucket_resource.create_bucket() with self.assertRaises(ValueError): bucket_resource.get_bucket() if __name__ == '__main__': unittest.main() ================================================ FILE: tests/providers/aws/project_resources/key_pair.py ================================================ import unittest import boto3 import os from moto import mock_ec2 from spotty.providers.aws.resource_managers.key_pair_manager import KeyPairManager from spotty.providers.instance_manager_factory import PROVIDER_AWS class TestKeyPairResource(unittest.TestCase): def test_key_path(self): region = 'eu-central-1' project_name = 'TEST_PROJECT' key_pair_manager = KeyPairManager(None, project_name, region) # check key path key_name = 'spotty-key-%s-%s' % (project_name.lower(), region) key_path = os.path.join(os.path.expanduser('~'), '.spotty', 'keys', PROVIDER_AWS, key_name) self.assertEqual(key_pair_manager.key_path, key_path) @mock_ec2 def test_create_and_delete_key(self): region = 'eu-central-1' project_name = 'TEST_PROJECT' ec2 = boto3.client('ec2', region_name=region) key_pair_manager = KeyPairManager(ec2, project_name, region) # key doesn't exist self.assertFalse(key_pair_manager._ec2_key_exists()) # create the key key_pair_manager.maybe_create_key() self.assertTrue(key_pair_manager._ec2_key_exists()) self.assertTrue(os.path.isfile(key_pair_manager.key_path)) with open(key_pair_manager.key_path) as f: key_content = f.read() # make sure the key is not being recreated key_pair_manager.maybe_create_key() with open(key_pair_manager.key_path) as f: same_key_content = f.read() 
self.assertEqual(key_content, same_key_content) # create the key and rewrite the key file ec2.delete_key_pair(KeyName=key_pair_manager.key_name) self.assertFalse(key_pair_manager._ec2_key_exists()) self.assertTrue(os.path.isfile(key_pair_manager.key_path)) key_pair_manager.maybe_create_key() self.assertTrue(key_pair_manager._ec2_key_exists()) self.assertTrue(os.path.isfile(key_pair_manager.key_path)) with open(key_pair_manager.key_path) as f: new_key_content = f.read() self.assertNotEqual(key_content, new_key_content) # recreate the key if the key file doesn't exist os.unlink(key_pair_manager.key_path) self.assertFalse(os.path.isfile(key_pair_manager.key_path)) key_pair_manager.maybe_create_key() self.assertTrue(key_pair_manager._ec2_key_exists()) self.assertTrue(os.path.isfile(key_pair_manager.key_path)) # delete key key_pair_manager.delete_key() self.assertFalse(key_pair_manager._ec2_key_exists()) self.assertFalse(os.path.isfile(key_pair_manager.key_path)) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/providers/gcp/config/__init__.py ================================================ ================================================ FILE: tests/providers/gcp/config/image_uri.py ================================================ import unittest from spotty.providers.gcp.config.image_uri import ImageUri class TestImageUrl(unittest.TestCase): def test_image_url_parsing(self): pos_tests = [ { 'uri': 'projects/debian-cloud/global/images/family/debian-9', 'expected': ('debian-cloud', True, 'debian-9'), }, { 'uri': 'projects/debian-cloud/global/images/debian-9-stretch', 'expected': ('debian-cloud', False, 'debian-9-stretch'), }, { 'uri': 'global/images/family/my-image-family', 'expected': (None, True, 'my-image-family'), }, { 'uri': 'global/images/my-custom-image', 'expected': (None, False, 'my-custom-image'), }, { 'uri': 'https://compute.googleapis.com/compute/v1/projects/debian-cloud/global/images/debian-9-stretch', 
'expected': ('debian-cloud', False, 'debian-9-stretch'), }, ] for pos_test in pos_tests: image_uri = ImageUri(pos_test['uri']) self.assertEqual(pos_test['expected'], (image_uri.project_id, image_uri.is_family, image_uri.name)) neg_tests = [ 'projects//global/images/family/debian-9', # no project 'projects/test1/test2/global/images/family/debian-9', # extra part 'projects/debian-cloud/global/image/family/debian-9', # "image" misspelling 'projects/debian-cloud/global/images/family/Debian-9', # capital letter 'projects/debian-cloud/global/images/' # no image name '/global/images/family/debian-9', # starts with a slash 'global/images/family/debian-9/', # ends with a slash 'global/images/-my-custom-image', # image name starts with a dash 'global/images/my-custom-image-', # image name ends with a dash 'https://compute.googleapis.com/compute/v1/global/images/debian-9-stretch', # no project name ] for neg_test in neg_tests: with self.assertRaises(ValueError): ImageUri(neg_test) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/providers/local/__init__.py ================================================ ================================================ FILE: tests/providers/local/commands/__init__.py ================================================ ================================================ FILE: tests/providers/local/commands/data/test-project/spotty.yaml ================================================ project: name: test-project containers: - projectDir: /workspace/project image: ubuntu:16.04 instances: - name: local-1 provider: local ================================================ FILE: tests/providers/local/commands/run.py ================================================ import os import unittest from spotty.deployment.utils.commands import get_script_command from spotty.deployment.utils.user_scripts import render_script from tests.helpers.spotty_cli import SpottyCli class TestInstanceRun(unittest.TestCase): 
spotty = SpottyCli('local-1') @classmethod def setUpClass(cls): # set local project directory project_dir = os.path.join(os.path.dirname(__file__), 'data', 'test-project') os.chdir(project_dir) # start AWS instance cls.spotty.start_instance() def test_script_arguments(self): script_name = 'echo' script_content = 'echo test $1 $2' # no arguments script_command = get_script_command(script_name, script_content, script_args=None, logging=False) output = self.spotty.exec(script_command) self.assertEqual(output.strip(), 'test') # custom arguments script_command = get_script_command(script_name, script_content, script_args=['arg 1', 'arg 2'], logging=False) output = self.spotty.exec(script_command) self.assertEqual(output.strip(), 'test arg 1 arg 2') def test_script_params(self): script_content = 'echo test {{PARAM_1}} {{PARAM_2}}' script_params = { 'PARAM_2': 'param 2', } script_content = render_script(script_content, script_params) self.assertEqual(script_content, '#!/usr/bin/env bash\n\n' 'set -xe\n\n' 'echo test param 2') def test_script_logging(self): script_name = 'echo' script_content = 'echo test' # run the script with logging script_command = get_script_command(script_name, script_content, script_args=None, logging=True) output = self.spotty.exec(script_command) self.assertEqual(output.strip(), 'test') # read the latest log file output = self.spotty.exec('bash -c \'cat /var/log/spotty/run/$(ls -rt /var/log/spotty/run | tail -n1)\'') self.assertEqual(output.splitlines()[0], 'test') if __name__ == '__main__': unittest.main() ================================================ FILE: tests/providers/local/config/__init__.py ================================================ ================================================ FILE: tests/providers/local/config/container_deployment.py ================================================ import os import unittest from spotty.config.abstract_instance_config import VolumeMount from spotty.config.config_utils import _read_yaml from 
spotty.config.project_config import ProjectConfig from spotty.providers.local.config.instance_config import InstanceConfig class TestContainerDeployment(unittest.TestCase): def test_instance_volume(self): local_project_dir = os.path.join(os.path.dirname(__file__), 'config', 'data') config = _read_yaml(os.path.join(local_project_dir, 'config1.yaml')) project_config = ProjectConfig(config, local_project_dir) instance_config = InstanceConfig(project_config.instances[0], project_config) self.assertEqual(instance_config.host_project_dir, local_project_dir) self.assertEqual(instance_config.dockerfile_path, os.path.join(local_project_dir, 'docker', 'Dockerfile')) self.assertEqual(instance_config.docker_context_path, os.path.join(local_project_dir, 'docker')) self.assertEqual(len(instance_config.volume_mounts), 2) self.assertEqual(instance_config.volume_mounts[0], VolumeMount(name='workspace', host_path='/mnt/test', mount_path='/workspace', mode='rw', hidden=False)) self.assertEqual(instance_config.volume_mounts[1], VolumeMount(name=None, host_path=local_project_dir, mount_path='/workspace/project', mode='rw', hidden=True)) if __name__ == '__main__': unittest.main() ================================================ FILE: tests/providers/local/config/data/config1.yaml ================================================ project: name: my-project syncFilters: - exclude: - .git/* - .idea/* - '*/__pycache__/*' containers: - projectDir: /workspace/project file: docker/Dockerfile volumeMounts: - name: workspace mountPath: /workspace commands: | echo test instances: - name: local-1 provider: local parameters: volumes: - name: workspace parameters: path: /mnt/test