Repository: tensorwerk/hangar-py Branch: master Commit: a6deb22854a6 Files: 190 Total size: 2.1 MB Directory structure: gitextract_qj3h30ym/ ├── .bumpversion.cfg ├── .coveragerc ├── .editorconfig ├── .gitattributes ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ ├── feature_request.md │ │ └── questions_and_documentation.md │ ├── PULL_REQUEST_TEMPLATE.md │ └── workflows/ │ ├── asvbench.yml │ ├── release.yml │ ├── testsphinx.yml │ └── testsuite.yml ├── .gitignore ├── .readthedocs.yml ├── AUTHORS.rst ├── CHANGELOG.rst ├── CODE_OF_CONDUCT.rst ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── asv_bench/ │ ├── README.rst │ ├── asv.conf.json │ └── benchmarks/ │ ├── __init__.py │ ├── backend_comparisons.py │ ├── backends/ │ │ ├── __init__.py │ │ ├── hdf5_00.py │ │ ├── hdf5_01.py │ │ └── numpy_10.py │ ├── commit_and_checkout.py │ └── package.py ├── codecov.yml ├── docs/ │ ├── Tutorial-001.ipynb │ ├── Tutorial-002.ipynb │ ├── Tutorial-003.ipynb │ ├── Tutorial-Dataset.ipynb │ ├── Tutorial-QuickStart.ipynb │ ├── Tutorial-RealQuickStart.ipynb │ ├── api.rst │ ├── authors.rst │ ├── backends/ │ │ ├── hdf5_00.rst │ │ ├── hdf5_01.rst │ │ ├── lmdb_30.rst │ │ ├── numpy_10.rst │ │ └── remote_50.rst │ ├── backends.rst │ ├── benchmarking.rst │ ├── changelog.rst │ ├── cli.rst │ ├── codeofconduct.rst │ ├── concepts.rst │ ├── conf.py │ ├── contributing.rst │ ├── contributingindex.rst │ ├── design.rst │ ├── externals.rst │ ├── faq.rst │ ├── index.rst │ ├── installation.rst │ ├── noindexapi/ │ │ ├── apiinit.rst │ │ └── apiremotefetchdata.rst │ ├── quickstart.rst │ ├── readme.rst │ ├── requirements.txt │ ├── requirements_rtd.txt │ ├── spelling_wordlist.txt │ └── tutorial.rst ├── hangar.yml ├── mypy.ini ├── scripts/ │ └── run_proto_codegen.py ├── setup.cfg ├── setup.py ├── src/ │ └── hangar/ │ ├── __init__.py │ ├── __main__.py │ ├── _version.py │ ├── backends/ │ │ ├── __init__.py │ │ ├── chunk.py │ │ ├── hdf5_00.py │ │ ├── hdf5_01.py │ │ ├── lmdb_30.py │ │ ├── lmdb_31.py │ │ ├── numpy_10.py │ │ ├── remote_50.py │ │ ├── specparse.pyx │ │ ├── specs.pxd │ │ └── specs.pyx │ ├── bulk_importer.py │ ├── checkout.py │ ├── cli/ │ │ ├── __init__.py │ │ ├── cli.py │ │ └── utils.py │ ├── columns/ │ │ ├── __init__.py │ │ ├── column.py │ │ ├── common.py │ │ ├── constructors.py │ │ ├── introspection.py │ │ ├── layout_flat.py │ │ └── layout_nested.py │ ├── constants.py │ ├── context.py │ ├── dataset/ │ │ ├── __init__.py │ │ ├── common.py │ │ ├── numpy_dset.py │ │ ├── tensorflow_dset.py │ │ └── torch_dset.py │ ├── diagnostics/ │ │ ├── __init__.py │ │ ├── ecosystem.py │ │ ├── graphing.py │ │ └── integrity.py │ ├── diff.py │ ├── external/ │ │ ├── __init__.py │ │ ├── _external.py │ │ ├── base_plugin.py │ │ └── plugin_manager.py │ ├── external_cpython.pxd │ ├── merger.py │ ├── mixins/ │ │ ├── __init__.py │ │ ├── checkout_iteration.py │ │ ├── datasetget.py │ │ └── recorditer.py │ ├── op_state.py │ ├── optimized_utils.pxd │ ├── optimized_utils.pyx │ ├── records/ │ │ ├── __init__.py │ │ ├── column_parsers.pyx │ │ ├── commiting.py │ │ ├── hashmachine.pyx │ │ ├── hashs.py │ │ ├── heads.py │ │ ├── parsing.py │ │ ├── queries.py │ │ ├── recordstructs.pxd │ │ ├── recordstructs.pyx │ │ ├── summarize.py │ │ └── vcompat.py │ ├── remote/ │ │ ├── __init__.py │ │ ├── chunks.py │ │ ├── client.py │ │ ├── config_server.ini │ │ ├── content.py │ │ ├── hangar_service.proto │ │ ├── hangar_service_pb2.py │ │ ├── hangar_service_pb2.pyi │ │ ├── hangar_service_pb2_grpc.py │ │ ├── header_manipulator_client_interceptor.py │ │ ├── 
request_header_validator_interceptor.py │ │ └── server.py │ ├── remotes.py │ ├── repository.py │ ├── txnctx.py │ ├── typesystem/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── descriptors.py │ │ ├── ndarray.py │ │ ├── pybytes.py │ │ └── pystring.py │ └── utils.py ├── tests/ │ ├── bulk_importer/ │ │ └── test_bulk_importer.py │ ├── conftest.py │ ├── ml_datasets/ │ │ └── test_dataset.py │ ├── property_based/ │ │ ├── conftest.py │ │ ├── test_pbt_column_flat.py │ │ └── test_pbt_column_nested.py │ ├── test_backend_hdf5_00_hdf5_01.py │ ├── test_branching.py │ ├── test_checkout.py │ ├── test_checkout_arrayset_access.py │ ├── test_cli.py │ ├── test_column.py │ ├── test_column_backends.py │ ├── test_column_definition_permutations.py │ ├── test_column_nested.py │ ├── test_column_pickle.py │ ├── test_commit_ref_verification.py │ ├── test_context_management.py │ ├── test_diff.py │ ├── test_diff_staged_summary.py │ ├── test_initiate.py │ ├── test_merging.py │ ├── test_optimized_utils.py │ ├── test_remote_serialize.py │ ├── test_remotes.py │ ├── test_repo_integrity_verification.py │ ├── test_utils.py │ ├── test_version.py │ ├── test_visualizations.py │ └── typesystem/ │ ├── test_ndarray_typesysem.py │ ├── test_pybytes_typesystem.py │ └── test_pystr_typesystem.py └── tox.ini ================================================ FILE CONTENTS ================================================ ================================================ FILE: .bumpversion.cfg ================================================ [bumpversion] current_version = 0.5.2 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? serialize = {major}.{minor}.{patch}.{release}{build} {major}.{minor}.{patch} [bumpversion:part:release] optional_value = rc first_value = dev values = dev rc [bumpversion:part:build] [bumpversion:file:setup.py] search = version='{current_version}' replace = version='{new_version}' [bumpversion:file:docs/conf.py] search = version = release = '{current_version}' replace = version = release = '{new_version}' [bumpversion:file:src/hangar/__init__.py] search = __version__ = '{current_version}' replace = __version__ = '{new_version}' [bumpversion:file:src/hangar/diagnostics/__init__.py] search = __version__ = '{current_version}' replace = __version__ = '{new_version}' ================================================ FILE: .coveragerc ================================================ [paths] source = src [run] branch = True parallel = True source = hangar tests omit = */hangar/__main__.py */hangar_service_pb2.py */hangar_service_pb2_grpc.py */hangar_service_pb2.pyi [report] exclude_lines = pragma: no cover def __repr__ def _repr_pretty_ def _ipython_key_completions_ show_missing = True precision = 2 omit = *migrations* ================================================ FILE: .editorconfig ================================================ # see http://editorconfig.org root = true [*] end_of_line = lf trim_trailing_whitespace = true insert_final_newline = true indent_style = space indent_size = 4 charset = utf-8 [*.{bat,cmd,ps1}] end_of_line = crlf ================================================ FILE: .gitattributes ================================================ * text=auto *.bat eol=crlf *.cmd eol=crlf *.ps1 eol=lf *.sh eol=lf *.rtf -text ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: "[BUG REPORT]" labels: 'Bug: Awaiting Priority 
Assignment' assignees: '' --- **Describe the bug** A clear and concise description of what the bug is. **Severity** Select an option: - [ ] Data Corruption / Loss of Any Kind - [ ] Unexpected Behavior, Exceptions or Error Thrown - [ ] Performance Bottleneck **To Reproduce** Steps to reproduce the behavior, minimal example code preferred: **Expected behavior** A clear and concise description of what you expected to happen. **Screenshots** If applicable, add screenshots to help explain your problem. **Desktop (please complete the following information):** - OS: - Python: - Hangar: **Additional context** Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: "[FEATURE REQUEST]" labels: enhancement assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .github/ISSUE_TEMPLATE/questions_and_documentation.md ================================================ --- name: Questions and Documentation about: Is something confusing? The documentation not clear? We can help title: "[QUESTION & DOCS]: " labels: documentation, question assignees: '' --- **Executive Summary** In one to two sentences, describe your question or issue with the documentation: **Additional Context / Explantation** (if applicable) provide more info about the question/problem (we love example code & screenshots!) **Desktop (If applicable, please complete the following version information):** - OS: - Python: - Hangar Version: - _Install Type_ - [ ] Source Build - [ ] Pip install - [ ] Conda (conda-forge) install **External Links** (If applicable) reference other issues, read the docs pages, code docstrings. - ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ ## Motivation and Context #### _Why is this change required? What problem does it solve?:_ #### _If it fixes an open issue, please link to the issue here:_ ## Description #### _Describe your changes in detail:_ ## Screenshots (if appropriate): ## Types of changes What types of changes does your code introduce? Put an `x` in all the boxes that apply: - [ ] Documentation update - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) Is this PR ready for review, or a work in progress? - [ ] Ready for review - [ ] Work in progress ## How Has This Been Tested? Put an `x` in the boxes that apply: - [ ] Current tests cover modifications made - [ ] New tests have been added to the test suite - [ ] Modifications were made to existing tests to support these changes - [ ] Tests may be needed, but they are not included when the PR was proposed - [ ] I don't know. Help! ## Checklist: - [ ] My code follows the code style of this project. 
- [ ] My change requires a change to the documentation. - [ ] I have updated the documentation accordingly. - [ ] I have read the **[CONTRIBUTING](../CONTRIBUTING.rst)** document. - [ ] I have signed (or will sign when prompted) the tensorwork CLA. - [ ] I have added tests to cover my changes. - [ ] All new and existing tests passed. ================================================ FILE: .github/workflows/asvbench.yml ================================================ name: ASV Benchmarking on: pull_request: branches: - master jobs: run_benchmarks: runs-on: ${{ matrix.os }} strategy: max-parallel: 4 fail-fast: false matrix: os: [ubuntu-18.04, macOS-10.14] python-version: [3.6, 3.7] steps: - uses: actions/checkout@v1 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v1 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip pip install --upgrade setuptools pip install virtualenv==16.7.9 pip install git+https://github.com/airspeed-velocity/asv - name: Run Benchmarks run: | cd asv_bench/ asv machine --yes asv continuous --split origin/master HEAD | tee -a asv_continuous.log shell: bash continue-on-error: true - name: Show Comparison run: | cd asv_bench/ asv compare --split origin/master HEAD | tee -a asv_compare.log if [[ $(cat asv_continuous.log | grep "PERFORMANCE DECREASED") ]]; then echo "Benchmarks Performance Decreased" exit 1 elif [[ $(cat asv_continuous.log | grep "PERFORMANCE INCREASED") ]]; then echo "Benchmark Performance Increased" else echo "Benchmarks Run Without Errors, No Significant Change." fi shell: bash ================================================ FILE: .github/workflows/release.yml ================================================ name: release on: release: types: [published, prereleased] jobs: build-linux-cp36: runs-on: ubuntu-latest container: quay.io/pypa/manylinux2014_x86_64 steps: - uses: actions/checkout@v2 - name: Install Python package dependencies run: /opt/python/cp36-cp36m/bin/python -m pip install cython wheel setuptools - name: Build binary wheel run: /opt/python/cp36-cp36m/bin/python setup.py bdist_wheel - name: Apply auditwheel run: auditwheel repair -w dist dist/* - name: Remove linux wheel run: rm dist/*-linux_x86_64.whl - name: Archive dist artifacts uses: actions/upload-artifact@v1 with: name: dist-linux-3.6 path: dist build-linux-cp37: runs-on: ubuntu-latest container: quay.io/pypa/manylinux2014_x86_64 steps: - uses: actions/checkout@v2 - name: Install Python package dependencies run: /opt/python/cp37-cp37m/bin/python -m pip install cython wheel setuptools - name: Build binary wheel run: /opt/python/cp37-cp37m/bin/python setup.py bdist_wheel - name: Apply auditwheel run: auditwheel repair -w dist dist/* - name: Remove linux wheel run: rm dist/*-linux_x86_64.whl - name: Archive dist artifacts uses: actions/upload-artifact@v1 with: name: dist-linux-3.7 path: dist build-linux-cp38: runs-on: ubuntu-latest container: quay.io/pypa/manylinux2014_x86_64 steps: - uses: actions/checkout@v2 - name: Install Python package dependencies run: /opt/python/cp38-cp38/bin/python -m pip install cython wheel setuptools - name: Build binary wheel run: /opt/python/cp38-cp38/bin/python setup.py bdist_wheel - name: Apply auditwheel for manylinux wheel run: auditwheel repair -w dist dist/* - name: Remove linux wheel run: rm dist/*-linux_x86_64.whl - name: Archive dist artifacts uses: actions/upload-artifact@v1 with: name: dist-linux-3.8 path: dist build-macos: runs-on: macos-latest 
strategy: max-parallel: 4 matrix: python-version: [3.6, 3.7, 3.8] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} x64 uses: actions/setup-python@v1 with: python-version: ${{ matrix.python-version }} architecture: x64 - name: Install Python package dependencies run: pip install cython wheel setuptools - name: Build binary wheel run: python setup.py bdist_wheel - name: Archive dist artifacts uses: actions/upload-artifact@v1 with: name: dist-macos-${{ matrix.python-version }} path: dist build-windows: runs-on: windows-latest strategy: max-parallel: 3 matrix: python-version: [3.6, 3.7, 3.8] steps: - uses: actions/checkout@v2 - name: Download Build Tools for Visual Studio 2019 run: Invoke-WebRequest -Uri https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile vs_buildtools.exe - name: Run vs_buildtools.exe install run: ./vs_buildtools.exe --quiet --wait --norestart --nocache --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 --add Microsoft.VisualStudio.Component.VC.v141.x86.x64 --add Microsoft.VisualStudio.Component.VC.140 --includeRecommended - name: Set up Python ${{ matrix.python-version }} x64 uses: actions/setup-python@v1 with: python-version: ${{ matrix.python-version }} architecture: x64 - name: Install Python package dependencies run: pip install cython wheel setuptools - name: Build binary wheel run: python setup.py bdist_wheel - name: Archive dist artifacts uses: actions/upload-artifact@v1 with: name: dist-windows-${{ matrix.python-version }} path: dist upload: needs: [build-linux-cp36, build-linux-cp37, build-linux-cp38, build-macos, build-windows] runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - name: Set up Python uses: actions/setup-python@v1 with: python-version: 3.8 - name: Install dependencies run: | python -m pip install --upgrade pip pip install cython wheel setuptools - name: Create source dist run: python setup.py sdist # Linux - name: Stage linux 3.6 uses: actions/download-artifact@v1 with: name: dist-linux-3.6 - run: mv -v dist-linux-3.6/* dist/ - name: Stage linux 3.7 uses: actions/download-artifact@v1 with: name: dist-linux-3.7 - run: mv -v dist-linux-3.7/* dist/ - name: Stage linux 3.8 uses: actions/download-artifact@v1 with: name: dist-linux-3.8 - run: mv -v dist-linux-3.8/* dist/ # MacOS - name: Stage macos 3.6 uses: actions/download-artifact@v1 with: name: dist-macos-3.6 - run: mv -v dist-macos-3.6/* dist/ - name: Stage macos 3.7 uses: actions/download-artifact@v1 with: name: dist-macos-3.7 - run: mv -v dist-macos-3.7/* dist/ - name: Stage macos 3.8 uses: actions/download-artifact@v1 with: name: dist-macos-3.8 - run: mv -v dist-macos-3.8/* dist/ # Windows - name: Stage windows 3.6 uses: actions/download-artifact@v1 with: name: dist-windows-3.6 - run: mv -v dist-windows-3.6/* dist/ - name: Stage windows 3.7 uses: actions/download-artifact@v1 with: name: dist-windows-3.7 - run: mv -v dist-windows-3.7/* dist/ - name: Stage windows 3.8 uses: actions/download-artifact@v1 with: name: dist-windows-3.8 - run: mv -v dist-windows-3.8/* dist/ - name: Upload PreRelease to Test PyPi with Twine if: "github.event.release.prerelease" env: TWINE_USERNAME: ${{ secrets.TEST_PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.TEST_PYPI_PASSWORD }} run: | ls -l dist/* pip install twine twine upload --repository-url https://test.pypi.org/legacy/ dist/* - name: Upload Release to PyPi with Twine if: "!github.event.release.prerelease" env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | ls -l 
dist/* pip install twine twine upload dist/* ================================================ FILE: .github/workflows/testsphinx.yml ================================================ name: Build Sphinx Docs on: pull_request: branches: - master push: branches: - master jobs: build_docs: runs-on: ubuntu-latest strategy: fail-fast: false steps: - uses: actions/checkout@v2 - name: Set up Python 3.7 uses: actions/setup-python@v1 with: python-version: 3.7 - name: Install dependencies run: | python -m pip install --upgrade setuptools pip wheel tox sudo apt-get update sudo apt-get install pandoc - name: Run Documentation Generator run: tox -e docs env: GH_ACTIONS_PROC_NR: 1 ================================================ FILE: .github/workflows/testsuite.yml ================================================ name: Run Test Suite on: pull_request: branches: - master push: branches: - master jobs: run_test_suite: runs-on: ${{ matrix.platform }} strategy: fail-fast: false matrix: # https://help.github.com/articles/virtual-environments-for-github-actions testcover: [yes, no] testml: [no, yes] platform: - windows-latest - macos-latest - ubuntu-latest python-version: [3.6, 3.7, 3.8] exclude: # tensorflow-cpu:latest (2.1.0) is not available for python 3.8 yet. - python-version: 3.8 testml: yes # build time with limited macos jobs - platform: macos-latest python-version: 3.7 - platform: windows-latest python-version: 3.7 testml: yes steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade setuptools wheel # Use the latest published version for myself :) python -m pip install tox-gh-actions - name: Run Tests Without Coverage Report if: matrix.testcover == 'no' run: tox env: PYTEST_XDIST_PROC_NR: 2 TESTCOVER: ${{ matrix.testcover }} TESTML: ${{ matrix.testml }} - name: Run Tests With Coverage Report if: matrix.testcover == 'yes' run: tox -- --cov-report xml env: PYTEST_XDIST_PROC_NR: 2 TESTCOVER: ${{ matrix.testcover }} TESTML: ${{ matrix.testml }} - name: Upload Coverage Report to Codecov if: matrix.testcover == 'yes' run: bash <(curl -s https://codecov.io/bash) -n "${CC_PLAT}-py${CC_PY}-cov${CC_COV}-ml${CC_ML}" shell: bash env: CC_PLAT: ${{ matrix.platform }} CC_PY: ${{ matrix.python-version }} CC_COV: ${{ matrix.testcover }} CC_ML: ${{ matrix.testml }} ================================================ FILE: .gitignore ================================================ *.py[cod] # C extensions *.c *.so cython_debug/ # cython annotation files src/hangar/backends/*.html docs/_build # Packages *.egg *.egg-info dist build eggs .eggs parts bin var sdist wheelhouse develop-eggs .installed.cfg lib lib64 venv*/ pyvenv*/ MANIFEST # Installer logs pip-log.txt # Unit test / coverage reports .coverage .tox .coverage.* .pytest_cache/ nosetests.xml coverage.xml htmlcov .hypothesis # Performance Testing asv_bench/html asv_bench/env asv_bench/results # Translations *.mo # Mr Developer .mr.developer.cfg .project .pydevproject .idea *.iml *.komodoproject # Complexity output/*.html output/*/index.html # Sphinx docs/_build .DS_Store *~ .*.sw[po] .build .ve .env .cache .pytest .bootstrap .appveyor.token *.bak # Mypy Cache .mypy_cache/ .dmypy.json monkeytype.sqlite3 # IDE Settings .vscode/ .ipynb_checkpoints/ # Testing data *.pkl.gz *.sqlite3 *.dmypy.json ================================================ FILE: .readthedocs.yml 
================================================ # .readthedocs.yml # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required version: 2 # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py # Optionally build your docs in additional formats such as PDF and ePub formats: all # Optionally set the version of Python and requirements required to build your docs python: version: 3.7 install: - requirements: docs/requirements.txt - method: pip path: . - method: setuptools path: . - requirements: docs/requirements_rtd.txt system_packages: true ================================================ FILE: AUTHORS.rst ================================================ Authors ======= * Richard Izzo - rick@tensorwerk.com * Luca Antiga - luca@tensorwerk.com * Sherin Thomas - sherin@tensorwerk.com * Alessia Marcolini - alessia@tensorwerk.com ================================================ FILE: CHANGELOG.rst ================================================ ========== Change Log ========== _`In-Progress` ============== Improvements ------------ * New API design for datasets (previously dataloaders) for machine learning libraries. (`#187 `__) `@hhsecond <>`__ `v0.5.2`_ (2020-05-08) ====================== New Features ------------ * New column data type supporting arbitrary ``bytes`` data. (`#198 `__) `@rlizzo `__ Improvements ------------ * ``str`` typed columns can now accept data containing any unicode code-point. In prior releases data containing any ``non-ascii`` character could not be written to this column type. (`#198 `__) `@rlizzo `__ Bug Fixes --------- * Fixed issue where ``str`` and (newly added) ``bytes`` column data could not be fetched / pushed between a local client repository and remote server. (`#198 `__) `@rlizzo `__ `v0.5.1`_ (2020-04-05) ====================== Bug Fixes --------- * Fixed issue where importing ``make_torch_dataloader`` or ``make_tf_dataloader`` under python 3.6 would raise a ``NameError`` regardless of whether the package is installed. (`#196 `__) `@rlizzo `__ `v0.5.0`_ (2020-04-04) ===================== Improvements ------------ * Python 3.8 is now fully supported. (`#193 `__) `@rlizzo `__ * Major backend overhaul which defines column layouts and data types in the same interchangeable / extensible manner as storage backends. This will allow rapid development of new layouts and data type support as new use cases are discovered by the community. (`#184 `__) `@rlizzo `__ * Column and backend classes are now fully serializable (pickleable) for ``read-only`` checkouts. (`#180 `__) `@rlizzo `__ * Modularized internal structure of API classes to easily allow new column layouts / data types to be added in the future. (`#180 `__) `@rlizzo `__ * Improved type / value checking of manual specification for column ``backend`` and ``backend_options``. (`#180 `__) `@rlizzo `__ * Standardized column data access API to follow python standard library ``dict`` methods API. (`#180 `__) `@rlizzo `__ * Memory usage of arrayset checkouts has been reduced by ~70% by using C-structs for allocating sample record locating info. (`#179 `__) `@rlizzo `__ * Read times from the ``HDF5_00`` and ``HDF5_01`` backends have been reduced by 33-38% (or more for arraysets with many samples) by eliminating redundant computation of chunked storage B-Tree. (`#179 `__) `@rlizzo `__ * Commit times and checkout times have been reduced by 11-18% by optimizing record parsing and memory allocation.
(`#179 `__) `@rlizzo `__ New Features ------------ * Added ``str`` type column with same behavior as ``ndarray`` column (supporting both single-level and nested layouts) to replace functionality of removed ``metadata`` container. (`#184 `__) `@rlizzo `__ * New backend based on ``LMDB`` has been added (specifier of ``lmdb_30``). (`#184 `__) `@rlizzo `__ * Added ``.diff()`` method to ``Repository`` class to enable diffing changes between any pair of commits / branches without needing to open the diff base in a checkout. (`#183 `__) `@rlizzo `__ * New CLI command ``hangar diff`` which reports a summary view of changes made between any pair of commits / branches. (`#183 `__) `@rlizzo `__ * Added ``.log()`` method to ``Checkout`` objects so graphical commit graph or machine readable commit details / DAG can be queried when operating on a particular commit. (`#183 `__) `@rlizzo `__ * "string" type columns now supported alongside "ndarray" column type. (`#180 `__) `@rlizzo `__ * New "column" API, which replaces "arrayset" name. (`#180 `__) `@rlizzo `__ * Arraysets can now contain "nested subsamples" under a common sample key. (`#179 `__) `@rlizzo `__ * New API to add and remove samples from an arrayset. (`#179 `__) `@rlizzo `__ * Added ``repo.size_nbytes`` and ``repo.size_human`` to report disk usage of a repository on disk. (`#174 `__) `@rlizzo `__ * Added method to traverse the entire repository history and cryptographically verify integrity. (`#173 `__) `@rlizzo `__ Changes ------- * Argument syntax of ``__getitem__()`` and ``get()`` methods of ``ReaderCheckout`` and ``WriterCheckout`` classes. The new format supports handling arbitrary arguments specific to retrieval of data from any column type. (`#183 `__) `@rlizzo `__ Removed ------- * ``metadata`` container for ``str`` typed data has been completely removed. It is replaced by a highly extensible and much more user-friendly ``str`` typed column. (`#184 `__) `@rlizzo `__ * ``__setitem__()`` method in ``WriterCheckout`` objects. Writing data to columns via a checkout object is no longer supported. (`#183 `__) `@rlizzo `__ Bug Fixes --------- * Backend data stores no longer use file symlinks, improving compatibility with some types of file systems. (`#171 `__) `@rlizzo `__ * All arrayset types ("flat" and "nested subsamples") and backend readers can now be pickled -- for parallel processing -- in a read-only checkout. (`#179 `__) `@rlizzo `__ Breaking changes ---------------- * New backend record serialization format is incompatible with repositories written in version 0.4 or earlier. * New arrayset API is incompatible with Hangar API in version 0.4 or earlier. `v0.4.0`_ (2019-11-21) ====================== New Features ------------ * Added ability to delete branch names/pointers from a local repository via both API and CLI. (`#128 `__) `@rlizzo `__ * Added ``local`` keyword arg to arrayset key/value iterators to return only locally available samples. (`#131 `__) `@rlizzo `__ * Ability to change the backend storage format and options applied to an ``arrayset`` after initialization. (`#133 `__) `@rlizzo `__ * Added blosc compression to HDF5 backend by default on PyPi installations. (`#146 `__) `@rlizzo `__ * Added Benchmarking Suite to Test for Performance Regressions in PRs. (`#155 `__) `@rlizzo `__ * Added new backend optimized to increase speeds for fixed size arrayset access. (`#160 `__) `@rlizzo `__ Improvements ------------ * Removed ``msgpack`` and ``pyyaml`` dependencies. Cleaned up and improved remote client/server code.
(`#130 `__) `@rlizzo `__ * Multiprocess Torch DataLoaders allowed on Linux and MacOS. (`#144 `__) `@rlizzo `__ * Added CLI options ``commit``, ``checkout``, ``arrayset create``, & ``arrayset remove``. (`#150 `__) `@rlizzo `__ * Plugin system revamp. (`#134 `__) `@hhsecond `__ * Documentation Improvements and Typo-Fixes. (`#156 `__) `@alessiamarcolini `__ * Removed implicit removal of arrayset schema from checkout if every sample was removed from arrayset. This could potentially result in dangling accessors which may or may not self-destruct (as expected) in certain edge-cases. (`#159 `__) `@rlizzo `__ * Added type codes to hash digests so that calculation function can be updated in the future without breaking repos written in previous Hangar versions. (`#165 `__) `@rlizzo `__ Bug Fixes --------- * Programmatic access to repository log contents now returns branch heads alongside other log info. (`#125 `__) `@rlizzo `__ * Fixed minor bug in types of values allowed for ``Arrayset`` names vs ``Sample`` names. (`#151 `__) `@rlizzo `__ * Fixed issue where using checkout object to access a sample in multiple arraysets would try to create a ``namedtuple`` instance with invalid field names. Now incompatible field names are automatically renamed with their positional index. (`#161 `__) `@rlizzo `__ * Explicitly raise error if ``commit`` argument is set while checking out a repository with ``write=True``. (`#166 `__) `@rlizzo `__ Breaking changes ---------------- * New commit reference serialization format is incompatible with repositories written in version 0.3.0 or earlier. `v0.3.0`_ (2019-09-10) ====================== New Features ------------ * API addition allowing reading and writing arrayset data from a checkout object directly. (`#115 `__) `@rlizzo `__ * Data importers, exporters, and viewers via CLI for common file formats. Includes plugin system for easy extensibility in the future. (`#103 `__) (`@rlizzo `__, `@hhsecond `__) Improvements ------------ * Added tutorial on working with remote data. (`#113 `__) `@rlizzo `__ * Added Tutorial on Tensorflow and PyTorch Dataloaders. (`#117 `__) `@hhsecond `__ * Large performance improvement to diff/merge algorithm (~30x previous). (`#112 `__) `@rlizzo `__ * New commit hash algorithm which is much more reproducible in the long term. (`#120 `__) `@rlizzo `__ * HDF5 backend updated to increase speed of reading/writing variable sized dataset compressed chunks. (`#120 `__) `@rlizzo `__ Bug Fixes --------- * Fixed ML Dataloader errors for a number of edge cases surrounding partial-remote data and non-common keys. (`#110 `__) ( `@hhsecond `__, `@rlizzo `__) Breaking changes ---------------- * New commit hash algorithm is incompatible with repositories written in version 0.2.0 or earlier. `v0.2.0`_ (2019-08-09) ====================== New Features ------------ * Numpy memory-mapped array file backend added. (`#70 `__) `@rlizzo `__ * Remote server data backend added. (`#70 `__) `@rlizzo `__ * Selection heuristics to determine appropriate backend from arrayset schema. (`#70 `__) `@rlizzo `__ * Partial remote clones and fetch operations now fully supported. (`#85 `__) `@rlizzo `__ * CLI has been placed under test coverage, added interface usage to docs. (`#85 `__) `@rlizzo `__ * TensorFlow and PyTorch Machine Learning Dataloader Methods (*Experimental Release*).
(`#91 `__) lead: `@hhsecond `__, co-author: `@rlizzo `__, reviewed by: `@elistevens `__ Improvements ------------ * Record format versioning and standardization so as not to break backwards compatibility in the future. (`#70 `__) `@rlizzo `__ * Backend addition and update developer protocols and documentation. (`#70 `__) `@rlizzo `__ * Read-only checkout arrayset sample ``get`` methods now are multithread and multiprocess safe. (`#84 `__) `@rlizzo `__ * Read-only checkout metadata sample ``get`` methods are thread safe if used within a context manager. (`#101 `__) `@rlizzo `__ * Samples can be assigned integer names in addition to ``string`` names. (`#89 `__) `@rlizzo `__ * Forgetting to close a ``write-enabled`` checkout before terminating the python process will close the checkout automatically for many situations. (`#101 `__) `@rlizzo `__ * Repository software version compatibility methods added to ensure upgrade paths in the future. (`#101 `__) `@rlizzo `__ * Many tests added (including support for Mac OSX on Travis-CI). lead: `@rlizzo `__, co-author: `@hhsecond `__ Bug Fixes --------- * Diff results for fast forward merges now return sensible results. (`#77 `__) `@rlizzo `__ * Many type annotations added, and developer documentation improved. `@hhsecond `__ & `@rlizzo `__ Breaking changes ---------------- * Renamed all references to ``datasets`` in the API / world-view to ``arraysets``. * These are backwards incompatible changes. For all versions > 0.2, repository upgrade utilities will be provided if breaking changes occur. `v0.1.1`_ (2019-05-24) ====================== Bug Fixes --------- * Fixed typo in README which was uploaded to PyPi. `v0.1.0`_ (2019-05-24) ====================== New Features ------------ * Remote client-server config negotiation and administrator permissions. (`#10 `__) `@rlizzo `__ * Allow single python process to access multiple repositories simultaneously. (`#20 `__) `@rlizzo `__ * Fast-Forward and 3-Way Merge and Diff methods now fully supported and behaving as expected. (`#32 `__) `@rlizzo `__ Improvements ------------ * Initial test-case specification. (`#14 `__) `@hhsecond `__ * Checkout test-case work. (`#25 `__) `@hhsecond `__ * Metadata test-case work. (`#27 `__) `@hhsecond `__ * Any potential failure cases raise exceptions instead of silently returning. (`#16 `__) `@rlizzo `__ * Many usability improvements in a variety of commits. Bug Fixes --------- * Ensure references to checkout arrayset or metadata objects cannot operate after the checkout is closed. (`#41 `__) `@rlizzo `__ * Sensible exception classes and error messages raised in a variety of situations (Many commits). `@hhsecond `__ & `@rlizzo `__ * Many minor issues addressed. API Additions ------------- * Refer to API documentation (`#23 `__) Breaking changes ---------------- * All repositories written with previous versions of Hangar are liable to break when using this version. Please upgrade versions immediately. `v0.0.0`_ (2019-04-15) ====================== * First Public Release of Hangar! .. _v0.0.0: https://github.com/tensorwerk/hangar-py/commit/2aff3805c66083a7fbb2ebf701ceaf38ac5165c7 .. _v0.1.0: https://github.com/tensorwerk/hangar-py/compare/v0.0.0...v0.1.0 .. _v0.1.1: https://github.com/tensorwerk/hangar-py/compare/v0.1.0...v0.1.1 .. _v0.2.0: https://github.com/tensorwerk/hangar-py/compare/v0.1.1...v0.2.0 .. _v0.3.0: https://github.com/tensorwerk/hangar-py/compare/v0.2.0...v0.3.0 .. _v0.4.0: https://github.com/tensorwerk/hangar-py/compare/v0.3.0...v0.4.0 ..
_v0.5.0: https://github.com/tensorwerk/hangar-py/compare/v0.4.0...v0.5.0 .. _v0.5.1: https://github.com/tensorwerk/hangar-py/compare/v0.5.0...v0.5.1 .. _v0.5.2: https://github.com/tensorwerk/hangar-py/compare/v0.5.1...v0.5.2 .. _In-Progress: https://github.com/tensorwerk/hangar-py/compare/v0.5.2...master ================================================ FILE: CODE_OF_CONDUCT.rst ================================================ =========================== Contributor Code of Conduct =========================== Our Pledge ---------- In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. Our Standards ------------- Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting Our Responsibilities -------------------- Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. Scope ----- This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. Enforcement ----------- Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at `hangar.info@tensorwerk.com `__. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
Attribution ----------- This Code of Conduct is adapted from the `Contributor Covenant`_ homepage, version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html .. _Contributor Covenant: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq ================================================ FILE: CONTRIBUTING.rst ================================================ ============ Contributing ============ Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given. All community members should read and abide by our :ref:`ref-code-of-conduct`. Bug reports =========== When `reporting a bug `_ please include: * Your operating system name and version. * Any details about your local setup that might be helpful in troubleshooting. * Detailed steps to reproduce the bug. Documentation improvements ========================== Hangar could always use more documentation, whether as part of the official Hangar docs, in docstrings, or even on the web in blog posts, articles, and such. Feature requests and feedback ============================= The best way to send feedback is to file an issue at https://github.com/tensorwerk/hangar-py/issues. If you are proposing a feature: * Explain in detail how it would work. * Keep the scope as narrow as possible, to make it easier to implement. * Remember that this is a volunteer-driven project, and that code contributions are welcome :) Development =========== To set up `hangar-py` for local development: 1. Fork `hangar-py `_ (look for the "Fork" button). 2. Clone your fork locally:: git clone git@github.com:your_name_here/hangar-py.git 3. Create a branch for local development:: git checkout -b name-of-your-bugfix-or-feature Now you can make your changes locally. 4. When you're done making changes, run all the checks, doc builder and spell checker with `tox `_ one command:: tox 5. Commit your changes and push your branch to GitHub:: git add . git commit -m "Your detailed description of your changes." git push origin name-of-your-bugfix-or-feature 6. Submit a pull request through the GitHub website. Pull Request Guidelines ----------------------- If you need some code review or feedback while you're developing the code just make the pull request. For merging, you should: 1. Include passing tests (run ``tox``) [1]_. 2. Update documentation when there's new API, functionality etc. 3. Add a note to ``CHANGELOG.rst`` about the changes. 4. Add yourself to ``AUTHORS.rst``. .. [1] If you don't have all the necessary python versions available locally you can rely on Travis - it will `run the tests `_ for each change you add in the pull request. It will be slower though ... Tips ---- To run a subset of tests:: tox -e envname -- pytest -k test_myfeature To run all the test environments in *parallel* (you need to ``pip install detox``):: detox ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. 
This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Copyright 2019 Richard Izzo Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MANIFEST.in ================================================ graft docs graft src graft tests include .bumpversion.cfg include .coveragerc include .editorconfig include AUTHORS.rst include CHANGELOG.rst include CONTRIBUTING.rst include CODE_OF_CONDUCT.rst include LICENSE include README.rst include tox.ini include mypy.ini include setup.py global-exclude *.py[cod] *.so *.DS_Store global-exclude __pycache__ .mypy_cache .pytest_cache .hypothesis ================================================ FILE: README.rst ================================================ ======== Overview ======== .. start-badges .. 
list-table:: :stub-columns: 1 * - docs - |docs| * - tests - | |gh-build-status| |codecov| | |lgtm| * - package - | |version| |wheel| |conda-forge| | |supported-versions| |supported-implementations| | |license| .. |docs| image:: https://readthedocs.org/projects/hangar-py/badge/?style=flat :target: https://readthedocs.org/projects/hangar-py :alt: Documentation Status .. |gh-build-status| image:: https://github.com/tensorwerk/hangar-py/workflows/Run%20Test%20Suite/badge.svg?branch=master :alt: Build Status :target: https://github.com/tensorwerk/hangar-py/actions?query=workflow%3A%22Run+Test+Suite%22+branch%3Amaster+event%3Apush+is%3Acompleted .. |codecov| image:: https://codecov.io/gh/tensorwerk/hangar-py/branch/master/graph/badge.svg :alt: Code Coverage :target: https://codecov.io/gh/tensorwerk/hangar-py .. |lgtm| image:: https://img.shields.io/lgtm/grade/python/g/tensorwerk/hangar-py.svg?logo=lgtm&logoWidth=18 :alt: Language grade: Python :target: https://lgtm.com/projects/g/tensorwerk/hangar-py/context:python .. |version| image:: https://img.shields.io/pypi/v/hangar.svg :alt: PyPI Package latest release :target: https://pypi.org/project/hangar .. |license| image:: https://img.shields.io/github/license/tensorwerk/hangar-py :alt: GitHub license :target: https://github.com/tensorwerk/hangar-py/blob/master/LICENSE .. |conda-forge| image:: https://img.shields.io/conda/vn/conda-forge/hangar.svg :alt: Conda-Forge Latest Version :target: https://anaconda.org/conda-forge/hangar .. |wheel| image:: https://img.shields.io/pypi/wheel/hangar.svg :alt: PyPI Wheel :target: https://pypi.org/project/hangar .. |supported-versions| image:: https://img.shields.io/pypi/pyversions/hangar.svg :alt: Supported versions :target: https://pypi.org/project/hangar .. |supported-implementations| image:: https://img.shields.io/pypi/implementation/hangar.svg :alt: Supported implementations :target: https://pypi.org/project/hangar .. end-badges Hangar is version control for tensor data. Commit, branch, merge, revert, and collaborate in the data-defined software era. * Free software: Apache 2.0 license What is Hangar? =============== Hangar is based off the belief that too much time is spent collecting, managing, and creating home-brewed version control systems for data. At its core Hangar is designed to solve many of the same problems faced by traditional code version control system (i.e. ``Git``), just adapted for numerical data: * Time travel through the historical evolution of a dataset * Zero-cost Branching to enable exploratory analysis and collaboration * Cheap Merging to build datasets over time (with multiple collaborators) * Completely abstracted organization and management of data files on disk * Ability to only retrieve a small portion of the data (as needed) while still maintaining complete historical record * Ability to push and pull changes directly to collaborators or a central server (i.e. a truly distributed version control system) The ability of version control systems to perform these tasks for codebases is largely taken for granted by almost every developer today; however, we are in-fact standing on the shoulders of giants, with decades of engineering which has resulted in these phenomenally useful tools. Now that a new era of "Data-Defined software" is taking hold, we find there is a strong need for analogous version control systems which are designed to handle numerical data at large scale... Welcome to Hangar! 
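A typical session maps directly onto the workflow diagrammed below. The following is a minimal sketch only: the path, column name, and sample key are purely illustrative, and the method names (such as ``add_ndarray_column``) are assumed from the 0.5-series column API, so exact signatures may differ slightly between releases.

.. code-block:: python

    import numpy as np
    from hangar import Repository

    # point at a directory that is not yet a Hangar repository (illustrative
    # path), then write the initial repository structure into it
    repo = Repository(path='/path/to/repo', exists=False)
    repo.init(user_name='Your Name', user_email='you@example.com')

    # open a write-enabled checkout of the current branch
    co = repo.checkout(write=True)

    # create an ndarray column from a prototype array and add one sample
    col = co.add_ndarray_column('images', prototype=np.zeros((28, 28), dtype=np.uint8))
    col['sample-0'] = np.ones((28, 28), dtype=np.uint8)

    # commit the staged changes and release the writer lock
    co.commit('add first training image')
    co.close()

Read-only checkouts (``repo.checkout()`` without ``write=True``) expose the same dictionary-style access for retrieving committed samples.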
The Hangar Workflow: :: Checkout Branch | ▼ Create/Access Data | ▼ Add/Remove/Update Samples | ▼ Commit Log Style Output: .. code-block:: text * 5254ec (master) : merge commit combining training updates and new validation samples |\ | * 650361 (add-validation-data) : Add validation labels and image data in isolated branch * | 5f15b4 : Add some metadata for later reference and add new training samples received after initial import |/ * baddba : Initial commit adding training images and labels Learn more about what Hangar is all about at https://hangar-py.readthedocs.io/ Installation ============ Hangar is in early alpha development! :: pip install hangar Documentation ============= https://hangar-py.readthedocs.io/ Development =========== To run all the tests run:: tox Note, to combine the coverage data from all the tox environments run: .. list-table:: :widths: 10 90 :stub-columns: 1 - - Windows - :: set PYTEST_ADDOPTS=--cov-append tox - - Other - :: PYTEST_ADDOPTS=--cov-append tox ================================================ FILE: asv_bench/README.rst ================================================ Hangar Performance Benchmarking Suite ===================================== A set of benchmarking tools is included in order to track the performance of common Hangar operations over the course of time. The benchmark suite is run via the phenomenal `Airspeed Velocity (ASV) `_ project. Benchmarks can be viewed at the following web link, or by examining the raw data files in the separate benchmark results repo. - `Benchmark Web View `_ - `Benchmark Results Repo `_ .. figure:: ../docs/img/asv-detailed.png :align: center Purpose ******* In addition to providing historical metrics and insight into application performance over many releases of Hangar, *the benchmark suite is used as a canary to identify potentially problematic pull requests.* All PRs to the Hangar repository are automatically benchmarked by our CI system to compare the performance of proposed changes to that of the current ``master`` branch. *The results of this canary are explicitly NOT to be used as the "be-all-end-all" decider of whether a PR is suitable to be merged or not.* Instead, it is meant to serve the following purposes: 1. **Help contributors understand the consequences of some set of changes on the greater system early in the PR process.** Simple code is best; if there's no obvious performance degradation or significant improvement to be had, then there's no need (or real rationale) for using more complex algorithms or data structures. It's more work for the author, the project maintainers, and the long-term health of the codebase. 2. **Not everything can be caught by the capabilities of a traditional test suite.** Hangar is fairly flat/modular in structure, but there are certain hotspots in the codebase where a simple change could drastically degrade performance. It's not always obvious where these hotspots are, and even a change which is functionally identical (introducing no issues/bugs to the end user) can unknowingly cross a line and introduce some large regression completely unnoticed to the authors/reviewers. 3. Sometimes tradeoffs need to be made when introducing something new to a system. Whether this be due to fundamental CS problems (space vs. time) or simple matters of practicality vs. purity, it's always easier to act in environments where relevant information is available before a decision is made.
**Identifying and quantifying tradeoffs/regressions/benefits during development is the only way we can make informed decisions.** The only time to be OK with some regression is when we know about it in advance; it might be the right choice at the time, but if we don't measure we will never know. Important Notes on Using/Modifying the Benchmark Suite ****************************************************** 1. **Do not commit any of the benchmark results, environment files, or generated visualizations to the repository**. We store benchmark results in a `separate repository `_ so as not to clutter the main repo with unnecessary data. The default directories these are generated in are excluded in our ``.gitignore`` config, so barring some unusual git usage patterns, this should not be a day-to-day concern. 2. Proposed changes to the benchmark suite should be made to the code in this repository first. The benchmark results repository mirror will be synchronized upon approval/merge of changes to the main Hangar repo. Introduction to Running Benchmarks ********************************** As ASV sets up and manages its own virtual environments and source installations, benchmark execution is not run via ``tox``. While a brief tutorial is included below, please refer to the `ASV Docs `_ for detailed information on how to run, understand, and write ASV benchmarks. First Time Setup ---------------- 1. Ensure that ``virtualenv``, ``setuptools``, and ``pip`` are updated to the latest version. 2. Install ASV: ``$ pip install asv``. 3. Open a terminal and navigate to the ``hangar-py/asv_bench`` directory. 4. Run ``$ asv machine`` to record details of your machine; it is OK to just use the defaults. Running Benchmarks ------------------ Refer to the `using ASV `_ page for a full tutorial, paying close attention to the `asv run `_ command. Generally ``asv run`` requires a range of commits to benchmark across (specified via either branch names, tags, or commit digests). To benchmark every commit between the current master ``HEAD`` and ``v0.3.0``, you would execute:: $ asv run v0.3.0..master However, this may result in a larger workload than you are willing to wait around for. To limit the number of commits, you can specify the ``--steps=N`` option to only benchmark ``N`` commits at most between ``HEAD`` and ``v0.3.0``. The most useful tool during development is the `asv continuous `_ command. Using the following syntax will benchmark any changes in a local development branch against the base ``master`` commit:: $ asv continuous origin/master HEAD Running `asv compare `_ will generate a quick summary of any performance differences:: $ asv compare origin/master HEAD Visualizing Results ------------------- After generating benchmark data for a number of commits through history, the results can be reviewed in (an automatically generated) local web interface by running the following commands:: $ asv publish $ asv preview Navigating to ``http://127.0.0.1:8080/`` will pull up an interactive webpage where the full set of benchmark graphs/exploration utilities can be viewed. This will look something like the image below. .. figure:: ../docs/img/asv-main.png :align: center ================================================ FILE: asv_bench/asv.conf.json ================================================ { // The version of the config file format. Do not change, unless // you know what you are doing.
"version": 1, // The name of the project being benchmarked "project": "hangar", // The project's homepage "project_url": "https://hangar-py.readthedocs.io", // The URL or local path of the source code repository for the // project being benchmarked "repo": "..", // The Python project's subdirectory in your repo. If missing or // the empty string, the project is assumed to be located at the root // of the repository. // "repo_subdir": "", // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. // // "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"], // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], // "build_command": [ // "python setup.py build", // "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}" // ], // List of branches to benchmark. If not provided, defaults to "master" // (for git) or "default" (for mercurial). "branches": ["master"], // for git // "branches": ["default"], // for mercurial // The DVCS being used. If not set, it will be automatically // determined from "repo" by looking at the protocol in the URL // (if remote), or by looking for special directories, such as // ".git" (if local). "dvcs": "git", // The tool to use to create environments. May be "conda", // "virtualenv" or other value depending on the plugins in use. // If missing or the empty string, the tool will be automatically // determined by looking for tools on the PATH environment // variable. "environment_type": "virtualenv", // timeout in seconds for installing any dependencies in environment // defaults to 10 min //"install_timeout": 600, // the base URL to show a commit for the project. "show_commit_url": "http://github.com/tensorwerk/hangar-py/commit/", // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. // "pythons": ["3.7"], // The list of conda channel names to be searched for benchmark // dependency packages in the specified order // "conda_channels": ["conda-forge", "defaults"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty // list or empty string indicates to just test against the default // (latest) version. null indicates that the package is to not be // installed. If the package to be tested is only available from // PyPi, and the 'environment_type' is conda, then you can preface // the package name by 'pip+', and the package will be installed via // pip (with all the conda available packages installed first, // followed by the pip installed packages). // // "matrix": { // "numpy": ["1.6", "1.7"], // "six": ["", null], // test with and without six installed // "pip+emcee": [""], // emcee is only available for install with pip. // }, "matrix": { "req": { "Cython": [], // latest version of Cython }, }, // Combinations of libraries/python versions can be excluded/included // from the set to test. Each entry is a dictionary containing additional // key-value pairs to include/exclude. // // An exclude entry excludes entries where all values match. The // values are regexps that should match the whole string. // // An include entry adds an environment. Only the packages listed // are installed. The 'python' key is required. The exclude rules // do not apply to includes. 
// // In addition to package names, the following keys are available: // // - python // Python version, as in the *pythons* variable above. // - environment_type // Environment type, as above. // - sys_platform // Platform, as in sys.platform. Possible values for the common // cases: 'linux2', 'win32', 'cygwin', 'darwin'. // // "exclude": [ // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows // {"environment_type": "conda", "six": null}, // don't run without six on conda // ], // // "include": [ // // additional env for python2.7 // {"python": "2.7", "numpy": "1.8"}, // // additional env if run on windows+conda // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, // ], // The directory (relative to the current directory) that benchmarks are // stored in. If not provided, defaults to "benchmarks" "benchmark_dir": "benchmarks", // The directory (relative to the current directory) to cache the Python // environments in. If not provided, defaults to "env" "env_dir": "env", // The directory (relative to the current directory) that raw benchmark // results are stored in. If not provided, defaults to "results". "results_dir": "results", // The directory (relative to the current directory) that the html tree // should be written to. If not provided, defaults to "html". "html_dir": "html", // The number of characters to retain in the commit hashes. "hash_length": 8, // `asv` will cache results of the recent builds in each // environment, making them faster to install next time. This is // the number of builds to keep, per environment. "build_cache_size": 2 // The commits after which the regression search in `asv publish` // should start looking for regressions. Dictionary whose keys are // regexps matching to benchmark names, and values corresponding to // the commit (exclusive) after which to start looking for // regressions. The default is to start from the first commit // with results. If the commit is `null`, regression detection is // skipped for the matching benchmark. // // "regressions_first_commits": { // "some_benchmark": "352cdf", // Consider regressions only after this commit // "another_benchmark": null, // Skip regression detection altogether // }, // The thresholds for relative change in results, after which `asv // publish` starts reporting regressions. Dictionary of the same // form as in ``regressions_first_commits``, with values // indicating the thresholds. If multiple entries match, the // maximum is taken. If no entry matches, the default is 5%. // // "regressions_thresholds": { // "some_benchmark": 0.01, // Threshold of 1% // "another_benchmark": 0.5, // Threshold of 50% // }, } ================================================ FILE: asv_bench/benchmarks/__init__.py ================================================ ================================================ FILE: asv_bench/benchmarks/backend_comparisons.py ================================================ # Write the benchmarking functions here. # See "Writing benchmarks" in the asv docs for more information. 
import numpy as np import os from hangar import Repository from tempfile import mkdtemp from shutil import rmtree from hangar.utils import folder_size # ------------------------- fixture functions ---------------------------------- class _WriterSuite: params = ['hdf5_00', 'hdf5_01', 'numpy_10'] param_names = ['backend'] processes = 2 repeat = (2, 4, 30.0) # repeat == tuple (min_repeat, max_repeat, max_time) number = 2 warmup_time = 0 def setup(self, backend): # self.method self.current_iter_number = 0 self.backend_code = { 'numpy_10': '10', 'hdf5_00': '00', 'hdf5_01': '01', } # self.num_samples self.sample_shape = (50, 50, 20) self.tmpdir = mkdtemp() self.repo = Repository(path=self.tmpdir, exists=False) self.repo.init('tester', 'foo@test.bar', remove_old=True) self.co = self.repo.checkout(write=True) component_arrays = [] ndims = len(self.sample_shape) for idx, shape in enumerate(self.sample_shape): layout = [1 for i in range(ndims)] layout[idx] = shape component = np.hamming(shape).reshape(*layout) * 100 component_arrays.append(component.astype(np.float32)) self.arr = np.prod(component_arrays).astype(np.float32) try: self.aset = self.co.arraysets.init_arrayset( 'aset', prototype=self.arr, backend_opts=self.backend_code[backend]) except TypeError: try: self.aset = self.co.arraysets.init_arrayset( 'aset', prototype=self.arr, backend=self.backend_code[backend]) except ValueError: raise NotImplementedError except ValueError: raise NotImplementedError except AttributeError: self.aset = self.co.add_ndarray_column( 'aset', prototype=self.arr, backend=self.backend_code[backend]) def teardown(self, backend): self.co.close() self.repo._env._close_environments() rmtree(self.tmpdir) def write(self, backend): arr = self.arr iter_number = self.current_iter_number with self.aset as cm_aset: for i in range(self.num_samples): arr[iter_number, iter_number, iter_number] += 1 cm_aset[i] = arr self.current_iter_number += 1 # ----------------------------- Writes ---------------------------------------- class Write_50by50by20_300_samples(_WriterSuite): method = 'write' num_samples = 300 time_write = _WriterSuite.write # ----------------------------- Reads ----------------------------------------- class _ReaderSuite: params = ['hdf5_00', 'hdf5_01', 'numpy_10'] param_names = ['backend'] processes = 2 repeat = (2, 4, 30.0) # repeat == tuple (min_repeat, max_repeat, max_time) number = 3 warmup_time = 0 timeout = 60 def setup_cache(self): backend_code = { 'numpy_10': '10', 'hdf5_00': '00', 'hdf5_01': '01', } sample_shape = (50, 50, 10) num_samples = 3_000 repo = Repository(path=os.getcwd(), exists=False) repo.init('tester', 'foo@test.bar', remove_old=True) co = repo.checkout(write=True) component_arrays = [] ndims = len(sample_shape) for idx, shape in enumerate(sample_shape): layout = [1 for i in range(ndims)] layout[idx] = shape component = np.hamming(shape).reshape(*layout) * 100 component_arrays.append(component.astype(np.float32)) arr = np.prod(component_arrays).astype(np.float32) for backend, code in backend_code.items(): try: co.arraysets.init_arrayset( backend, prototype=arr, backend_opts=code) except TypeError: try: co.arraysets.init_arrayset( backend, prototype=arr, backend=code) except ValueError: pass except ValueError: pass except AttributeError: co.add_ndarray_column(backend, prototype=arr, backend=code) try: col = co.columns except AttributeError: col = co.arraysets with col as asets_cm: for aset in asets_cm.values(): changer = 0 for i in range(num_samples): arr[changer, changer, changer] += 1 aset[i] 
= arr changer += 1 co.commit('first commit') co.close() repo._env._close_environments() def setup(self, backend): self.repo = Repository(path=os.getcwd(), exists=True) self.co = self.repo.checkout(write=False) try: try: self.aset = self.co.columns[backend] except AttributeError: self.aset = self.co.arraysets[backend] except KeyError: raise NotImplementedError def teardown(self, backend): self.co.close() self.repo._env._close_environments() def read(self, backend): with self.aset as cm_aset: for i in cm_aset.keys(): arr = cm_aset[i] class Read_50by50by10_3000_samples(_ReaderSuite): method = 'read' num_samples = 3000 time_read = _ReaderSuite.read ================================================ FILE: asv_bench/benchmarks/backends/__init__.py ================================================ ================================================ FILE: asv_bench/benchmarks/backends/hdf5_00.py ================================================ # Write the benchmarking functions here. # See "Writing benchmarks" in the asv docs for more information. import numpy as np from hangar import Repository from tempfile import mkdtemp from shutil import rmtree from hangar.utils import folder_size class _WriterSuite_HDF5_00: processes = 2 repeat = (2, 4, 20.0) # repeat == tuple (min_repeat, max_repeat, max_time) number = 2 warmup_time = 0 def setup(self): # self.method # self.num_samples # self.sample_shape self.current_iter_number = 0 self.tmpdir = mkdtemp() self.repo = Repository(path=self.tmpdir, exists=False) self.repo.init('tester', 'foo@test.bar', remove_old=True) self.co = self.repo.checkout(write=True) component_arrays = [] ndims = len(self.sample_shape) for idx, shape in enumerate(self.sample_shape): layout = [1 for i in range(ndims)] layout[idx] = shape component = np.hamming(shape).reshape(*layout) * 100 component_arrays.append(component.astype(np.float32)) arr = np.prod(component_arrays).astype(np.float32) try: self.aset = self.co.arraysets.init_arrayset('aset', prototype=arr, backend_opts='00') except TypeError: self.aset = self.co.arraysets.init_arrayset('aset', prototype=arr, backend='00') except ValueError: # marks as skipped benchmark for commits which do not have this backend. 
raise NotImplementedError except AttributeError: self.aset = self.co.add_ndarray_column('aset', prototype=arr, backend='00') if self.method == 'read': with self.aset as cm_aset: for i in range(self.num_samples): arr[0, 0, 0] += 1 cm_aset[i] = arr self.co.commit('first commit') self.co.close() self.co = self.repo.checkout(write=False) try: self.aset = self.co.columns['aset'] except AttributeError: self.aset = self.co.arraysets['aset'] else: self.arr = arr def teardown(self): self.co.close() self.repo._env._close_environments() rmtree(self.tmpdir) def read(self): with self.aset as cm_aset: for k in cm_aset.keys(): arr = cm_aset[k] def write(self): arr = self.arr iter_num = self.current_iter_number with self.aset as cm_aset: for i in range(self.num_samples): arr[iter_num, iter_num, iter_num] += 1 cm_aset[i] = arr self.current_iter_number += 1 def size(self): return folder_size(self.repo._env.repo_path, recurse=True) class Write_50by50by10_1_samples(_WriterSuite_HDF5_00): method = 'write' sample_shape = (50, 50, 10) num_samples = 1 time_write = _WriterSuite_HDF5_00.write class Write_50by50by10_100_samples(_WriterSuite_HDF5_00): method = 'write' sample_shape = (50, 50, 10) num_samples = 100 time_write = _WriterSuite_HDF5_00.write # ----------------------------- Reads ----------------------------------------- class Read_50by50by10_1_samples(_WriterSuite_HDF5_00): method = 'read' sample_shape = (50, 50, 10) num_samples = 1 time_read = _WriterSuite_HDF5_00.read class Read_50by50by10_100_samples(_WriterSuite_HDF5_00): method = 'read' sample_shape = (50, 50, 10) num_samples = 100 time_read = _WriterSuite_HDF5_00.read class Read_50by50by10_300_samples(_WriterSuite_HDF5_00): method = 'read' sample_shape = (50, 50, 10) num_samples = 300 time_read = _WriterSuite_HDF5_00.read track_repo_size = _WriterSuite_HDF5_00.size track_repo_size.unit = 'bytes' ================================================ FILE: asv_bench/benchmarks/backends/hdf5_01.py ================================================ # Write the benchmarking functions here. # See "Writing benchmarks" in the asv docs for more information. import numpy as np from hangar import Repository from tempfile import mkdtemp from shutil import rmtree from hangar.utils import folder_size class _WriterSuite_HDF5_01: processes = 2 repeat = (2, 4, 20.0) # repeat == tuple (min_repeat, max_repeat, max_time) number = 2 warmup_time = 0 def setup(self): # self.method # self.num_samples # self.sample_shape self.current_iter_number = 0 self.tmpdir = mkdtemp() self.repo = Repository(path=self.tmpdir, exists=False) self.repo.init('tester', 'foo@test.bar', remove_old=True) self.co = self.repo.checkout(write=True) component_arrays = [] ndims = len(self.sample_shape) for idx, shape in enumerate(self.sample_shape): layout = [1 for i in range(ndims)] layout[idx] = shape component = np.hamming(shape).reshape(*layout) * 100 component_arrays.append(component.astype(np.float32)) arr = np.prod(component_arrays).astype(np.float32) try: self.aset = self.co.arraysets.init_arrayset('aset', prototype=arr, backend_opts='01') except TypeError: try: self.aset = self.co.arraysets.init_arrayset('aset', prototype=arr, backend='01') except ValueError: raise NotImplementedError except ValueError: # marks as skipped benchmark for commits which do not have this backend. 
raise NotImplementedError except AttributeError: self.aset = self.co.add_ndarray_column('aset', prototype=arr, backend='01') if self.method == 'read': with self.aset as cm_aset: for i in range(self.num_samples): arr[0, 0, 0] += 1 cm_aset[i] = arr self.co.commit('first commit') self.co.close() self.co = self.repo.checkout(write=False) try: self.aset = self.co.columns['aset'] except AttributeError: self.aset = self.co.arraysets['aset'] else: self.arr = arr def teardown(self): self.co.close() self.repo._env._close_environments() rmtree(self.tmpdir) def read(self): with self.aset as cm_aset: for k in cm_aset.keys(): arr = cm_aset[k] def write(self): arr = self.arr iter_num = self.current_iter_number with self.aset as cm_aset: for i in range(self.num_samples): arr[iter_num, iter_num, iter_num] += 1 cm_aset[i] = arr self.current_iter_number += 1 def size(self): return folder_size(self.repo._env.repo_path, recurse=True) class Write_50by50by10_1_samples(_WriterSuite_HDF5_01): method = 'write' sample_shape = (50, 50, 10) num_samples = 1 time_write = _WriterSuite_HDF5_01.write class Write_50by50by10_100_samples(_WriterSuite_HDF5_01): method = 'write' sample_shape = (50, 50, 10) num_samples = 100 time_write = _WriterSuite_HDF5_01.write # ----------------------------- Reads ----------------------------------------- class Read_50by50by10_1_samples(_WriterSuite_HDF5_01): method = 'read' sample_shape = (50, 50, 10) num_samples = 1 time_read = _WriterSuite_HDF5_01.read class Read_50by50by10_100_samples(_WriterSuite_HDF5_01): method = 'read' sample_shape = (50, 50, 10) num_samples = 100 time_read = _WriterSuite_HDF5_01.read class Read_50by50by10_300_samples(_WriterSuite_HDF5_01): method = 'read' sample_shape = (50, 50, 10) num_samples = 300 time_read = _WriterSuite_HDF5_01.read track_repo_size = _WriterSuite_HDF5_01.size track_repo_size.unit = 'bytes' ================================================ FILE: asv_bench/benchmarks/backends/numpy_10.py ================================================ # Write the benchmarking functions here. # See "Writing benchmarks" in the asv docs for more information. import numpy as np from hangar import Repository from tempfile import mkdtemp from shutil import rmtree from hangar.utils import folder_size class _WriterSuite_NUMPY_10: processes = 2 repeat = (2, 4, 20.0) # repeat == tuple (min_repeat, max_repeat, max_time) number = 2 warmup_time = 0 def setup(self): # self.method # self.num_samples # self.sample_shape self.current_iter_number = 0 self.tmpdir = mkdtemp() self.repo = Repository(path=self.tmpdir, exists=False) self.repo.init('tester', 'foo@test.bar', remove_old=True) self.co = self.repo.checkout(write=True) component_arrays = [] ndims = len(self.sample_shape) for idx, shape in enumerate(self.sample_shape): layout = [1 for i in range(ndims)] layout[idx] = shape component = np.hamming(shape).reshape(*layout) * 100 component_arrays.append(component.astype(np.float32)) arr = np.prod(component_arrays).astype(np.float32) try: self.aset = self.co.arraysets.init_arrayset('aset', prototype=arr, backend_opts='10') except TypeError: self.aset = self.co.arraysets.init_arrayset('aset', prototype=arr, backend='10') except ValueError: # marks as skipped benchmark for commits which do not have this backend. 
raise NotImplementedError except AttributeError: self.aset = self.co.add_ndarray_column('aset', prototype=arr, backend='10') if self.method == 'read': with self.aset as cm_aset: for i in range(self.num_samples): arr[0, 0, 0] += 1 cm_aset[i] = arr self.co.commit('first commit') self.co.close() self.co = self.repo.checkout(write=False) try: self.aset = self.co.columns['aset'] except AttributeError: self.aset = self.co.arraysets['aset'] else: self.arr = arr def teardown(self): self.co.close() self.repo._env._close_environments() rmtree(self.tmpdir) def read(self): with self.aset as cm_aset: for k in cm_aset.keys(): arr = cm_aset[k] def write(self): arr = self.arr iter_num = self.current_iter_number with self.aset as cm_aset: for i in range(self.num_samples): arr[iter_num, iter_num, iter_num] += 1 cm_aset[i] = arr self.current_iter_number += 1 def size(self): return folder_size(self.repo._env.repo_path, recurse=True) class Write_50by50by10_1_samples(_WriterSuite_NUMPY_10): method = 'write' sample_shape = (50, 50, 10) num_samples = 1 time_write = _WriterSuite_NUMPY_10.write class Write_50by50by10_100_samples(_WriterSuite_NUMPY_10): method = 'write' sample_shape = (50, 50, 10) num_samples = 100 time_write = _WriterSuite_NUMPY_10.write # ----------------------------- Reads ----------------------------------------- class Read_50by50by10_1_samples(_WriterSuite_NUMPY_10): method = 'read' sample_shape = (50, 50, 10) num_samples = 1 time_read = _WriterSuite_NUMPY_10.read class Read_50by50by10_100_samples(_WriterSuite_NUMPY_10): method = 'read' sample_shape = (50, 50, 10) num_samples = 100 time_read = _WriterSuite_NUMPY_10.read class Read_50by50by10_300_samples(_WriterSuite_NUMPY_10): method = 'read' sample_shape = (50, 50, 10) num_samples = 300 time_read = _WriterSuite_NUMPY_10.read track_repo_size = _WriterSuite_NUMPY_10.size track_repo_size.unit = 'bytes' ================================================ FILE: asv_bench/benchmarks/commit_and_checkout.py ================================================ from tempfile import mkdtemp from shutil import rmtree import numpy as np from hangar import Repository class MakeCommit(object): params = (5_000, 20_000, 50_000) param_names = ['num_samples'] processes = 2 repeat = (2, 4, 20) number = 1 warmup_time = 0 def setup(self, num_samples): self.tmpdir = mkdtemp() self.repo = Repository(path=self.tmpdir, exists=False) self.repo.init('tester', 'foo@test.bar', remove_old=True) self.co = self.repo.checkout(write=True) arr = np.array([0,], dtype=np.uint8) try: aset = self.co.arraysets.init_arrayset('aset', prototype=arr, backend_opts='10') except TypeError: aset = self.co.arraysets.init_arrayset('aset', prototype=arr, backend='10') except AttributeError: aset = self.co.add_ndarray_column('aset', prototype=arr, backend='10') with aset as cm_aset: for i in range(num_samples): arr[:] = i % 255 cm_aset[i] = arr def teardown(self, num_samples): self.co.close() self.repo._env._close_environments() rmtree(self.tmpdir) def time_commit(self, num_samples): self.co.commit('hello') class CheckoutCommit(object): params = (5_000, 20_000, 50_000) param_names = ['num_samples'] processes = 2 number = 1 repeat = (2, 4, 20) warmup_time = 0 def setup(self, num_samples): self.tmpdir = mkdtemp() self.repo = Repository(path=self.tmpdir, exists=False) self.repo.init('tester', 'foo@test.bar', remove_old=True) self.co = self.repo.checkout(write=True) arr = np.array([0,], dtype=np.uint8) try: aset = self.co.arraysets.init_arrayset('aset', prototype=arr, backend_opts='10') except TypeError: 
aset = self.co.arraysets.init_arrayset('aset', prototype=arr, backend='10') except AttributeError: aset = self.co.add_ndarray_column('aset', prototype=arr, backend='10') with aset as cm_aset: for i in range(num_samples): arr[:] = i % 255 cm_aset[i] = arr self.co.commit('first') self.co.close() self.co = None def teardown(self, num_samples): try: self.co.close() except PermissionError: pass self.repo._env._close_environments() rmtree(self.tmpdir) def time_checkout_read_only(self, num_samples): self.co = self.repo.checkout(write=False) def time_checkout_write_enabled(self, num_samples): self.co = self.repo.checkout(write=True) self.co.close() ================================================ FILE: asv_bench/benchmarks/package.py ================================================ class TimeImport(object): processes = 2 repeat = (5, 10, 10.0) def timeraw_import(self): return """ from hangar import Repository """ ================================================ FILE: codecov.yml ================================================ comment: layout: "diff, files" behavior: default require_changes: false # if true: only post the comment if coverage changes coverage: range: 60..100 round: nearest precision: 2 ================================================ FILE: docs/Tutorial-001.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Part 1: Creating A Repository And Working With Data\n", "\n", "This tutorial will review the first steps of working with a hangar repository.\n", "\n", "To fit with the beginner's theme, we will use the MNIST dataset. Later examples will show off how to work with much more complex data." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from hangar import Repository\n", "\n", "import numpy as np\n", "import pickle\n", "import gzip\n", "import matplotlib.pyplot as plt\n", "\n", "from tqdm import tqdm" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Creating & Interacting with a Hangar Repository\n", "\n", "Hangar is designed to “just make sense” in every operation you have to perform.\n", "As such, there is a single interface which all interaction begins with: the\n", " designed to “just make sense” in every operation you have to perform.\n", "As such, there is a single interface which all interaction begins with: the\n", "[Repository](api.rst#hangar.repository.Repository) object.\n", "\n", "Whether a hangar repository exists at the path you specify or not, just tell\n", "hangar where it should live!\n", "\n", "#### Intitializing a repository\n", "\n", "The first time you want to work with a new repository, the repository\n", "[init()](api.rst#hangar.repository.Repository.init) method\n", "must be called. This is where you provide Hangar with your name and email\n", "address (to be used in the commit log), as well as implicitly confirming that\n", "you do want to create the underlying data files hangar uses on disk." 
] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hangar Repo initialized at: /Users/rick/projects/tensorwerk/hangar/dev/mnist/.hangar\n" ] }, { "data": { "text/plain": [ "'/Users/rick/projects/tensorwerk/hangar/dev/mnist/.hangar'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repo = Repository(path='/Users/rick/projects/tensorwerk/hangar/dev/mnist/')\n", "\n", "# First time a repository is accessed only!\n", "# Note: if you feed a path to the `Repository` which does not contain a pre-initialized hangar repo,\n", "# when the Repository object is initialized it will let you know that you need to run `init()`\n", "\n", "repo.init(user_name='Rick Izzo', user_email='rick@tensorwerk.com', remove_old=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Checking out the repo for writing\n", "\n", "A repository can be checked out in two modes:\n", "\n", "1. [write-enabled](api.rst#hangar.checkout.WriterCheckout): applies all operations to the staging area’s current\n", " state. Only one write-enabled checkout can be active at a different time,\n", " must be closed upon last use, or manual intervention will be needed to remove\n", " the writer lock.\n", "\n", "2. [read-only](api.rst#read-only-checkout): checkout a commit or branch to view repository state as it\n", " existed at that point in time.\n", "\n", "#### Lots of useful information is in the iPython `__repr__`\n", "\n", "If you're ever in doubt about what the state of the object your working\n", "on is, just call its reps, and the most relevant information will be\n", "sent to your screen!" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar WriterCheckout \n", " Writer : True \n", " Base Branch : master \n", " Num Columns : 0\n" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co = repo.checkout(write=True)\n", "co" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### A checkout allows access to [columns](api.rst#hangar.columns.column.Columns)\n", "\n", "The [columns](api.rst#hangar.checkout.WriterCheckout.columns) attributes\n", "of a checkout provide the interface to working with all of the data on disk!" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Columns \n", " Writeable : True \n", " Number of Columns : 0 \n", " Column Names / Partial Remote References: \n", " - " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Before data can be added to a repository, a column must be initialized.\n", "\n", "We're going to first load up a the MNIST pickled dataset so it can be added to\n", "the repo!" 
] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Load the dataset\n", "with gzip.open('/Users/rick/projects/tensorwerk/hangar/dev/data/mnist.pkl.gz', 'rb') as f:\n", " train_set, valid_set, test_set = pickle.load(f, encoding='bytes')\n", "\n", "def rescale(array):\n", " array = array * 256\n", " rounded = np.round(array)\n", " return rounded.astype(np.uint8())\n", "\n", "sample_trimg = rescale(train_set[0][0])\n", "sample_trlabel = np.array([train_set[1][0]])\n", "trimgs = rescale(train_set[0])\n", "trlabels = train_set[1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Before data can be added to a repository, a column must be initialized.\n", "\n", "An \"Column\" is a named grouping of data samples where each sample shares a\n", "number of similar attributes and array properties.\n", "\n", "See the docstrings below or in [add_ndarray_column()](api.rst#hangar.checkout.WriterCheckout.add_ndarray_column)\n", "\n", ".. include:: ./noindexapi/apiinit.rst" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "col = co.add_ndarray_column(name='mnist_training_images', prototype=trimgs[0])" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : mnist_training_images \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint8 \n", " Shape : (784,) \n", " Number of Samples : 0 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "col" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Interaction\n", "\n", "#### Through columns attribute\n", "\n", "When a column is initialized, a column accessor object will be returned,\n", "however, depending on your use case, this may or may not be the most convenient\n", "way to access a arrayset.\n", "\n", "In general, we have implemented a full `dict` mapping interface on top of all\n", "objects. To access the `'mnist_training_images'` arrayset you can just use a\n", "dict style access like the following (note: if operating in iPython/Jupyter, the\n", "arrayset keys will autocomplete for you).\n", "\n", "The column objects returned here contain many useful instrospecion methods which\n", "we will review over the rest of the tutorial." 
] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : mnist_training_images \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint8 \n", " Shape : (784,) \n", " Number of Samples : 0 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns['mnist_training_images']" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : mnist_training_images \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint8 \n", " Shape : (784,) \n", " Number of Samples : 0 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_aset = co.columns['mnist_training_images']\n", "\n", "# OR an equivalent way using the `.get()` method\n", "\n", "train_aset = co.columns.get('mnist_training_images')\n", "train_aset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Through the checkout object (arrayset and sample access)\n", "\n", "In addition to the standard `co.columns` access methods, we have implemented a convenience mapping to [columns](api.rst#hangar.columns.column.Columns) and [flat samples](api.rst#hangar.columns.layout_flat.FlatSampleWriter) or [nested samples](api.rst#hangar.columns.layout_nested.NestedSampleWriter) / [nested subsamples](api.rst#hangar.columns.layout_nested.FlatSubsampleWriter) (ie. data) for both reading and writing from the [checkout](api.rst#hangar.checkout.WriterCheckout) object itself.\n", "\n", "To get the same arrayset object from the checkout, simply use:" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : mnist_training_images \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint8 \n", " Shape : (784,) \n", " Number of Samples : 0 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_asets = co['mnist_training_images']\n", "train_asets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Though that works as expected, most use cases will take advantage of adding and reading data from multiple columns / samples at a time. This is shown in the next section." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Adding Data\n", "\n", "To add data to a named arrayset, we can use dict-style setting\n", "(refer to the `__setitem__`, `__getitem__`, and `__delitem__` methods),\n", "or the `update()` method. Sample keys can be either `str` or `int` type." 
] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "train_aset['0'] = trimgs[0]\n", "\n", "data = {\n", " '1': trimgs[1],\n", " '2': trimgs[2],\n", "}\n", "train_aset.update(data)\n", "\n", "train_aset[51] = trimgs[51]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Using the checkout method" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "co['mnist_training_images', 60] = trimgs[60]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### How many samples are in the arrayset?" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(train_aset)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Containment Testing" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'hi' in train_aset" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'0' in train_aset" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "60 in train_aset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Dictionary Style Retrieval for known keys" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "True\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAOYElEQVR4nO3dbYxc5XnG8euKbUwxJvHGseMQFxzjFAg0Jl0ZkBFQoVCCIgGKCLGiiFBapwlOQutKUFoVWtHKrRIiSimSKS6m4iWQgPAHmsSyECRqcFmoAROHN+MS4+0aswIDIfZ6fffDjqsFdp5dZs68eO//T1rNzLnnzLk1cPmcmeeceRwRAjD5faDTDQBoD8IOJEHYgSQIO5AEYQeSmNrOjR3i6XGoZrRzk0Aqv9Fb2ht7PFatqbDbPkfS9ZKmSPrXiFhVev6hmqGTfVYzmwRQsDE21K01fBhve4qkGyV9TtLxkpbZPr7R1wPQWs18Zl8i6fmI2BoReyXdJem8atoCULVmwn6kpF+Nery9tuwdbC+33We7b0h7mtgcgGY0E/axvgR4z7m3EbE6InojoneapjexOQDNaCbs2yXNH/X445J2NNcOgFZpJuyPSlpke4HtQyR9SdK6atoCULWGh94iYp/tFZJ+rJGhtzUR8XRlnQGoVFPj7BHxgKQHKuoFQAtxuiyQBGEHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJNDWLK7qfp5b/E0/5yOyWbv+ZPz+6bm34sP3FdY9auLNYP+wbLtb/97pD6tYe7/1+cd1dw28V6yffs7JYP+bPHinWO6GpsNveJukNScOS9kVEbxVNAaheFXv234+IXRW8DoAW4jM7kESzYQ9JP7H9mO3lYz3B9nLbfbb7hrSnyc0BaFSzh/FLI2KH7TmS1tv+ZUQ8PPoJEbFa0mpJOsI90eT2ADSoqT17ROyo3e6UdJ+kJVU0BaB6DYfd9gzbMw/cl3S2pM1VNQagWs0cxs+VdJ/tA69zR0T8qJKuJpkpxy0q1mP6tGJ9xxkfKtbfPqX+mHDPB8vjxT/9dHm8uZP+49czi/V/+OdzivWNJ95Rt/bi0NvFdVcNfLZY/9hPD75PpA2HPSK2Svp0hb0AaCGG3oAkCDuQBGEHkiDsQBKEHUiCS1wrMHzmZ4r16269sVj/5LT6l2JOZkMxXKz/9Q1fLdanvlUe/jr1nhV1azNf3ldcd/qu8tDcYX0bi/VuxJ4dSIKwA0kQdiAJwg4kQdiBJAg7kARhB5JgnL0C05/ZUaw/9pv5xfonpw1U2U6lVvafUqxvfbP8U9S3LvxB3drr+8vj5HP/6T+L9VY6+C5gHR97diAJwg4kQdiBJAg7kARhB5Ig7EAShB1IwhHtG1E8wj1xss9q2/a6xeAlpxbru88p/9zzlCcPL9af+MYN77unA67d9bvF+qNnlMfRh197vViPU+v/APG2bxVX1YJlT5SfgPfYGBu0OwbHnMuaPTuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJME4exeYMvvDxfrwq4PF+ot31B8rf/r0NcV1l/z9N4v1OTd27ppyvH9NjbPbXmN7p+3No5b12F5v+7na7awqGwZQvYkcxt8q6d2z3l8paUNELJK0ofYYQBcbN+wR8bCkdx9Hnidpbe3+WknnV9wXgIo1+gXd3Ijol6Ta7Zx6T7S93Haf7b4h7WlwcwCa1fJv4yNidUT0RkTvNE1v9eYA1NFo2Adsz5Ok2u3O6loC0AqNhn2dpItr9y+WdH817QBolXF/N972nZLOlDTb9nZJV0taJelu25dKeknSha1scrIb3vVqU+sP7W58fvdPffkXxforN00pv8D+8hzr6B7jhj0iltUpcXYMcBDhdFkgCcIOJEHYgSQIO5AEYQeSYMrmSeC4K56tW7vkxPKgyb8dtaFYP+PCy4r1md9/pFhH92DPDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJMM4+CZSmTX7168cV131p3dvF+pXX3las/8UXLyjW478/WLc2/+9+XlxXbfyZ8wzYswNJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEkzZnNzgH55arN9+9XeK9QVTD21425+6bUWxvujm/mJ939ZtDW97smpqymYAkwNhB5Ig7EAShB1IgrADSRB2IAnCDiTBODuKYuniYv2IVduL9Ts/8eOGt33sg39UrP/O39S/jl+Shp/b2vC2D1ZNjbPbXmN7p+3No5ZdY/tl25tqf+dW2TCA6k3kMP5WSeeMsfx7EbG49vdAtW0BqNq4YY+IhyUNtqEXAC3UzBd0K2w/WTvMn1XvSbaX2+6z3TekPU1sDkAzGg37TZIWSlosqV/Sd+s9MSJWR0RvRPRO0/QGNwegWQ2FPSIGImI4IvZLulnSkmrbAlC1hsJue96ohxdI2lzvuQC6w7jj7LbvlHSmpNmSBiRdXXu8WFJI2ibpaxFRvvhYjLNPRlPmzinWd1x0TN3axiuuL677gXH2RV9+8exi/fXTXi3WJ6PSOPu4k0RExLIxFt/SdFcA2orTZYEkCDuQBGEHkiDsQBKEHUiCS1zRMXdvL0/ZfJgPKdZ/HXuL9c9/8/L6r33fxuK6Byt+ShoAYQeyIOxAEoQdSIKwA0kQdiAJwg4kMe5Vb8ht/2nln5J+4cLylM0nLN5WtzbeOPp4bhg8qVg/7P6+pl5/smHPDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJMM4+ybn3hGL92W+Vx7pvXrq2WD/90PI15c3YE0PF+iODC8ovsH/cXzdPhT07kARhB5Ig7EAShB1IgrADSRB2IAnCDiTBOPtBYOqCo4r1Fy75WN3aNRfdVVz3C4fvaqinKlw10FusP3T9KcX6rLXl353HO427Z7c93/aDtrfYftr2t2vLe2yvt/1c7XZW69sF0KiJHMbvk7QyIo6TdIqky2wfL+lKSRsiYpGkDbXHALrUuGGPiP6IeLx2/w1JWyQdKek8SQfOpVwr6fxWNQmgee/rCzrbR0s6SdJGSXMjol8a+QdB0pw66yy33We7b0h7musWQMMmHHbbh0v6oaTLI2L3RNeLiNUR0RsRvdM0vZEeAVRgQmG3PU0jQb89Iu6tLR6wPa9WnydpZ2taBFCFcYfebFvSLZK2RMR1o0rrJF0saVXt9v6WdDgJTD36t4v1139vXrF+0d/+qFj/kw/dW6y30sr+8vDYz/+l/vBaz63/VVx31n6G1qo0kXH2pZK+Iukp25tqy67SSMjvtn2ppJckXdiaFgFUYdywR8TPJI05ubuks6ptB0CrcLoskARhB5Ig7EAShB1IgrADSXCJ6wRNnffRurXBNTOK6359wUPF+rKZAw31VIUVL59WrD9+U3nK5tk/2Fys97zBWHm3YM8OJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0mkGWff+w
flny3e+6eDxfpVxzxQt3b2b73VUE9VGRh+u27t9HUri+se+1e/LNZ7XiuPk+8vVtFN2LMDSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBJpxtm3nV/+d+3ZE+9p2bZvfG1hsX79Q2cX6x6u9+O+I4699sW6tUUDG4vrDhermEzYswNJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEo6I8hPs+ZJuk/RRjVy+vDoirrd9jaQ/lvRK7alXRUT9i74lHeGeONlM/Aq0ysbYoN0xOOaJGRM5qWafpJUR8bjtmZIes72+VvteRHynqkYBtM5E5mfvl9Rfu/+G7S2Sjmx1YwCq9b4+s9s+WtJJkg6cg7nC9pO219ieVWed5bb7bPcNaU9TzQJo3ITDbvtwST+UdHlE7JZ0k6SFkhZrZM//3bHWi4jVEdEbEb3TNL2ClgE0YkJhtz1NI0G/PSLulaSIGIiI4YjYL+lmSUta1yaAZo0bdtuWdIukLRFx3ajl80Y97QJJ5ek8AXTURL6NXyrpK5Kesr2ptuwqSctsL5YUkrZJ+lpLOgRQiYl8G/8zSWON2xXH1AF0F86gA5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJDHuT0lXujH7FUn/M2rRbEm72tbA+9OtvXVrXxK9NarK3o6KiI+MVWhr2N+zcbsvIno71kBBt/bWrX1J9NaodvXGYTyQBGEHkuh02Fd3ePsl3dpbt/Yl0Vuj2tJbRz+zA2ifTu/ZAbQJYQeS6EjYbZ9j+xnbz9u+shM91GN7m+2nbG+y3dfhXtbY3ml786hlPbbX236udjvmHHsd6u0a2y/X3rtNts/tUG/zbT9oe4vtp21/u7a8o+9doa+2vG9t/8xue4qkZyV9VtJ2SY9KWhYRv2hrI3XY3iapNyI6fgKG7dMlvSnptog4obbsHyUNRsSq2j+UsyLiii7p7RpJb3Z6Gu/abEXzRk8zLul8SV9VB9+7Ql9fVBvet07s2ZdIej4itkbEXkl3STqvA310vYh4WNLguxafJ2lt7f5ajfzP0nZ1eusKEdEfEY/X7r8h6cA04x197wp9tUUnwn6kpF+Nerxd3TXfe0j6ie3HbC/vdDNjmBsR/dLI/zyS5nS4n3cbdxrvdnrXNONd8941Mv15szoR9rGmkuqm8b+lEfEZSZ+TdFntcBUTM6FpvNtljGnGu0Kj0583qxNh3y5p/qjHH5e0owN9jCkidtRud0q6T903FfXAgRl0a7c7O9zP/+umabzHmmZcXfDedXL6806E/VFJi2wvsH2IpC9JWteBPt7D9ozaFyeyPUPS2eq+qajXSbq4dv9iSfd3sJd36JZpvOtNM64Ov3cdn/48Itr+J+lcjXwj/4Kkv+xED3X6+oSkJ2p/T3e6N0l3auSwbkgjR0SXSvqwpA2Snqvd9nRRb/8u6SlJT2okWPM61NtpGvlo+KSkTbW/czv93hX6asv7xumyQBKcQQckQdiBJAg7kARhB5Ig7EAShB1IgrADSfwfs4RxaLJFjqkAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "out1 = train_aset['0']\n", "# OR\n", "out2 = co['mnist_training_images', '0']\n", "\n", "print(np.allclose(out1, out2))\n", "\n", "plt.imshow(out1.reshape(28, 28))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dict style iteration supported out of the box" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "1\n", "2\n", "51\n", "60\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlAAAACBCAYAAAAPH4TmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAZWUlEQVR4nO3deZgV1ZkG8Pf0AnSzN9BsIg1Cs4kBaRSQJQYRNa4ji8QIIThmNC4oKkicSaIYMZNHRgVUVECNwd2IjqLCdAwisquADYIsgiCbIMja3ffMH7Tn1HfTRd+6a93q9/c8Pv2d/ureOvbXdftQdeqU0lqDiIiIiCKXkeoOEBEREaUbDqCIiIiIPOIAioiIiMgjDqCIiIiIPOIAioiIiMgjDqCIiIiIPIppAKWUukgptV4ptVEpNSFenaLUYD2Dg7UMFtYzOFjL4FDRrgOllMoE8CWAQQC2A1gGYITW+ov4dY+ShfUMDtYyWFjP4GAtgyUrhteeA2Cj1noTACilXgRwBQDXX4Qaqqauhdox7JJicQyHcUIfVy5pT/VkLVPvEPbv1Vo3qSTFYzPN8NgMFh6bwXGqYzOWAVRLANsc7e0Azg3fSCl1A4AbAKAWcnGuGhjDLikWS/SCU6WrrCdr6S/z9atbXVI8NtMMj81g4bEZHKc6NmOZA1XZiOxfrgdqrWdorYu01kXZqBnD7ijBqqwna5k2eGwGC4/N4OCxGSCxDKC2A2jlaJ8GYEds3aEUYj2Dg7UMFtYzOFjLAIllALUMQHulVBulVA0A1wCYG59uUQqwnsHBWgYL6xkcrGWARD0HSmtdppS6GcB7ADIBzNRar41bzyipWM/gYC2DhfUMDtYyWGKZRA6t9TsA3olTXyjFWM/gYC2DhfUMDtYyOLgSOREREZFHHEARERERecQBFBEREZFHHEARERERecQBFBEREZFHHEAREREReRTTMgZEQVH2sx4m3nnTcZH7rPezJv7J4lEi12JaDRNnFq9MUO+IiMhveAaKiIiIyCMOoIiIiIg84iW8Sqgs+2PJbNI4otesv7NAtMtzQyZufcZukcu9yT6Q+9uHa4jcyqKXTLy3/LDInfvKOBO3u+OTiPpFlQsN6C7aj86cauJ22fKwCDniVb1nidz6onIT31XQK34dpJQ7PORc0X7oz4+b+P5hI0VOL1+TlD4FVahvNxPv6Jcrcp/fPDV884hkKnt+oOTEEZEbO+w3trF0dVTvXx2UXtBDtLPnr0hRT4B9/97bxE3nbRO5sm3bk90dADwDRUREROQZB1BEREREHnEARURERORRoOdAZXZqb2JdM1vkdgxoYOKjveRco7z6tr3wJy8hVu8eqSvaD029yMRLuv5N5DaXHjXx5F2DRK7FQh1zX6qz0guLTHz39OdFrjDbzkULiVlPwKbSUhN/H6opct0dzeMX9xS5nGI7tyJ07Jj3DqeBo1ecY+NGmSKXN3NxsrsTV7uL5L8v799yWYp6EgyqexfR3jS0noknDbGfg1fX3i+2CyG6z72QtvMT22XL43bwzEUmfvuOn4lc9vvLo9pfUBy6xs7lnP7gIyK36Gg7E7/VvYXI6eNy+ZdY7bmxt2gX/+5hEw++6jqRq39JXHcdMZ6BIiIiIvKIAygiIiIijwJ1Ca/8p2eL9sOzp5nYeYkmGUodp4//67FfiVzWYXtKuvcrN4tc3W/KTFxz71GRy12+JI49DKbMevVE+3D/jia+fYq9THB+zg9hr3T/t8Ts/X1MvGC6PK286A+PmviDp58Quc5/tbVtOz69L2e52dHf/txyzzggkzOT3Jl4yLCXIfXp8vgbmL/OxAtUH1DVMhs3MnGnp0tE7q1my1xepVy+Hz+3NNxg4if7Dxa5gvcTvntfOThCLr8y9U/2M61rDTn1pWuNrSZ+W7UUuXhPMMkMuyJYqu3UilmdnxO5YW9cb+IWV30R55644xkoIiIiIo84gCIiIiLyiAMoIiIiIo8CNQeq5vodor3iWCsTF2bvivn9x+2U14o3/WAf8zL7jFdF7vuQvSLc9NGPo9ofFy3wbvtz8rr8sp7TXLaM3H35dq7GvDpy7svoLRea+NmC+SJXr/O+mPftd3+89BUTP1Ry4Sm2TA+ZZ7Q28boBchJXt6W/NHGLZXz8x48ym+abeOv0JiL3wtn2Z9ilRvz/3OwP2eVBVp+Q8x/71zoR9/0FRWaD+ibuf7d8LFg3R53KUC5ynRbYR+C0P/FZgnp3UvgyKPPHn2bioXXkZ+u4Tvaz9+VGZ4pc+b7vEtC7k3gGioiIiMgjDqCIiIiIPArUJbyynd+K9mMPDTXxAxfJ1cYzP69j4s9uesz1PSftPcvEGy+QTwkvP7DTxL/ofZPIbbnVxm2Q2FOd1V3Zz+wTw+d0k09uz0Dly1eM3jpQtJfP72Ti1WPkexQfrWXi/OXy1vaN++0yCdl/Kpb7Tvzd2CmXrcqq3iiNZD19xDV39Kt6rrnqbNt1dnXqT3uFf5Ym9k/MSwc7m3jGrJ+L3Mrb3T/Xq7stT9vpLW/lF7tu1+3jX4t2+5ErE9anWFxX1/7tnzJyiMg1mxLdFJpI8AwUERERkUccQBERERF5VOUASik1Uym1Wym1xvG9PKXUB0qpDRVfGya2mxQvrGegFLCWwcFjM1B4bFYDkVygng1gKgDn2ukTACzQWk9WSk2oaI+Pf/dikzfL3gbZ5K1GIue8tbHLmfI679r+9tbbuTMGmDj/gPu1VLVYznNq498nd8xGmtbzR6EB3UX70Zl2zlK7bPkrHYJd/v/ydVeZOHOInBPX4Od20YjOz8vH6xRO22bijG2rRK7hQhuXPiBv+X3tLPt79OvzbxW5zOK4zCXYC+AXSGItQ327iXa/Wh/F6619oaC2+9ITreaXu+biZDbS4NjMalsg2r2Gxj7Hs8Nrdg5pnS2ZIldr4B4TL+r2osg986Rj3lNyn9ZV
laQfm6dSfr58zNnzPZyPnZKfmatPlJq45XT5KBeSqjwDpbX+J4DwhRSuAPBsRfwsgCvj3C9KENYzUH4AaxkYPDYDhcdmNRDtHKimWuudAFDxNd9tQ6XUDUqp5Uqp5aU47rYZpVZE9WQt0wKPzWDhsRkcPDYDJuHLGGitZwCYAQD1VF7KFtcu3+t+ar70oPu53y7X2ic773lcnlpGKOGn9H0llbVUPbqYeO8dcimBwmxbvxVhnzX/94O9zXnfi/bW3Ub75TXW+n+1q/HWhxTtjfpNM2vafY+Vt8ef4s7hpImmnlsvzRHt/Mxcly3TQ1bB6aI9JG+u67Y5m/eb2G9HfjKPzXPe+FK0JzZ2X5W9VNuf1Ocn5OfntX//rYk7/H6tiUOHDontsuY0M/FlLUaKXLPPlpo4o6GcUtR/4DAT//Osl1376EfxqKdzeZfHZz0qcmdk5YRvbox5cKyJGxf7Zy7KpLWXmHjouc+7bvfIzU+I9oNTznLZMnbRnoHapZRqDgAVX3fHr0uUAqxncLCWwcJ6BgdrGTDRDqDmAhhVEY8C8GZ8ukMpwnoGB2sZLKxncLCWARPJMgZzACwG0EEptV0pNQbAZACDlFIbAAyqaFMaYD0DpQ1Yy8DgsRkoPDargSrnQGmtR7ikBrp8P+10Gi+v6Y/uav/XZrVeYOIBQ38rtqv7knyKdTpIl3pm5Mq5NWV/PmjiTzq+LnKby+xT1++YOE7kGi782sT5te0Z82TPYTmn+VbR3hKft92stS6q5PsJq2VWu0OuuWPrGiRqtwmz7X9qi/Z5Ne2yF88cPE1ufOAgEsnPx+aJwfbXbESDR8KyteDGOe/p9217iFw72M/PENyJR3SFPa5LaCLnQLWsE34TXFIl/dgMt+0COzf0VHOe7tvbVbTz55ilq05Zl2RrNcp+lk/66EyRu7ex7XMtVYpk4UrkRERERB5xAEVERETkUcKXMUgH5Qe+F+19N3Yy8ddz7S3zEyY9J7a7Z5hd2Vqvkje/t3rAcfunTtnqDWnr6IAuov1ex+mu215/2+0mrvt3eVk12iUIyLv85f454Z/Z2D55YNfVhSKXN2y7iT8sfCbslfZy1OPT5DqH+bsS91R337vTrgbeJsv9kl0451IFzkt2ibDtksaivbLtnITuz+9mDH/SNbfihJ3E8MGf+olc3UP+nJriXN7iYJn772D9DLmWTWaXDiYuX7s+rn3iGSgiIiIijziAIiIiIvKIl/AqEfqsxMTX/PEuE7/w+7+I7T7t5bik10u+R5fa9oG07Z/aKXJlm7bE3smAO+v+T0U7wzHWH71V3siS8/el8INsJVdaLnVcuc1Uwb+MezRP/nustst24UL95MOhdaYy8bYLaorciRb2DpuMGvYyxPv9HhPbZdu3wLfl8j3+c5O99P5dSF52zM2w79l0ibzjMPgVjJ3zocBA2Arjye5MNffTHPsTLw/75f3dpn8zsZ/uJs9q09rEx1s3ct2uZc1/uOYKs+Xlvatf/dDEL3dqFr55THgGioiIiMgjDqCIiIiIPOIAioiIiMgjzoGqQt5MuxzBzevlSuT1Jtvboee0fU/k1o6cauKOra4XuQ5/tOPW8g2b4tLPIDhwXW8T39tUzjcLwa6qu+L9ziJ3Ovxxe7nzyfMAEHLM+phXIvvcHiuT0qd4O34sW7RDjplBsyZOEbm5N3eL6D3HN3patDNgJzAd1SdEbke5/RlP3fNTE18wf6zYrsEq+/vS/P1dIqe22uN2T4lcoblppp1jpZetrqrrgbX5wd6iXdJ5mqOlRO4Tx13j+UtlznnreaK1+Iv8HDjr3F+ZeE2fZ91fqNxTQXVX63km/s3jo0Wu05R9Ub3nvl75Ji4dEt0q8MPb2M/FO/Pis+RAnxz7N/ZlcA4UERERUUpxAEVERETkES/heaAWyVvrjwyxpyx7Dr9F5JaMtw/cXHe+vERxbcGFJv6+bzx7mN7KHFdT6mfUELnFx+yt6G2f2yFfl9BeSeEPOV73F+dDLVeI3LWbLjZxx9s2i1yyH2YcL+1+uUq0uzxol+to1fObqN6zeLdcKXzPu/Yhvo3WygeD1pi3zNGyuUIsd33/8J/1N+P7mLhnzcUi9+IPLavobTURdtt76BSLOIxe8msTt/mrf26JD4XstblT9T+o61P0X22X6yg+8zWRG5hjr7tuvPwJ+cLLE9qthPu67Iho3zL6VhNnxnnqBM9AEREREXnEARQRERGRRxxAEREREXnEOVAxKN+128RNH90tcsfutjNzcpWcz/NUwdsmvvQqeft17htL4tnFwNhXXsfEyX4UjnPe0/rJXUVu3RV2uYp3j9QXuR3T2pm47n7/zA2Jpzb3LK56I4+a4+u4v6dTbv89rrl7i682cSH88YggomjkDLbzLs956xqRW3r2i8nuTkRu2NbfxMVLznTd7omfPyPazjldV382RuSaFCduyRiegSIiIiLyiAMoIiIiIo94Cc+DUF+5svJXQ+1Tn8/stkXkwi/bOT32nX36fO6b7rdfk3XnoqEmLgxbLiDeQgO6i/buO46auKRoqsgNXD3cxLUvkqvK10UwL9sFWes3A3pPewScn2+ThvwthT2JnPPy+qaJPxG5hX2cTzOoJXKXrbf36rd9aI3IhRA8TYZsFe3L6lxg4o3jOohcqPWxiN6z7mK5in/dbXbayu6z7dCi7aPrIu6nPmr33f6I++fne33lVIqBOfbvaHlxo4j3FyuegSIiIiLyiAMoIiIiIo84gCIiIiLyiHOgKqGK7O2TX95q5zI9dZ58onf/WvJJ8W6Oa/k4ik++a2MboZ1R9DCgHE9Fzwgb2z/Sd46Jp0E++iMett5nnz7/2siHRa4w2/4OnL10lMi1uOqLuPeFKBUyPrKPqrr31V+I3FWjpoZv7gvOeU9rRof3sRbcHCm1x3TOoUPx7pbv6OPHRbvc0W4zMf5LkbR+07GvOL2nPs/O0buswXNxetfY8AwUERERkUdVDqCUUq2UUsVKqRKl1Fql1G0V389TSn2glNpQ8bVh4rtLsQghBNYyULJZz2DgsRk4PDargUgu4ZUBGKe1XqmUqgtghVLqAwC/ArBAaz1ZKTUBwAQA4xPX1fjKatPaxF+NbiFyfxhuV2m9us7eqN5/4q4iE3/4SC+Ra/hs/E+ZeuDfWjruIA+F3Uw8IGeficfO7iFyZ8yy22Z/K0/H7xrQxMR5w7eb+JbTF4jtLs61SyPMPdxU5EauvsjEjZ+s7dr9FPFvPdNAppL/htxfmG3iZu8muzesZVW+/q8+or1w1H87Wu6X7DaXyVvzQ0/lO7Nx6FmlWM84UovsJea3Dsglhfo1S81yQFWegdJa79Rar6yIDwEoAdASwBUAfpwU9CyAKxPVSYqPDGSAtQyUUtYzGHhsBg6PzWrA0xwopVQBgO4AlgBoqrXeCZwcZAHId3nNDUqp5Uqp5aU4XtkmlAKsZbCwnsHBWgYL6xlcEQ+glFJ1ALwGYKzW+mCkr9Naz9BaF2mti7JRM5o+UpyxlsHCegYHaxksrGewRbSMgVIqGyd/CV7QWr9
e8e1dSqnmWuudSqnmAHYnqpPRyio43cTf92gucsPvm2fi/2jwOqIxbqed27R4epHI5c22T3JvGErpnCchXWtZS9lf1ZJBT4jcR/3s3IcNx5uJ3Oj6WyJ6/9t29DPxvI/l9fX2t/n3kSzpWk+/KNdhD+5I4X3J6VrLN3rb43HhF+1E7tWbBpu45sZdEb3f9+eeJtq/vP9tEw+q/WeRa5hhHyeyt/yoyG0ts7m7xt0hcrXfWBJRX2KRrvVMB58faCnaEzPscdzyf+XvWbyWUahMJHfhKQDPACjRWjsXyJkL4MdFcUYBeDP8teQv+uQsbdYyWFjPAOCxGUisZ8BFcgbqPADXAVitlPpxGvxEAJMBvKyUGgPgawBDXV5PPlF+cizOWgZHHbCegcBjM3B4bFYDVQ6gtNYfQawRLQyMb3e8y2puL9l8N1PeYn5jmw9NPKJuZKePw938TV8Tr3xcXtpp/Kp9infeIf9cpnOThSxorX1by6b/sGezx/+mt8g91Mz95+tcEb5vrS2u2606bk+4jvjwBpErHG2XMWgP/16yC/ODn+uZjo70PJKS/frt2Kz3lWz/85hduTv8CQzOlfoL638tcmNeeMrzvjPC/tyEnOubIEfknMsTXDnjbpFr9cDHJs5F4i/ZheGxmUA1rpcXz1Znn2Hi8i+/Ct88YbgSOREREZFHHEARERERecQBFBEREZFHES1jkGonBtslAk7c/p3ITWz3jokvzDkc1fvvCrv9tf/ccSbueO86E+cdkPNwwm6Aphg5r11vGFogcp1vucXEXwx7LOL37PjOTSbuMN3ObylctaKyzamaCX+UC53U6Bn5WTfpmktN/H7n6JZ9SYQrn7rLxM45TxRsZZu3proLAHgGioiIiMgzDqCIiIiIPEqLS3hbrrTjvC+7vhLx66YdsLc2PvLhhSKnyu0dph0nyadxt99lb3lN5Cqm5K5s0xbRbne7bV9+e8+I36cQy0ysT7EdVR/H5zcxcXk3XoiPRM2J9Uy88RX5bLZ22Yl91Ej3JSNN3PipXJE7ff5yE/P4pmTjGSgiIiIijziAIiIiIvKIAygiIiIij9JiDlThjUtNfOmNPaJ7Dyx1zXGeE1H10WyKvd39kilni1xbfBq+OQHQy1abeGxBn6TuuyXWuuY474lSiWegiIiIiDziAIqIiIjIIw6giIiIiDziAIqIiIjIIw6giIiIiDziAIqIiIjIIw6giIiIiDziAIqIiIjIIw6giIiIiDxSWidvLVel1B4AWwE0BrA3aTt2V9360Vpr3aTqzarGWp4S6xm76tYP1jI50rWeh1H9foZVSXktkzqAMjtVarnWuijpO2Y/4s4vffdLPwB/9cUrv/Sd/YidX/rul34A/uqLF37qt1/64od+8BIeERERkUccQBERERF5lKoB1IwU7Tcc+xE7v/TdL/0A/NUXr/zSd/Yjdn7pu1/6AfirL174qd9+6UvK+5GSOVBERERE6YyX8IiIiIg84gCKiIiIyKOkDqCUUhcppdYrpTYqpSYked8zlVK7lVJrHN/LU0p9oJTaUPG1YRL60UopVayUKlFKrVVK3ZaqvsQqVfVkLeOPx2Zw6slaBqeWAOtZsU9f1jNpAyilVCaAaQAuBtAZwAilVOdk7R/AbAAXhX1vAoAFWuv2ABZUtBOtDMA4rXUnAL0A/Lbi55CKvkQtxfWcDdYybnhsGmlfT9bSSPtaAqyngz/rqbVOyn8AegN4z9G+B8A9ydp/xT4LAKxxtNcDaF4RNwewPpn9qdjvmwAG+aEv6VRP1jI4tWQ9WUvWkvVMx3om8xJeSwDbHO3tFd9LpaZa650AUPE1P5k7V0oVAOgOYEmq+xIFv9WTtYye32oJsJ7RYi3DpHEtAdbzX/ipnskcQKlKvldt11BQStUB8BqAsVrrg6nuTxRYzwqsZbCkeT1ZS4c0ryXAegp+q2cyB1DbAbRytE8DsCOJ+6/MLqVUcwCo+Lo7GTtVSmXj5C/BC1rr11PZlxj4rZ6sZfT8VkuA9YwWa1khALUEWE/Dj/VM5gBqGYD2Sqk2SqkaAK4BMDeJ+6/MXACjKuJROHldNaGUUgrAMwBKtNYPp7IvMfJbPVnL6PmtlgDrGS3WEoGpJcB6AvBxPZM88esSAF8C+ArA75K87zkAdgIoxclR/RgAjXBy5v6Giq95SehHX5w8Bfs5gE8r/rskFX1J13qylsGpJevJWrKWrGe61pOPciEiIiLyiCuRExEREXnEARQRERGRRxxAEREREXnEARQRERGRRxxAEREREXnEARQRERGRRxxAEREREXn0/6qK5FZQqcBNAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# iterate normally over keys\n", "\n", "for k in train_aset:\n", " # equivalent method: for k in train_aset.keys():\n", " print(k)\n", "\n", "# iterate over items (plot results)\n", "\n", "fig, axs = plt.subplots(nrows=1, ncols=5, figsize=(10, 10))\n", "\n", "for idx, v in enumerate(train_aset.values()):\n", " axs[idx].imshow(v.reshape(28, 28))\n", "plt.show()\n", "\n", "# iterate over items, store k, v in dict\n", "\n", "myDict = {}\n", "for k, v in train_aset.items():\n", " myDict[k] = v" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Performance\n", "\n", "Once you’ve completed an interactive exploration, be sure to use the context\n", "manager form of the `update()` and `get()` methods!\n", "\n", "In order to make sure that all your data is always safe in Hangar, the backend\n", "diligently ensures that all contexts (operations which can somehow interact\n", "with the record structures) are opened and closed appropriately. When you use the\n", "context manager form of a arrayset object, we can offload a significant amount of\n", "work to the python runtime, and dramatically increase read and write speeds.\n", "\n", "Most columns we’ve tested see an increased throughput differential of 250% -\n", "500% for writes and 300% - 600% for reads when comparing using the context\n", "manager form vs the naked form!" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Beginning non-context manager form\n", "----------------------------------\n", "Finished non-context manager form in: 78.54769086837769 seconds\n", "Hard reset requested with writer_lock: 8910b50e-1f9d-4cb1-986c-b99ea84c8a54\n", "\n", "Beginning context manager form\n", "--------------------------------\n", "Finished context manager form in: 11.608536720275879 seconds\n", "Hard reset requested with writer_lock: ad4a2ef9-8494-49f8-84ef-40c3990b1e9b\n" ] } ], "source": [ "import time\n", "\n", "# ----------------- Non Context Manager Form ----------------------\n", "\n", "co = repo.checkout(write=True)\n", "aset_trimgs = co.add_ndarray_column(name='train_images', prototype=sample_trimg)\n", "aset_trlabels = co.add_ndarray_column(name='train_labels', prototype=sample_trlabel)\n", "\n", "print(f'Beginning non-context manager form')\n", "print('----------------------------------')\n", "start_time = time.time()\n", "\n", "for idx, img in enumerate(trimgs):\n", " aset_trimgs[idx] = img\n", " aset_trlabels[idx] = np.array([trlabels[idx]])\n", "\n", "print(f'Finished non-context manager form in: {time.time() - start_time} seconds')\n", "\n", "co.reset_staging_area()\n", "co.close()\n", "\n", "# ----------------- Context Manager Form --------------------------\n", "\n", "co = repo.checkout(write=True)\n", "aset_trimgs = co.add_ndarray_column(name='train_images', prototype=sample_trimg)\n", "aset_trlabels = co.add_ndarray_column(name='train_labels', prototype=sample_trlabel)\n", "\n", "print(f'\\nBeginning context manager form')\n", "print('--------------------------------')\n", "start_time = time.time()\n", "\n", "with aset_trimgs, aset_trlabels:\n", " for idx, img in enumerate(trimgs):\n", " aset_trimgs[idx] = img\n", " aset_trlabels[idx] = np.array([trlabels[idx]])\n", "\n", "print(f'Finished context manager form in: {time.time() - start_time} seconds')\n", "\n", "co.reset_staging_area()\n", "co.close()\n", "\n", "print(f'Finished context 
manager with checkout form in: {time.time() - start_time} seconds')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Clearly, the context manager form is far and away superior; however, we feel that\n", "for the purposes of interactive use the \"Naked\" form is valuable to the\n", "average user!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Committing Changes\n", "\n", "Once you have made a set of changes you want to commit, simply call the [commit()](api.rst#hangar.checkout.WriterCheckout.commit) method (and pass in a message)!" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=8eb01eaf0c657f8526dbf9a8ffab0a4606ebfd3b'" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.commit('hello world, this is my first hangar commit')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The returned value (`'a=8eb01eaf0c657f8526dbf9a8ffab0a4606ebfd3b'`) is the commit hash of this commit. It\n", "may be useful to assign this to a variable and follow this up by creating a\n", "branch from this commit!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Don't Forget to Close the Write-Enabled Checkout to Release the Lock!\n", "\n", "We mentioned in `Checking out the repo for writing` that when a\n", "`write-enabled` checkout is created, it places a lock on writers until it is\n", "closed. If for whatever reason the program terminates (via a non-Python `SIGKILL` or a fatal\n", "interpreter error) without closing the\n", "write-enabled checkout, this lock will persist (forever technically, but\n", "realistically until it is manually freed).\n", "\n", "Luckily, preventing this issue from occurring is as simple as calling\n", "[close()](api.rst#hangar.checkout.WriterCheckout.close)!\n", "\n", "If you forget, normal interpreter shutdown should trigger an `atexit` hook automatically;\n", "however, this behavior should not be relied upon. It is better to just call\n", "[close()](api.rst#hangar.checkout.WriterCheckout.close)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### But if you did forget, and you receive a `PermissionError` next time you open a checkout\n", "\n", "```\n", "PermissionError: Cannot acquire the writer lock. Only one instance of\n", "a writer checkout can be active at a time. If the last checkout of this\n", "repository did not properly close, or a crash occured, the lock must be\n", "manually freed before another writer can be instantiated.\n", "```\n", "\n", "You can manually free the lock with the following method. However!\n", "\n", "This is a dangerous operation, and it's one of the only ways a user can put\n", "data in their repository at risk! If another python process is still holding the\n", "lock, do NOT force the release. Kill the process (that's totally fine to do at\n", "any time), then force the lock release."
] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repo.force_release_writer_lock()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Reading Data\n", "\n", "Two different styles of access are considered below, In general, the contex manager form\n", "if recomended (though marginal performance improvements are expected to be seen at best)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " Neither BRANCH or COMMIT specified.\n", " * Checking out writing HEAD BRANCH: master\n", "\n", "Begining Key Iteration\n", "-----------------------\n", "completed in 5.838773965835571 sec\n", "\n", "Begining Items Iteration with Context Manager\n", "---------------------------------------------\n", "completed in 5.516948938369751 sec\n" ] } ], "source": [ "co = repo.checkout()\n", "\n", "trlabel_col = co['train_labels']\n", "trimg_col = co['train_images']\n", "\n", "print(f'\\nBegining Key Iteration')\n", "print('-----------------------')\n", "start = time.time()\n", "\n", "for idx in trimg_col.keys():\n", " image_data = trimg_col[idx]\n", " label_data = trlabel_col[idx]\n", "\n", "print(f'completed in {time.time() - start} sec')\n", "\n", "print(f'\\nBegining Items Iteration with Context Manager')\n", "print('---------------------------------------------')\n", "start = time.time()\n", "\n", "with trlabel_col, trimg_col:\n", " for index, image_data in trimg_col.items():\n", " label_data = trlabel_col[index]\n", "\n", "print(f'completed in {time.time() - start} sec')\n", "\n", "co.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Inspecting state from the top!\n", "\n", "After your first commit, the summary and log methods will begin to work, and you can either print the stream to the console (as shown below), or you can\n", "dig deep into the internal of how hangar thinks about your data! (To be covered in an advanced tutorial later on).\n", "\n", "The point is, regardless of your level of interaction with a live hangar repository, all level of state is accessable from the top, and in general has been built to be the only way to directly access it!" 
] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary of Contents Contained in Data Repository \n", " \n", "================== \n", "| Repository Info \n", "|----------------- \n", "| Base Directory: /Users/rick/projects/tensorwerk/hangar/dev/mnist \n", "| Disk Usage: 57.29 MB \n", " \n", "=================== \n", "| Commit Details \n", "------------------- \n", "| Commit: a=8eb01eaf0c657f8526dbf9a8ffab0a4606ebfd3b \n", "| Created: Tue Feb 25 19:03:06 2020 \n", "| By: Rick Izzo \n", "| Email: rick@tensorwerk.com \n", "| Message: hello world, this is my first hangar commit \n", " \n", "================== \n", "| DataSets \n", "|----------------- \n", "| Number of Named Columns: 2 \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"train_images\", layout=\"flat\") \n", "| Num Data Pieces: 50000 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: ndarray \n", "| - schema_type: fixed_shape \n", "| - shape: (784,) \n", "| - dtype: uint8 \n", "| - backend: 00 \n", "| - backend_options: {'complib': 'blosc:lz4hc', 'complevel': 5, 'shuffle': 'byte'} \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"train_labels\", layout=\"flat\") \n", "| Num Data Pieces: 50000 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: ndarray \n", "| - schema_type: fixed_shape \n", "| - shape: (1,) \n", "| - dtype: int64 \n", "| - backend: 10 \n", "| - backend_options: {} \n", " \n", "================== \n", "| Metadata: \n", "|----------------- \n", "| Number of Keys: 0 \n", "\n" ] } ], "source": [ "repo.summary()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=8eb01eaf0c657f8526dbf9a8ffab0a4606ebfd3b (\u001B[1;31mmaster\u001B[m) : hello world, this is my first hangar commit\n" ] } ], "source": [ "repo.log()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: docs/Tutorial-002.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Part 2: Checkouts, Branching, & Merging\n", "\n", "This section deals with navigating repository history, creating & merging\n", "branches, and understanding conflicts." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The Hangar Workflow\n", "\n", "The hangar workflow is intended to mimic common ``git`` workflows in which small\n", "incremental changes are made and committed on dedicated ``topic`` branches.\n", "After the ``topic`` has been adequatly set, ``topic`` branch is merged into\n", "a separate branch (commonly referred to as ``master``, though it need not to be the\n", "actual branch named ``\"master\"``), where well vetted and more permanent changes\n", "are kept.\n", "\n", " Create Branch -> Checkout Branch -> Make Changes -> Commit\n", "\n", "#### Making the Initial Commit\n", "\n", "Let's initialize a new repository and see how branching works in Hangar:\n", "\n", "" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from hangar import Repository\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "repo = Repository(path='/Users/rick/projects/tensorwerk/hangar/dev/mnist/')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hangar Repo initialized at: /Users/rick/projects/tensorwerk/hangar/dev/mnist/.hangar\n" ] } ], "source": [ "repo_pth = repo.init(user_name='Test User', user_email='test@foo.com', remove_old=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "When a repository is first initialized, it has no history, no commits." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "repo.log() # -> returns None" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Though the repository is essentially empty at this point in time, there is one\n", "thing which is present: a branch with the name: ``\"master\"``." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['master']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repo.list_branches()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This ``\"master\"`` is the branch we make our first commit on; until we do, the\n", "repository is in a semi-unstable state; with no history or contents, most of the\n", "functionality of a repository (to store, retrieve, and work with versions of\n", "data across time) just isn't possible. A significant portion of otherwise\n", "standard operations will generally flat out refuse to execute (ie. read-only\n", "checkouts, log, push, etc.) until the first commit is made.\n", "\n", "One of the only options available at this point is to create a\n", "write-enabled checkout on the ``\"master\"`` branch and to begin to add data so we\n", "can make a commit. Let’s do that now:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "co = repo.checkout(write=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As expected, there are no columns nor metadata samples recorded in the checkout." 
] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "number of metadata keys: 0\n", "number of columns: 0\n" ] } ], "source": [ "print(f'number of metadata keys: {len(co.metadata)}')\n", "print(f'number of columns: {len(co.columns)}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let’s add a dummy array just to put something in the repository history to\n", "commit. We'll then close the checkout so we can explore some useful tools which\n", "depend on having at least one historical record (commit) in the repo." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "dummy = np.arange(10, dtype=np.uint16)\n", "col = co.add_ndarray_column('dummy_column', prototype=dummy)\n", "col['0'] = dummy\n", "initialCommitHash = co.commit('first commit with a single sample added to a dummy column')\n", "co.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If we check the history now, we can see our first commit hash, and that it is labeled with the branch name `\"master\"`" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=eaee002ed9c6e949c3657bd50e3949d6a459d50e (\u001B[1;31mmaster\u001B[m) : first commit with a single sample added to a dummy column\n" ] } ], "source": [ "repo.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So now our repository contains:\n", "- [A commit](api.rst#hangar.checkout.WriterCheckout.commit_hash): a fully\n", " independent description of the entire repository state as\n", " it existed at some point in time. A commit is identified by a `commit_hash`.\n", "- [A branch](api.rst#hangar.checkout.WriterCheckout.branch_name): a label\n", " pointing to a particular `commit` / `commit_hash`.\n", "\n", "Once committed, it is not possible to remove, modify, or otherwise tamper with\n", "the contents of a commit in any way. It is a permanent record, which Hangar has\n", "no method to change once written to disk.\n", "\n", "In addition, as a `commit_hash` is not only calculated from the `commit` ’s\n", "contents, but from the `commit_hash` of its parents (more on this to follow),\n", "knowing a single top-level `commit_hash` allows us to verify the integrity of\n", "the entire repository history. This fundamental behavior holds even in cases of\n", "disk-corruption or malicious use.\n", "\n", "### Working with Checkouts & Branches\n", "\n", "As mentioned in the first tutorial, we work with the data in a repository through\n", "a [checkout](api.rst#hangar.repository.Repository.checkout). 
There are two types\n", "of checkouts (each of which have different uses and abilities):\n", "\n", "**[Checking out a branch / commit for reading:](api.rst#read-only-checkout)** is\n", "the process of retrieving records describing repository state at some point in\n", "time, and setting up access to the referenced data.\n", "\n", "- Any number of read checkout processes can operate on a repository (on\n", " any number of commits) at the same time.\n", "\n", "**[Checking out a branch for writing:](api.rst#write-enabled-checkout)** is the\n", "process of setting up a (mutable) ``staging area`` to temporarily gather\n", "record references / data before all changes have been made and staging area\n", "contents are committed in a new permanent record of history (a `commit`).\n", "\n", "- Only one write-enabled checkout can ever be operating in a repository\n", " at a time.\n", "- When initially creating the checkout, the `staging area` is not\n", " actually “empty”. Instead, it has the full contents of the last `commit`\n", " referenced by a branch’s `HEAD`. These records can be removed / mutated / added\n", " to in any way to form the next `commit`. The new `commit` retains a\n", " permanent reference identifying the previous ``HEAD`` ``commit`` was used as\n", " its base `staging area`.\n", "- On commit, the branch which was checked out has its ``HEAD`` pointer\n", " value updated to the new `commit`’s `commit_hash`. A write-enabled\n", " checkout starting from the same branch will now use that `commit`’s\n", " record content as the base for its `staging area`.\n", "\n", "#### Creating a branch\n", "\n", "A branch is an individual series of changes / commits which diverge from the main\n", "history of the repository at some point in time. All changes made along a branch\n", "are completely isolated from those on other branches. After some point in time,\n", "changes made in a disparate branches can be unified through an automatic\n", "`merge` process (described in detail later in this tutorial). In general, the\n", "`Hangar` branching model is semantically identical to the `Git` one; The one exception\n", "is that in Hangar, a branch must always have a `name` and a `base_commit`. (No\n", "\"Detached HEAD state\" is possible for a `write-enabled` checkout). If No `base_commit` is\n", "specified, the current writer branch `HEAD` `commit` is used as the `base_commit`\n", "hash for the branch automatically.\n", "\n", "Hangar branches have the same lightweight and performant properties which\n", "make working with `Git` branches so appealing - they are cheap and easy to use,\n", "create, and discard (if necessary).\n", "\n", "To create a branch, use the [create_branch()](api.rst#hangar.repository.Repository.create_branch)\n", "method." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "branch_1 = repo.create_branch(name='testbranch')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "BranchHead(name='testbranch', digest='a=eaee002ed9c6e949c3657bd50e3949d6a459d50e')" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "branch_1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We use the [list_branches()](api.rst#hangar.repository.Repository.list_branches) and [log()](api.rst#hangar.repository.Repository.log) methods to see that a new branch named `testbranch` has been created and is indeed pointing to our initial commit." 
] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "branch names: ['master', 'testbranch'] \n", "\n", "* a=eaee002ed9c6e949c3657bd50e3949d6a459d50e (\u001B[1;31mmaster\u001B[m) (\u001B[1;31mtestbranch\u001B[m) : first commit with a single sample added to a dummy column\n" ] } ], "source": [ "print(f'branch names: {repo.list_branches()} \\n')\n", "repo.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If instead, we actually specify the base commit (with a different branch\n", "name) we see we do actually get a third branch. pointing to the same commit as\n", "`master` and `testbranch`" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "branch_2 = repo.create_branch(name='new', base_commit=initialCommitHash)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "BranchHead(name='new', digest='a=eaee002ed9c6e949c3657bd50e3949d6a459d50e')" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "branch_2" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=eaee002ed9c6e949c3657bd50e3949d6a459d50e (\u001B[1;31mmaster\u001B[m) (\u001B[1;31mnew\u001B[m) (\u001B[1;31mtestbranch\u001B[m) : first commit with a single sample added to a dummy column\n" ] } ], "source": [ "repo.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Making changes on a branch\n", "\n", "Let’s make some changes on the `new` branch to see how things work.\n", "\n", "We can see that the data we added previously is still here (`dummy` arrayset containing\n", "one sample labeled `0`)." 
] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "co = repo.checkout(write=True, branch='new')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Columns \n", " Writeable : True \n", " Number of Columns : 1 \n", " Column Names / Partial Remote References: \n", " - dummy_column / False" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : dummy_column \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint16 \n", " Shape : (10,) \n", " Number of Samples : 1 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns['dummy_column']" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint16)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns['dummy_column']['0']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's add another sample to the `dummy_arrayset` called `1`" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "arr = np.arange(10, dtype=np.uint16)\n", "# let's increment values so that `0` and `1` aren't set to the same thing\n", "arr += 1\n", "\n", "co['dummy_column', '1'] = arr" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can see that in this checkout, there are indeed two samples in the `dummy_arrayset`:" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(co.columns['dummy_column'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "That's all, let's commit this and be done with this branch." ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "co.commit('commit on `new` branch adding a sample to dummy_arrayset')\n", "co.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### How do changes appear when made on a branch?\n", "\n", "If we look at the log, we see that the branch we were on (`new`) is a commit ahead of `master` and `testbranch`" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=c1cf1bd6863ed0b95239d2c9e1a6c6cc65569e94 (\u001B[1;31mnew\u001B[m) : commit on `new` branch adding a sample to dummy_arrayset\n", "* a=eaee002ed9c6e949c3657bd50e3949d6a459d50e (\u001B[1;31mmaster\u001B[m) (\u001B[1;31mtestbranch\u001B[m) : first commit with a single sample added to a dummy column\n" ] } ], "source": [ "repo.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The meaning is exactly what one would intuit. We made some changes, they were\n", "reflected on the `new` branch, but the `master` and `testbranch` branches\n", "were not impacted at all, nor were any of the commits!" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merging (Part 1) Fast-Forward Merges\n", "\n", "Say we like the changes we made on the ``new`` branch so much that we want them\n", "to be included into our ``master`` branch! How do we make this happen for this\n", "scenario??\n", "\n", "Well, the history between the ``HEAD`` of the ``new`` and the ``HEAD`` of the\n", "``master`` branch is perfectly linear. In fact, when we began making changes\n", "on ``new``, our staging area was *identical* to what the ``master`` ``HEAD``\n", "commit references are right now!\n", "\n", "If you’ll remember that a branch is just a pointer which assigns some ``name``\n", "to a ``commit_hash``, it becomes apparent that a merge in this case really\n", "doesn’t involve any work at all. With a linear history between ``master`` and\n", "``new``, any ``commits`` exsting along the path between the ``HEAD`` of\n", "``new`` and ``master`` are the only changes which are introduced, and we can\n", "be sure that this is the only view of the data records which can exist!\n", "\n", "What this means in practice is that for this type of merge, we can just update\n", "the ``HEAD`` of ``master`` to point to the ``HEAD`` of ``\"new\"``, and the\n", "merge is complete.\n", "\n", "This situation is referred to as a **Fast Forward (FF) Merge**. A FF merge is\n", "safe to perform any time a linear history lies between the ``HEAD`` of some\n", "``topic`` and ``base`` branch, regardless of how many commits or changes which\n", "were introduced.\n", "\n", "For other situations, a more complicated **Three Way Merge** is required. This\n", "merge method will be explained a bit more later in this tutorial." ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "co = repo.checkout(write=True, branch='master')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Performing the Merge\n", "\n", "In practice, you’ll never need to know the details of the merge theory explained\n", "above (or even remember it exists). Hangar automatically figures out which merge\n", "algorithms should be used and then performed whatever calculations are needed to\n", "compute the results.\n", "\n", "As a user, merging in Hangar is a one-liner! just use the [merge()](api.rst#hangar.checkout.WriterCheckout.merge)\n", "method from a `write-enabled` checkout (shown below), or the analogous methods method\n", "from the Repository Object [repo.merge()](api.rst#hangar.repository.Repository.merge)\n", "(if not already working with a `write-enabled` checkout object)." ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Selected Fast-Forward Merge Strategy\n" ] }, { "data": { "text/plain": [ "'a=c1cf1bd6863ed0b95239d2c9e1a6c6cc65569e94'" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.merge(message='message for commit (not used for FF merge)', dev_branch='new')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's check the log!" 
] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=c1cf1bd6863ed0b95239d2c9e1a6c6cc65569e94 (\u001B[1;31mmaster\u001B[m) (\u001B[1;31mnew\u001B[m) : commit on `new` branch adding a sample to dummy_arrayset\n", "* a=eaee002ed9c6e949c3657bd50e3949d6a459d50e (\u001B[1;31mtestbranch\u001B[m) : first commit with a single sample added to a dummy column\n" ] } ], "source": [ "repo.log()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'master'" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.branch_name" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=c1cf1bd6863ed0b95239d2c9e1a6c6cc65569e94'" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.commit_hash" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : dummy_column \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint16 \n", " Shape : (10,) \n", " Number of Samples : 2 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns['dummy_column']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As you can see, everything is as it should be!" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "co.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Making changes to introduce diverged histories\n", "\n", "Let’s now go back to our `testbranch` branch and make some changes there so\n", "we can see what happens when changes don’t follow a linear history." 
] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "co = repo.checkout(write=True, branch='testbranch')" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Columns \n", " Writeable : True \n", " Number of Columns : 1 \n", " Column Names / Partial Remote References: \n", " - dummy_column / False" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : dummy_column \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint16 \n", " Shape : (10,) \n", " Number of Samples : 1 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns['dummy_column']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will start by mutating sample `0` in `dummy_arrayset` to a different value" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([50, 51, 52, 53, 54, 55, 56, 57, 58, 59], dtype=uint16)" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "old_arr = co['dummy_column', '0']\n", "new_arr = old_arr + 50\n", "new_arr" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "co['dummy_column', '0'] = new_arr" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let’s make a commit here, then add some metadata and make a new commit (all on\n", "the `testbranch` branch)." ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=fcd82f86e39b19c3e5351dda063884b5d2fda67b'" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.commit('mutated sample `0` of `dummy_column` to new value')" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=fcd82f86e39b19c3e5351dda063884b5d2fda67b (\u001B[1;31mtestbranch\u001B[m) : mutated sample `0` of `dummy_column` to new value\n", "* a=eaee002ed9c6e949c3657bd50e3949d6a459d50e : first commit with a single sample added to a dummy column\n" ] } ], "source": [ "repo.log()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "co.metadata['hello'] = 'world'" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=69a08ca41ca1f5577fb0ffcf59d4d1585f614c4d'" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.commit('added hellow world metadata')" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "co.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Looking at our history how, we see that none of the original branches reference\n", "our first commit anymore." 
] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=69a08ca41ca1f5577fb0ffcf59d4d1585f614c4d (\u001B[1;31mtestbranch\u001B[m) : added hellow world metadata\n", "* a=fcd82f86e39b19c3e5351dda063884b5d2fda67b : mutated sample `0` of `dummy_column` to new value\n", "* a=eaee002ed9c6e949c3657bd50e3949d6a459d50e : first commit with a single sample added to a dummy column\n" ] } ], "source": [ "repo.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can check the history of the `master` branch by specifying it as an argument to the `log()` method." ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=c1cf1bd6863ed0b95239d2c9e1a6c6cc65569e94 (\u001B[1;31mmaster\u001B[m) (\u001B[1;31mnew\u001B[m) : commit on `new` branch adding a sample to dummy_arrayset\n", "* a=eaee002ed9c6e949c3657bd50e3949d6a459d50e : first commit with a single sample added to a dummy column\n" ] } ], "source": [ "repo.log('master')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merging (Part 2) Three Way Merge\n", "\n", "If we now want to merge the changes on `testbranch` into `master`, we can't just follow a simple linear history; **the branches have diverged**.\n", "\n", "For this case, Hangar implements a **Three Way Merge** algorithm which does the following:\n", "- Find the most recent common ancestor `commit` present in both the `testbranch` and `master` branches\n", "- Compute what changed between the common ancestor and each branch's `HEAD` commit\n", "- Check if any of the changes conflict with each other (more on this in a later tutorial)\n", "- If no conflicts are present, compute the results of the merge between the two sets of changes\n", "- Create a new `commit` containing the merge results reference both branch `HEAD`s as parents of the new `commit`, and update the `base` branch `HEAD` to that new `commit`'s `commit_hash`" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "co = repo.checkout(write=True, branch='master')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Once again, as a user, the details are completely irrelevant, and the operation\n", "occurs from the same one-liner call we used before for the FF Merge." ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Selected 3-Way Merge Strategy\n" ] }, { "data": { "text/plain": [ "'a=002041fe8d8846b06f33842964904b627de55214'" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.merge(message='merge of testbranch into master', dev_branch='testbranch')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If we now look at the log, we see that this has a much different look than\n", "before. 
The three way merge results in a history which references changes made\n", "in both diverged branches, and unifies them in a single ``commit``" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=002041fe8d8846b06f33842964904b627de55214 (\u001B[1;31mmaster\u001B[m) : merge of testbranch into master\n", "\u001B[1;31m|\u001B[m\u001B[1;32m\\\u001B[m \n", "\u001B[1;31m|\u001B[m * a=69a08ca41ca1f5577fb0ffcf59d4d1585f614c4d (\u001B[1;31mtestbranch\u001B[m) : added hellow world metadata\n", "\u001B[1;31m|\u001B[m * a=fcd82f86e39b19c3e5351dda063884b5d2fda67b : mutated sample `0` of `dummy_column` to new value\n", "* \u001B[1;32m|\u001B[m a=c1cf1bd6863ed0b95239d2c9e1a6c6cc65569e94 (\u001B[1;31mnew\u001B[m) : commit on `new` branch adding a sample to dummy_arrayset\n", "\u001B[1;32m|\u001B[m\u001B[1;32m/\u001B[m \n", "* a=eaee002ed9c6e949c3657bd50e3949d6a459d50e : first commit with a single sample added to a dummy column\n" ] } ], "source": [ "repo.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Manually inspecting the merge result to verify it matches our expectations\n", "\n", "`dummy_arrayset` should contain two arrays, key `1` was set in the previous\n", "commit originally made in `new` and merged into `master`. Key `0` was\n", "mutated in `testbranch` and unchanged in `master`, so the update from\n", "`testbranch` is kept.\n", "\n", "There should be one metadata sample with they key `hello` and the value\n", "``\"world\"``." ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Columns \n", " Writeable : True \n", " Number of Columns : 1 \n", " Column Names / Partial Remote References: \n", " - dummy_column / False" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : dummy_column \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint16 \n", " Shape : (10,) \n", " Number of Samples : 2 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns['dummy_column']" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[array([50, 51, 52, 53, 54, 55, 56, 57, 58, 59], dtype=uint16),\n", " array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=uint16)]" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co['dummy_column', ['0', '1']]" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Metadata \n", " Writeable: True \n", " Number of Keys: 1\n" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.metadata" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'world'" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.metadata['hello']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**The Merge was a success!**" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "co.close()" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "### Conflicts\n", "\n", "Now that we've seen merging in action, the next step is to talk about conflicts.\n", "\n", "#### How Are Conflicts Detected?\n", "\n", "Any merge conflicts can be identified and addressed ahead of running a `merge`\n", "command by using the built in [diff](api.rst#hangar.diff.WriterUserDiff) tools.\n", "When diffing commits, Hangar will provide a list of conflicts which it identifies.\n", "In general these fall into 4 categories:\n", "\n", "1. **Additions** in both branches which created new keys (samples /\n", " columns / metadata) with non-compatible values. For samples &\n", " metadata, the hash of the data is compared, for columns, the schema\n", " specification is checked for compatibility in a method custom to the\n", " internal workings of Hangar.\n", "2. **Removal** in `Master Commit/Branch` **& Mutation** in `Dev Commit / Branch`. Applies for samples, columns, and metadata identically.\n", "3. **Mutation** in `Dev Commit/Branch` **& Removal** in `Master Commit / Branch`. Applies for samples, columns, and metadata identically.\n", "4. **Mutations** on keys of both branches to non-compatible values. For\n", " samples & metadata, the hash of the data is compared; for columns, the\n", " schema specification is checked for compatibility in a method custom to the\n", " internal workings of Hangar.\n", "\n", "#### Let's make a merge conflict\n", "\n", "To force a conflict, we are going to checkout the `new` branch and set the\n", "metadata key `hello` to the value `foo conflict... BOO!`. Then if we try\n", "to merge this into the `testbranch` branch (which set `hello` to a value\n", "of `world`) we see how hangar will identify the conflict and halt without\n", "making any changes.\n", "\n", "Automated conflict resolution will be introduced in a future version of Hangar,\n", "for now it is up to the user to manually resolve conflicts by making any\n", "necessary changes in each branch before reattempting a merge operation." ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "co = repo.checkout(write=True, branch='new')" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "co.metadata['hello'] = 'foo conflict... 
BOO!'" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=95896880b33fc06a3c2359a03408f07c87bcc8c0'" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.commit ('commit on new branch to hello metadata key so we can demonstrate a conflict')" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=95896880b33fc06a3c2359a03408f07c87bcc8c0 (\u001B[1;31mnew\u001B[m) : commit on new branch to hello metadata key so we can demonstrate a conflict\n", "* a=c1cf1bd6863ed0b95239d2c9e1a6c6cc65569e94 : commit on `new` branch adding a sample to dummy_arrayset\n", "* a=eaee002ed9c6e949c3657bd50e3949d6a459d50e : first commit with a single sample added to a dummy column\n" ] } ], "source": [ "repo.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**When we attempt the merge, an exception is thrown telling us there is a conflict!**" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Selected 3-Way Merge Strategy\n" ] }, { "ename": "ValueError", "evalue": "HANGAR VALUE ERROR:: Merge ABORTED with conflict: Conflicts(t1=[(b'l:hello', b'2=d8fa6800caf496e637d965faac1a033e4636c2e6')], t21=[], t22=[], t3=[], conflict=True)", "output_type": "error", "traceback": [ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)", "\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[0;32m----> 1\u001B[0;31m \u001B[0mco\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mmerge\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mmessage\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m'this merge should not happen'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdev_branch\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m'testbranch'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m", "\u001B[0;32m~/projects/tensorwerk/hangar/hangar-py/src/hangar/checkout.py\u001B[0m in \u001B[0;36mmerge\u001B[0;34m(self, message, dev_branch)\u001B[0m\n\u001B[1;32m 1027\u001B[0m \u001B[0mdev_branch\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mdev_branch\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1028\u001B[0m \u001B[0mrepo_path\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_repo_path\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m-> 1029\u001B[0;31m writer_uuid=self._writer_lock)\n\u001B[0m\u001B[1;32m 1030\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1031\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0masetHandle\u001B[0m \u001B[0;32min\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_columns\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mvalues\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;32m~/projects/tensorwerk/hangar/hangar-py/src/hangar/merger.py\u001B[0m in \u001B[0;36mselect_merge_algorithm\u001B[0;34m(message, branchenv, stageenv, refenv, stagehashenv, master_branch, dev_branch, repo_path, writer_uuid)\u001B[0m\n\u001B[1;32m 136\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 137\u001B[0m \u001B[0;32mexcept\u001B[0m \u001B[0mValueError\u001B[0m \u001B[0;32mas\u001B[0m 
\u001B[0me\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 138\u001B[0;31m \u001B[0;32mraise\u001B[0m \u001B[0me\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 139\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 140\u001B[0m \u001B[0;32mfinally\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;32m~/projects/tensorwerk/hangar/hangar-py/src/hangar/merger.py\u001B[0m in \u001B[0;36mselect_merge_algorithm\u001B[0;34m(message, branchenv, stageenv, refenv, stagehashenv, master_branch, dev_branch, repo_path, writer_uuid)\u001B[0m\n\u001B[1;32m 133\u001B[0m \u001B[0mrefenv\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mrefenv\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 134\u001B[0m \u001B[0mstagehashenv\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mstagehashenv\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 135\u001B[0;31m repo_path=repo_path)\n\u001B[0m\u001B[1;32m 136\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 137\u001B[0m \u001B[0;32mexcept\u001B[0m \u001B[0mValueError\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0me\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;32m~/projects/tensorwerk/hangar/hangar-py/src/hangar/merger.py\u001B[0m in \u001B[0;36m_three_way_merge\u001B[0;34m(message, master_branch, masterHEAD, dev_branch, devHEAD, ancestorHEAD, branchenv, stageenv, refenv, stagehashenv, repo_path)\u001B[0m\n\u001B[1;32m 260\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mconflict\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mconflict\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mTrue\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 261\u001B[0m \u001B[0mmsg\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;34mf'HANGAR VALUE ERROR:: Merge ABORTED with conflict: {conflict}'\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 262\u001B[0;31m \u001B[0;32mraise\u001B[0m \u001B[0mValueError\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mmsg\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 263\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 264\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mmEnv\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mbegin\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mwrite\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0mtxn\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;31mValueError\u001B[0m: HANGAR VALUE ERROR:: Merge ABORTED with conflict: Conflicts(t1=[(b'l:hello', b'2=d8fa6800caf496e637d965faac1a033e4636c2e6')], t21=[], t22=[], t3=[], conflict=True)" ] } ], "source": [ "co.merge(message='this merge should not happen', dev_branch='testbranch')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Checking for Conflicts\n", "\n", "Alternatively, use the diff methods on a checkout to test for conflicts before attempting a merge.\n", "\n", "It is possible to diff between a checkout object and:\n", "\n", "1. Another branch ([diff.branch()](api.rst#hangar.diff.WriterUserDiff.branch))\n", "2. A specified commit ([diff.commit()](api.rst#hangar.diff.WriterUserDiff.commit))\n", "3. 
Changes made in the staging area before a commit is made\n", " ([diff.staged()](api.rst#hangar.diff.WriterUserDiff.staged))\n", " (for `write-enabled` checkouts only.)\n", "\n", "Or via the [CLI status tool](cli.rst#hangar-status) between the staging area and any branch/commit\n", "(only a human readable summary is produced)." ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "merge_results, conflicts_found = co.diff.branch('testbranch')" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Conflicts(t1=Changes(schema={}, samples=(), metadata=(MetadataRecordKey(key='hello'),)), t21=Changes(schema={}, samples=(), metadata=()), t22=Changes(schema={}, samples=(), metadata=()), t3=Changes(schema={}, samples=(), metadata=()), conflict=True)" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conflicts_found" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(MetadataRecordKey(key='hello'),)" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conflicts_found.t1.metadata" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The type codes for a `Conflicts` `namedtuple` such as the one we saw:\n", "\n", " Conflicts(t1=('hello',), t21=(), t22=(), t3=(), conflict=True)\n", "\n", "are as follow:\n", "\n", "- ``t1``: Addition of key in master AND dev with different values.\n", "- ``t21``: Removed key in master, mutated value in dev.\n", "- ``t22``: Removed key in dev, mutated value in master.\n", "- ``t3``: Mutated key in both master AND dev to different values.\n", "- ``conflict``: Bool indicating if any type of conflict is present." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### To resolve, remove the conflict" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=e69ba8aeffc130c57d2ae0a8131c8ea59083cb62'" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "del co.metadata['hello']\n", "# resolved conflict by removing hello key\n", "co.commit('commit which removes conflicting metadata key')" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Selected 3-Way Merge Strategy\n" ] }, { "data": { "text/plain": [ "'a=ef7ddf4a4a216315d929bd905e78866e3ad6e4fd'" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.merge(message='this merge succeeds as it no longer has a conflict', dev_branch='testbranch')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can verify that history looks as we would expect via the log!" 
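, "\n", "\n", "(As an aside, the same diff call shown earlier can also act as a programmatic guard before attempting a merge. The following is only a sketch reusing the objects from this tutorial, not a required step:)\n", "\n", "```python\n", "merge_results, conflicts_found = co.diff.branch('testbranch')\n", "if conflicts_found.conflict:\n", "    print('conflicting metadata keys:', conflicts_found.t1.metadata)\n", "else:\n", "    co.merge(message='guarded merge of testbranch', dev_branch='testbranch')\n", "```"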
] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=ef7ddf4a4a216315d929bd905e78866e3ad6e4fd (\u001B[1;31mnew\u001B[m) : this merge succeeds as it no longer has a conflict\n", "\u001B[1;31m|\u001B[m\u001B[1;32m\\\u001B[m \n", "* \u001B[1;32m|\u001B[m a=e69ba8aeffc130c57d2ae0a8131c8ea59083cb62 : commit which removes conflicting metadata key\n", "* \u001B[1;32m|\u001B[m a=95896880b33fc06a3c2359a03408f07c87bcc8c0 : commit on new branch to hello metadata key so we can demonstrate a conflict\n", "\u001B[1;32m|\u001B[m * a=69a08ca41ca1f5577fb0ffcf59d4d1585f614c4d (\u001B[1;31mtestbranch\u001B[m) : added hellow world metadata\n", "\u001B[1;32m|\u001B[m * a=fcd82f86e39b19c3e5351dda063884b5d2fda67b : mutated sample `0` of `dummy_column` to new value\n", "* \u001B[1;32m|\u001B[m a=c1cf1bd6863ed0b95239d2c9e1a6c6cc65569e94 : commit on `new` branch adding a sample to dummy_arrayset\n", "\u001B[1;32m|\u001B[m\u001B[1;32m/\u001B[m \n", "* a=eaee002ed9c6e949c3657bd50e3949d6a459d50e : first commit with a single sample added to a dummy column\n" ] } ], "source": [ "repo.log()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: docs/Tutorial-003.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Part 3: Working With Remote Servers\n", "\n", "This tutorial will introduce how to start a remote Hangar server, and how to work with [remotes](api.rst#hangar.repository.Remotes) from the client side.\n", "\n", "Particular attention is paid to the concept of a ***partially fetch* / *partial clone*** operations. This is a key component of the Hangar design which provides the ability to quickly and efficiently work with data contained in remote repositories whose full size would be significatly prohibitive to local use under most circumstances.\n", "\n", "*Note:*\n", "\n", "> At the time of writing, the API, user-facing functionality, client-server negotiation protocols, and test coverage of the remotes implementation is generally adqequate for this to serve as an \"alpha\" quality preview. However, please be warned that significantly less time has been spent in this module to optimize speed, refactor for simplicity, and assure stability under heavy loads than the rest of the Hangar core. While we can guarantee that your data is secure on disk, you may experience crashes from time to time when working with remotes. In addition, sending data over the wire should NOT be considered secure in ANY way. No in-transit encryption, user authentication, or secure access limitations are implemented at this moment. We realize the importance of these types of protections, and they are on our radar for the next release cycle. If you are interested in making a contribution to Hangar, this module contains a lot of low hanging fruit which would would provide drastic improvements and act as a good intro the the internal Hangar data model. Please get in touch with us to discuss!" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Starting a Hangar Server\n", "\n", "To start a Hangar server, navigate to the command line and simply execute:\n", "\n", "```\n", "$ hangar server\n", "```\n", "\n", "This will get a local server instance running at `localhost:50051`. The IP and port can be configured by setting the `--ip` and `--port` flags to the desired values in the command line.\n", "\n", "A blocking process will begin in that terminal session. Leave it running while you experiment with connecting from a client repo." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Using Remotes with a Local Repository\n", "\n", "The [CLI](cli.rst#hangar-cli-documentation) is the easiest way to interact with the remote server from a local repository (though all functioanlity is mirrorred via the [repository API](api.rst#hangar.repository.Remotes) (more on that later).\n", "\n", "Before we begin we will set up a repository with some data, a few commits, two branches, and a merge." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Setup a Test Repo\n", "\n", "As normal, we shall begin with creating a repository and adding some data. This should be familiar to you from previous tutorials." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from hangar import Repository\n", "import numpy as np\n", "from tqdm import tqdm\n", "\n", "testData = np.loadtxt('/Users/rick/projects/tensorwerk/hangar/dev/data/dota2Dataset/dota2Test.csv', delimiter=',', dtype=np.uint8)\n", "trainData = np.loadtxt('/Users/rick/projects/tensorwerk/hangar/dev/data/dota2Dataset/dota2Train.csv', delimiter=',', dtype=np.uint16)\n", "\n", "testName = 'test'\n", "testPrototype = testData[0]\n", "trainName = 'train'\n", "trainPrototype = trainData[0]" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hangar Repo initialized at: /Users/rick/projects/tensorwerk/hangar/dev/intro/.hangar\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/rick/projects/tensorwerk/hangar/hangar-py/src/hangar/context.py:94: UserWarning: No repository exists at /Users/rick/projects/tensorwerk/hangar/dev/intro/.hangar, please use `repo.init()` method\n", " warnings.warn(msg, UserWarning)\n" ] } ], "source": [ "repo = Repository('/Users/rick/projects/tensorwerk/hangar/dev/intro/')\n", "repo.init(user_name='Rick Izzo', user_email='rick@tensorwerk.com', remove_old=True)\n", "co = repo.checkout(write=True)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "10500it [00:02, 4286.17it/s] \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "* a=b98f6b65c0036489e53ddaf2b30bf797ddc40da0 (\u001B[1;31madd-train\u001B[m) (\u001B[1;31mmaster\u001B[m) : initial commit on master with test data\n" ] } ], "source": [ "co.add_ndarray_column(testName, prototype=testPrototype)\n", "testcol = co.columns[testName]\n", "\n", "pbar = tqdm(total=testData.shape[0])\n", "with testcol as tcol:\n", " for gameIdx, gameData in enumerate(testData):\n", " if (gameIdx % 500 == 0):\n", " pbar.update(500)\n", " tcol.append(gameData)\n", "pbar.close()\n", "\n", "co.commit('initial commit on master with test data')\n", "\n", "repo.create_branch('add-train')\n", "co.close()\n", "repo.log()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": 
"stream", "text": [ "93000it [00:22, 4078.73it/s] \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "* a=957d20e4b921f41975591cc8ee51a4a6912cb919 (\u001B[1;31madd-train\u001B[m) : added training data on another branch\n", "* a=b98f6b65c0036489e53ddaf2b30bf797ddc40da0 (\u001B[1;31mmaster\u001B[m) : initial commit on master with test data\n" ] } ], "source": [ "co = repo.checkout(write=True, branch='add-train')\n", "\n", "co.add_ndarray_column(trainName, prototype=trainPrototype)\n", "traincol = co.columns[trainName]\n", "\n", "pbar = tqdm(total=trainData.shape[0])\n", "with traincol as trcol:\n", " for gameIdx, gameData in enumerate(trainData):\n", " if (gameIdx % 500 == 0):\n", " pbar.update(500)\n", " trcol.append(gameData)\n", "pbar.close()\n", "\n", "co.commit('added training data on another branch')\n", "co.close()\n", "repo.log()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=bb1b108ef17b7d7667a2ff396f257d82bad11e1d (\u001B[1;31mmaster\u001B[m) : more changes here\n", "* a=b98f6b65c0036489e53ddaf2b30bf797ddc40da0 : initial commit on master with test data\n" ] } ], "source": [ "co = repo.checkout(write=True, branch='master')\n", "co.metadata['earaea'] = 'eara'\n", "co.commit('more changes here')\n", "co.close()\n", "repo.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Pushing to a Remote" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will use the [API remote add()](api.rst#hangar.repository.Remotes.add) method to add a remote, however, this can also be done with the [CLI command](cli.rst#hangar-remote-add):\n", "\n", " $ hangar remote add origin localhost:50051" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RemoteInfo(name='origin', address='localhost:50051')" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repo.remote.add('origin', 'localhost:50051')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Pushing is as simple as running the [push()](api.rst#hangar.repository.Remotes.push) method\n", "from the [API](api.rst#hangar.repository.Remotes.push) or [CLI](cli.rst#hangar-push):\n", "\n", " $ hangar push origin master" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Push the `master` branch:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "counting objects: 100%|██████████| 2/2 [00:00<00:00, 5.47it/s]\n", "pushing schemas: 100%|██████████| 1/1 [00:00<00:00, 133.74it/s]\n", "pushing data: 97%|█████████▋| 10001/10294 [00:01<00:00, 7676.23it/s]\n", "pushing metadata: 100%|██████████| 1/1 [00:00<00:00, 328.50it/s]\n", "pushing commit refs: 100%|██████████| 2/2 [00:00<00:00, 140.73it/s]\n" ] }, { "data": { "text/plain": [ "'master'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repo.remote.push('origin', 'master')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Push the `add-train` branch:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "counting objects: 100%|██████████| 1/1 [00:01<00:00, 1.44s/it]\n", "pushing schemas: 100%|██████████| 1/1 [00:00<00:00, 126.05it/s]\n", "pushing data: 99%|█████████▉| 92001/92650 [00:12<00:00, 7107.60it/s] \n", "pushing metadata: 0it [00:00, 
?it/s]\n", "pushing commit refs: 100%|██████████| 1/1 [00:00<00:00, 17.05it/s]\n" ] }, { "data": { "text/plain": [ "'add-train'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repo.remote.push('origin', 'add-train')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Details of the Negotiation Processs\n", "\n", "> The following details are not necessary to use the system, but may be of interest to some readers\n", "\n", "When we push data, **we perform a negotation with the server** which basically occurs like this:\n", "\n", "\n", "- Hi, I would like to push this branch, do you have it?\n", "\n", " - If yes, what is the latest commit you record on it?\n", "\n", " - Is that the same commit I'm trying to push? If yes, abort.\n", "\n", " - Is that a commit I don't have? If yes, someone else has updated that branch, abort.\n", "\n", "- Here's the commit digests which are parents of my branches head, which commits are you missing?\n", "\n", "- Ok great, I'm going to scan through each of those commits to find the data hashes they contain. Tell me which ones you are missing.\n", "\n", "- Thanks, now I'll send you all of the data corresponding to those hashes. It might be a lot of data, so we'll handle this in batches so that if my connection cuts out, we can resume this later\n", "\n", "- Now that you have the data, I'm going to send the actual commit references for you to store, this isn't that much information, but you'll be sure to verify that I'm not trying to pull any funny buisness and send you incorrect data.\n", "\n", "- Now that you've received everything, and have verified it matches what I told you it is, go ahead and make those commits I've pushed `available` as the `HEAD` of the branch I just sent. It's some good work that others will want!\n", "\n", "\n", "When we want to fetch updates to a branch, essentially the exact same thing happens in reverse. Instead of asking the server what it doesn't have, we ask it what it does have, and then request the stuff that we are missing!\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Partial Fetching and Clones\n", "\n", "**Now we will introduce one of the most important and unique features of Hangar remotes: Partial fetch/clone of data!**\n", "\n", "*There is a very real problem with keeping the full history of data - **it's huge**!* The size of data can very easily exceeds what can fit on (most) contributors laptops or personal workstations. This section explains how Hangar can handle working with columns which are prohibitively large to download or store on a single machine.\n", "\n", "As mentioned in High Performance From Simplicity, under the hood Hangar deals with “Data” and “Bookkeeping” completely separately. We’ve previously covered what exactly we mean by Data in How Hangar Thinks About Data, so we’ll briefly cover the second major component of Hangar here.\n", "In short “Bookkeeping” describes everything about the repository. By everything, we do mean that the Bookkeeping records describe everything: all commits, parents, branches, columns, samples, data descriptors, schemas, commit message, etc. 
Though complete, these records are fairly small (tens of MB in size for decently sized repositories with decent history), and are highly compressed for fast transfer between a Hangar client/server.\n", "\n", "A brief technical interlude:\n", "\n", "> There is one very important (and rather complex) property which gives Hangar Bookkeeping massive power: existence of some data piece is always known to Hangar and stored immutably once committed. However, the access pattern, backend, and locating information for this data piece may (and over time, will) be unique in every hangar repository instance.\n", ">\n", "> Though the details of how this works are well beyond the scope of this document, the following example may provide some insight into the implications of this property:\n", ">\n", "> If you clone some Hangar repository, Bookkeeping says that “some number of data pieces exist” and they should be retrieved from the server. However, the bookkeeping records transferred in a fetch / push / clone operation do not include information about where that piece of data existed on the client (or server) computer. Two synced repositories can use completely different backends to store the data, in completely different locations, and it does not matter - Hangar only guarantees that when collaborators ask for a data sample in some checkout, they will be provided with identical arrays, not that they will come from the same place or be stored in the same way. Only when data is actually retrieved is the “locating information” set for that repository instance.\n", "\n", "Because Hangar makes no assumptions about how/where it should retrieve some piece of data, or even an assumption that it exists on the local machine, and because records are small and completely describe history, once a machine has the Bookkeeping, it can decide what data it actually wants to materialize on its local disk! These partial fetch / partial clone operations can materialize any desired data, whether it be for a few records at the head branch, for all data in a commit, or for the entire history of the data. A future release will even include the ability to stream data directly to a Hangar checkout and materialize the data in memory without having to save it to disk at all!\n", "\n", "More importantly: since Bookkeeping describes all history, merging can be performed between branches which may contain partial (or even no) actual data. In other words, **you don’t need data on disk to merge changes into it.** It’s an odd concept, which will be demonstrated later in this tutorial." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Cloning a Remote Repo" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " $ hangar clone localhost:50051" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/rick/projects/tensorwerk/hangar/hangar-py/src/hangar/context.py:94: UserWarning: No repository exists at /Users/rick/projects/tensorwerk/hangar/dev/dota-clone/.hangar, please use `repo.init()` method\n", " warnings.warn(msg, UserWarning)\n" ] } ], "source": [ "cloneRepo = Repository('/Users/rick/projects/tensorwerk/hangar/dev/dota-clone/')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "When we perform the initial clone, we will only receive the `master` branch by default." 
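, "\n", "For reference, the clone can also be performed through the repository API rather than the CLI. The sketch below is a minimal example; the exact argument names of `Repository.clone()` are an assumption here, so check the [API documentation](api.rst) on your install before relying on them:\n", "\n", "```python\n", "# clone from the running server into the (currently empty) repository directory;\n", "# only the `master` branch is retrieved by default\n", "cloneRepo.clone('Rick Izzo', 'rick@tensorwerk.com', 'localhost:50051')\n", "\n", "# other branches can be fetched afterwards without transferring their data\n", "cloneRepo.remote.fetch('origin', 'add-train')\n", "```"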
] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "fetching commit data refs: 0%| | 0/2 [00:00 does not exist on this machine. Perform a `data-fetch` operation to retrieve it from the remote server.", "output_type": "error", "traceback": [ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[0;31mFileNotFoundError\u001B[0m Traceback (most recent call last)", "\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[0;32m----> 1\u001B[0;31m \u001B[0mco\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcolumns\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'test'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mtestKey\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m", "\u001B[0;32m~/projects/tensorwerk/hangar/hangar-py/src/hangar/columns/layout_flat.py\u001B[0m in \u001B[0;36m__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 222\u001B[0m \"\"\"\n\u001B[1;32m 223\u001B[0m \u001B[0mspec\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_samples\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mkey\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 224\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_be_fs\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mspec\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mbackend\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mread_data\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mspec\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 225\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 226\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mget\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mkey\u001B[0m\u001B[0;34m:\u001B[0m \u001B[0mKeyType\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdefault\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mNone\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;32m~/projects/tensorwerk/hangar/hangar-py/src/hangar/backends/remote_50.py\u001B[0m in \u001B[0;36mread_data\u001B[0;34m(self, hashVal)\u001B[0m\n\u001B[1;32m 172\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mread_data\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mhashVal\u001B[0m\u001B[0;34m:\u001B[0m \u001B[0mREMOTE_50_DataHashSpec\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;34m->\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 173\u001B[0m raise FileNotFoundError(\n\u001B[0;32m--> 174\u001B[0;31m \u001B[0;34mf'data hash spec: {REMOTE_50_DataHashSpec} does not exist on this machine. '\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 175\u001B[0m f'Perform a `data-fetch` operation to retrieve it from the remote server.')\n\u001B[1;32m 176\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;31mFileNotFoundError\u001B[0m: data hash spec: does not exist on this machine. Perform a `data-fetch` operation to retrieve it from the remote server." 
] } ], "source": [ "co.columns['test'][testKey]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Fetching Data from a Remote\n", "\n", "To retrieve the data, we use the [fetch_data()](api.rst#hangar.repository.Remotes.fetch_data)\n", "method (accessible via the [API](api.rst#hangar.repository.Remotes.fetch_data) or\n", "[fetch-data](cli.rst#hangar-fetch-data) via the CLI).\n", "\n", "The amount / type of data to retrieve is extremly configurable via the following options:\n", "\n", ".. include:: ./noindexapi/apiremotefetchdata.rst\n", "\n", "This will retrieve all the data on the `master` branch, but not on the `add-train` branch." ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "counting objects: 100%|██████████| 1/1 [00:00<00:00, 27.45it/s]\n", "fetching data: 100%|██████████| 10294/10294 [00:01<00:00, 6664.60it/s]\n" ] }, { "data": { "text/plain": [ "['a=bb1b108ef17b7d7667a2ff396f257d82bad11e1d']" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cloneRepo.remote.fetch_data('origin', branch='master')" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * Checking out BRANCH: master with current HEAD: a=bb1b108ef17b7d7667a2ff396f257d82bad11e1d\n" ] } ], "source": [ "co = cloneRepo.checkout(branch='master')" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar ReaderCheckout \n", " Writer : False \n", " Commit Hash : a=bb1b108ef17b7d7667a2ff396f257d82bad11e1d \n", " Num Columns : 1 \n", " Num Metadata : 1\n" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Unlike before, we see that there is no partial references from the `repr`" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Columns \n", " Writeable : False \n", " Number of Columns : 1 \n", " Column Names / Partial Remote References: \n", " - test / False" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleReader \n", " Column Name : test \n", " Writeable : False \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint8 \n", " Shape : (117,) \n", " Number of Samples : 10294 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns['test']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "***When we access the data this time, it is available and retrieved as requested!***" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([255, 223, 8, 2, 0, 255, 0, 0, 0, 0, 0, 0, 1,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 1, 0, 0, 0, 255, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 255, 0, 0,\n", " 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 1, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", " dtype=uint8)" ] }, "execution_count": 34, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "co['test', testKey]" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "co.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Working with mixed local / remote checkout Data\n", "\n", "If we were to checkout the `add-train` branch now, we would see that there is no `arrayset \"train\"` data, but there will be data common to the ancestor that `master` and `add-train` share." ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=957d20e4b921f41975591cc8ee51a4a6912cb919 (\u001B[1;31madd-train\u001B[m) (\u001B[1;31morigin/add-train\u001B[m) : added training data on another branch\n", "* a=b98f6b65c0036489e53ddaf2b30bf797ddc40da0 : initial commit on master with test data\n" ] } ], "source": [ "cloneRepo.log('add-train')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this case, the common ancestor is commit: `9b93b393e8852a1fa57f0170f54b30c2c0c7d90f`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To show that there is no data on the `add-train` branch:" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * Checking out BRANCH: add-train with current HEAD: a=957d20e4b921f41975591cc8ee51a4a6912cb919\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/rick/projects/tensorwerk/hangar/hangar-py/src/hangar/columns/constructors.py:45: UserWarning: Column: train contains `reference-only` samples, with actual data residing on a remote server. A `fetch-data` operation is required to access these samples.\n", " f'operation is required to access these samples.', UserWarning)\n" ] } ], "source": [ "co = cloneRepo.checkout(branch='add-train')" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar ReaderCheckout \n", " Writer : False \n", " Commit Hash : a=957d20e4b921f41975591cc8ee51a4a6912cb919 \n", " Num Columns : 2 \n", " Num Metadata : 0\n" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Columns \n", " Writeable : False \n", " Number of Columns : 2 \n", " Column Names / Partial Remote References: \n", " - test / False\n", " - train / True" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co.columns" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([255, 223, 8, 2, 0, 255, 0, 0, 0, 0, 0, 0, 1,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 1, 0, 0, 0, 255, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 255, 0, 0,\n", " 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 1, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n", " dtype=uint8)" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "co['test', testKey]" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "trainKey = next(co.columns['train'].keys())" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "ename": 
"FileNotFoundError", "evalue": "data hash spec: does not exist on this machine. Perform a `data-fetch` operation to retrieve it from the remote server.", "output_type": "error", "traceback": [ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[0;31mFileNotFoundError\u001B[0m Traceback (most recent call last)", "\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m\u001B[0m\n\u001B[0;32m----> 1\u001B[0;31m \u001B[0mco\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mcolumns\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'train'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mtrainKey\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m", "\u001B[0;32m~/projects/tensorwerk/hangar/hangar-py/src/hangar/columns/layout_flat.py\u001B[0m in \u001B[0;36m__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 222\u001B[0m \"\"\"\n\u001B[1;32m 223\u001B[0m \u001B[0mspec\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_samples\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mkey\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 224\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_be_fs\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mspec\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mbackend\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mread_data\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mspec\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 225\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 226\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mget\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mkey\u001B[0m\u001B[0;34m:\u001B[0m \u001B[0mKeyType\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdefault\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mNone\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;32m~/projects/tensorwerk/hangar/hangar-py/src/hangar/backends/remote_50.py\u001B[0m in \u001B[0;36mread_data\u001B[0;34m(self, hashVal)\u001B[0m\n\u001B[1;32m 172\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mread_data\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mhashVal\u001B[0m\u001B[0;34m:\u001B[0m \u001B[0mREMOTE_50_DataHashSpec\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;34m->\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 173\u001B[0m raise FileNotFoundError(\n\u001B[0;32m--> 174\u001B[0;31m \u001B[0;34mf'data hash spec: {REMOTE_50_DataHashSpec} does not exist on this machine. '\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 175\u001B[0m f'Perform a `data-fetch` operation to retrieve it from the remote server.')\n\u001B[1;32m 176\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;31mFileNotFoundError\u001B[0m: data hash spec: does not exist on this machine. Perform a `data-fetch` operation to retrieve it from the remote server." 
] } ], "source": [ "co.columns['train'][trainKey]" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "co.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merging Branches with Partial Data\n", "\n", "Even though we don't have the actual data references in the `add-train` branch, it is still possible to merge the two branches!\n", "\n", "This is possible because Hangar doesn't use the data contents in its internal model of checkouts / commits, but instead thinks of a checkouts as a sequence of columns / metadata / keys & their associated data hashes (which are very small text records; ie. \"bookkeeping\"). To show this in action, lets merge the two branches `master` (containing all data locally) and `add-train` (containing partial remote references for the `train` arrayset) together and push it to the Remote!" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=bb1b108ef17b7d7667a2ff396f257d82bad11e1d (\u001B[1;31mmaster\u001B[m) (\u001B[1;31morigin/master\u001B[m) : more changes here\n", "* a=b98f6b65c0036489e53ddaf2b30bf797ddc40da0 : initial commit on master with test data\n" ] } ], "source": [ "cloneRepo.log('master')" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=957d20e4b921f41975591cc8ee51a4a6912cb919 (\u001B[1;31madd-train\u001B[m) (\u001B[1;31morigin/add-train\u001B[m) : added training data on another branch\n", "* a=b98f6b65c0036489e53ddaf2b30bf797ddc40da0 : initial commit on master with test data\n" ] } ], "source": [ "cloneRepo.log('add-train')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Perform the Merge**" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Selected 3-Way Merge Strategy\n" ] }, { "data": { "text/plain": [ "'a=ace3dacbd94f475664ee136dcf05430a2895aca3'" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cloneRepo.merge('merge commit here', 'master', 'add-train')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**IT WORKED!**" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=ace3dacbd94f475664ee136dcf05430a2895aca3 (\u001B[1;31mmaster\u001B[m) : merge commit here\n", "\u001B[1;31m|\u001B[m\u001B[1;32m\\\u001B[m \n", "* \u001B[1;32m|\u001B[m a=bb1b108ef17b7d7667a2ff396f257d82bad11e1d (\u001B[1;31morigin/master\u001B[m) : more changes here\n", "\u001B[1;32m|\u001B[m * a=957d20e4b921f41975591cc8ee51a4a6912cb919 (\u001B[1;31madd-train\u001B[m) (\u001B[1;31morigin/add-train\u001B[m) : added training data on another branch\n", "\u001B[1;32m|\u001B[m\u001B[1;32m/\u001B[m \n", "* a=b98f6b65c0036489e53ddaf2b30bf797ddc40da0 : initial commit on master with test data\n" ] } ], "source": [ "cloneRepo.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can check the summary of the master commit to check that the contents are what we expect (containing both `test` and `train` columns)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary of Contents Contained in Data Repository \n", " \n", "================== \n", "| Repository Info \n", "|----------------- \n", "| Base 
Directory: /Users/rick/projects/tensorwerk/hangar/dev/dota-clone \n", "| Disk Usage: 42.03 MB \n", " \n", "=================== \n", "| Commit Details \n", "------------------- \n", "| Commit: a=ace3dacbd94f475664ee136dcf05430a2895aca3 \n", "| Created: Tue Feb 25 19:18:30 2020 \n", "| By: rick izzo \n", "| Email: rick@tensorwerk.com \n", "| Message: merge commit here \n", " \n", "================== \n", "| DataSets \n", "|----------------- \n", "| Number of Named Columns: 2 \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"test\", layout=\"flat\") \n", "| Num Data Pieces: 10294 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: ndarray \n", "| - schema_type: fixed_shape \n", "| - shape: (117,) \n", "| - dtype: uint8 \n", "| - backend: 10 \n", "| - backend_options: {} \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"train\", layout=\"flat\") \n", "| Num Data Pieces: 92650 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: ndarray \n", "| - schema_type: fixed_shape \n", "| - shape: (117,) \n", "| - dtype: uint16 \n", "| - backend: 10 \n", "| - backend_options: {} \n", " \n", "================== \n", "| Metadata: \n", "|----------------- \n", "| Number of Keys: 1 \n", "\n" ] } ], "source": [ "cloneRepo.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Pushing the Merge back to the Remote\n", "\n", "To propagate this merge back to our original copy of the repository (`repo`), we just push the `master` branch back to the remote via the API or CLI; `repo` can then fetch it." ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "counting objects: 100%|██████████| 1/1 [00:00<00:00, 1.02it/s]\n", "pushing schemas: 0it [00:00, ?it/s]\n", "pushing data: 0it [00:00, ?it/s]\n", "pushing metadata: 0it [00:00, ?it/s]\n", "pushing commit refs: 100%|██████████| 1/1 [00:00<00:00, 34.26it/s]\n" ] }, { "data": { "text/plain": [ "'master'" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cloneRepo.remote.push('origin', 'master')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Looking at the current state of our other repository instance (`repo`), we see that the merge changes haven't yet propagated to it (since it hasn't fetched from the remote yet)." ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=bb1b108ef17b7d7667a2ff396f257d82bad11e1d (\u001B[1;31mmaster\u001B[m) (\u001B[1;31morigin/master\u001B[m) : more changes here\n", "* a=b98f6b65c0036489e53ddaf2b30bf797ddc40da0 : initial commit on master with test data\n" ] } ], "source": [ "repo.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To fetch the merged changes, just [fetch()](api.rst#hangar.repository.Remotes.fetch) the branch as normal. Like all fetches, this will be a fast operation, as it will be a `partial fetch` operation, not actually transferring the data." 
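, "\n", "If you prefer the command line, the equivalent is presumably a `hangar fetch` invocation (an assumption here, mirroring the `hangar push origin master` syntax shown earlier):\n", "\n", "    $ hangar fetch origin master"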
] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "fetching commit data refs: 100%|██████████| 1/1 [00:01<00:00, 1.33s/it]\n", "fetching commit spec: 100%|██████████| 1/1 [00:00<00:00, 37.61it/s]\n" ] }, { "data": { "text/plain": [ "'origin/master'" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repo.remote.fetch('origin', 'master')" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=ace3dacbd94f475664ee136dcf05430a2895aca3 (\u001B[1;31morigin/master\u001B[m) : merge commit here\n", "\u001B[1;31m|\u001B[m\u001B[1;32m\\\u001B[m \n", "* \u001B[1;32m|\u001B[m a=bb1b108ef17b7d7667a2ff396f257d82bad11e1d (\u001B[1;31mmaster\u001B[m) : more changes here\n", "\u001B[1;32m|\u001B[m * a=957d20e4b921f41975591cc8ee51a4a6912cb919 (\u001B[1;31madd-train\u001B[m) (\u001B[1;31morigin/add-train\u001B[m) : added training data on another branch\n", "\u001B[1;32m|\u001B[m\u001B[1;32m/\u001B[m \n", "* a=b98f6b65c0036489e53ddaf2b30bf797ddc40da0 : initial commit on master with test data\n" ] } ], "source": [ "repo.log('origin/master')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To bring our `master` branch up to date is a simple fast-forward merge." ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Selected Fast-Forward Merge Strategy\n" ] }, { "data": { "text/plain": [ "'a=ace3dacbd94f475664ee136dcf05430a2895aca3'" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repo.merge('ff-merge', 'master', 'origin/master')" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=ace3dacbd94f475664ee136dcf05430a2895aca3 (\u001B[1;31mmaster\u001B[m) (\u001B[1;31morigin/master\u001B[m) : merge commit here\n", "\u001B[1;31m|\u001B[m\u001B[1;32m\\\u001B[m \n", "* \u001B[1;32m|\u001B[m a=bb1b108ef17b7d7667a2ff396f257d82bad11e1d : more changes here\n", "\u001B[1;32m|\u001B[m * a=957d20e4b921f41975591cc8ee51a4a6912cb919 (\u001B[1;31madd-train\u001B[m) (\u001B[1;31morigin/add-train\u001B[m) : added training data on another branch\n", "\u001B[1;32m|\u001B[m\u001B[1;32m/\u001B[m \n", "* a=b98f6b65c0036489e53ddaf2b30bf797ddc40da0 : initial commit on master with test data\n" ] } ], "source": [ "repo.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Everything is as it should be!** Now, try it out for yourself!" 
] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary of Contents Contained in Data Repository \n", " \n", "================== \n", "| Repository Info \n", "|----------------- \n", "| Base Directory: /Users/rick/projects/tensorwerk/hangar/dev/intro \n", "| Disk Usage: 77.43 MB \n", " \n", "=================== \n", "| Commit Details \n", "------------------- \n", "| Commit: a=ace3dacbd94f475664ee136dcf05430a2895aca3 \n", "| Created: Tue Feb 25 19:18:30 2020 \n", "| By: rick izzo \n", "| Email: rick@tensorwerk.com \n", "| Message: merge commit here \n", " \n", "================== \n", "| DataSets \n", "|----------------- \n", "| Number of Named Columns: 2 \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"test\", layout=\"flat\") \n", "| Num Data Pieces: 10294 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: ndarray \n", "| - schema_type: fixed_shape \n", "| - shape: (117,) \n", "| - dtype: uint8 \n", "| - backend: 10 \n", "| - backend_options: {} \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"train\", layout=\"flat\") \n", "| Num Data Pieces: 92650 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: ndarray \n", "| - schema_type: fixed_shape \n", "| - shape: (117,) \n", "| - dtype: uint16 \n", "| - backend: 10 \n", "| - backend_options: {} \n", " \n", "================== \n", "| Metadata: \n", "|----------------- \n", "| Number of Keys: 1 \n", "\n" ] } ], "source": [ "repo.summary()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: docs/Tutorial-Dataset.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "CQhd0TTQCMeh" }, "source": [ "## Dataloaders for Machine Learning (Tensorflow & PyTorch)\n", "\n", "This tutorial acts as a step by step guide for fetching, preprocessing, storing and loading the [MS-COCO](http://cocodataset.org/#home) dataset for image captioning using deep learning. We have chosen **image captioning** for this tutorial not by accident. For such an application, the dataset required will have both fixed shape (image) and variably shaped (caption because it's sequence of natural language) data. This diversity should help the user to get a mental model about how flexible and easy is to plug Hangar to the existing workflow.\n", "\n", "You will use the MS-COCO dataset to train our model. The dataset contains over 82,000 images, each of which has at least 5 different caption annotations.\n", "\n", "This tutorial assumes you have downloaded and extracted the [MS-COCO dataset](http://cocodataset.org/#home) in the current directory. If you haven't yet, shell commands below should help you do it (beware, it's about 14 GB data). 
If you are on Windows, please find the equivalent commands to get the dataset downloaded.\n", "\n", "\n", "```bash\n", "wget http://images.cocodataset.org/zips/train2014.zip\n", "unzip train2014.zip\n", "rm train2014.zip\n", "wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip\n", "unzip annotations_trainval2014.zip\n", "rm annotations_trainval2014.zip\n", "```\n", "\n", "Let's install the required packages in our environment. We will be using Tensorflow 1.14 in this tutorial, but it should work in all Tensorflow versions starting from 1.12; do let us know if you face any hiccups. Install the packages given below before continuing. Apart from Tensorflow and Hangar, we use [SpaCy](https://spacy.io/) for pre-processing the captions. SpaCy is probably the most widely used natural language toolkit now.\n", "\n", "```bash\n", "tensorflow==1.14.0\n", "hangar\n", "spacy==2.1.8\n", "```\n", "\n", "One more thing before jumping into the tutorial: we need to download the SpaCy English model `en_core_web_md`, which cannot be dynamically loaded. This means it must be downloaded with the command below outside this runtime, after which the runtime should be reloaded.\n", "\n", "```bash\n", "python -m spacy download en_core_web_md\n", "```\n", "\n", "Once all the dependencies are installed and loaded, we can start building our Hangar repository.\n", "\n", "\n", "### Hangar Repository creation and column init\n", "We will create a repository and initialize one column named `images` now for a quick demo of how the Tensorflow dataloader works. Then we wipe the current repository and create new columns for later portions." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "HGXOwLJ3IWPq" }, "outputs": [], "source": [ "repo_path = 'hangar_repo'\n", "username = 'hhsecond'\n", "email = 'sherin@tensorwerk.com'\n", "img_shape = (299, 299, 3)\n", "image_dir = '/content/drive/My Drive/train2014'\n", "annotation_file = ''\n", "import logging\n", "logging.getLogger(\"tensorflow\").setLevel(logging.ERROR)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "id": "fHehOEhwCMej", "outputId": "210f9b87-9c59-49ea-fd31-92ba18d140b3" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hangar Repo initialized at: hangar_repo/.hangar\n" ] } ], "source": [ "import os\n", "from hangar import Repository\n", "import tensorflow as tf\n", "import numpy as np\n", "\n", "tf.compat.v1.enable_eager_execution()\n", "\n", "\n", "if not os.path.isdir(repo_path):\n", " os.mkdir(repo_path)\n", "\n", "repo = Repository(repo_path)\n", "repo.init(user_name=username, user_email=email, remove_old=True)\n", "co = repo.checkout(write=True)\n", "\n", "images_column = co.add_ndarray_column('images', shape=img_shape, dtype=np.uint8,)\n", "co.commit('column init')\n", "co.close()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "QENDY8LvGGhb" }, "source": [ "### Add sample images\n", "Here we add a few images to the repository and show how we can load this data through a Tensorflow dataloader. We use the ideas we learn here in the later portions to build a fully-fledged training loop." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "g61tY81hHr8c" }, "outputs": [], "source": [ "import os\n", "from PIL import Image\n", "\n", "\n", "co = repo.checkout(write=True)\n", "images_column = co.columns['images']\n", "try:\n", " for i, file in enumerate(os.listdir(image_dir)):\n", " pil_img = Image.open(os.path.join(image_dir, file))\n", " if pil_img.mode == 'L':\n", " pil_img = pil_img.convert('RGB')\n", " img = pil_img.resize(img_shape[:-1])\n", " img = np.array(img)\n", " images_column[i] = img\n", " if i != 0 and i % 2 == 0: # stopping at 2th image\n", " break\n", "except Exception as e:\n", " print('Exception', e)\n", " co.close()\n", " raise e\n", "co.commit('added image')\n", "co.close()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "dvFci5P8Lm7C" }, "source": [ "### Let's make a Tensorflow dataloader\n", "Hangar provides `make_numpy_dataset`, `make_tensorflow_dataset` & `make_torch_dataset` for creating Tensorflow & PyTorch datasets from Hangar columns. You can read more about it in the [documentation](https://hangar-py.readthedocs.io/en/latest/api.html#ml-framework-dataloaders). Next we'll make a Tensorflow dataset and loop over it to make sure we have got a proper Tensorflow dataset." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "Sc7XGXMVLuDO" }, "outputs": [], "source": [ "from hangar.dataset import make_tensorflow_dataset" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 357 }, "colab_type": "code", "id": "tb5g_JrJVbqT", "outputId": "a8fe4e7d-243d-4dae-dc94-66364342a913" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * Checking out BRANCH: master with current HEAD: b769f6d49a7dbb3dcd4f7c6e1c2a32696fd4128f\n", "(repo_pth=hangar_repo/.hangar, aset_name=images, default_schema_hash=b6edf0320f20, isVar=False, varMaxShape=(299, 299, 3), varDtypeNum=2, mode=r)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/hangar/dataloaders/tfloader.py:88: UserWarning: Dataloaders are experimental in the current release.\n", " warnings.warn(\"Dataloaders are experimental in the current release.\", UserWarning)\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAQUAAAD8CAYAAAB+fLH0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzsvGmQJFd57/3LrKx937eu3rfp7pme\nTSPNIo1GGgkhWUiAAGFWs5nF4Gtsv8Zm032xMS9gVoMBGxAYLEAgJCEb7RrNII00+9I93T3dPb13\nVXXte1ZVLvfD6BLXEe8N60ZYcXHE/L5lxsk85zyZzz+f85yTR9B1nStc4QpX+J+I/7cbcIUrXOF3\niyuicIUrXOHfcUUUrnCFK/w7rojCFa5whX/HFVG4whWu8O+4IgpXuMIV/h2vmCgIgnCLIAgzgiDM\nCYLwsVeqnitc4Qr/uQivxDoFQRAMwEXgJmAVOA68Wdf1C//plV3hClf4T+WVihR2AXO6rl/Sdb0F\n/AS44xWq6wpXuMJ/ItIrdN84sPK/HK8CV//vCjtsFj0ScAKgKCq6rqHrOoIooCgqJpOZdquFIIqY\njEYEQUDXdVRVQ9N0DAYR0Gm3FcwmM7V6HZfLgdyUMRnNoGvogCCItFtNTGYjrVaLdruNZDACOqIo\nIhpEJMmILDcRRQNGoxFFaWMymahWa9hsNhSliarqgIhBlBAMOu12G4MoIRkN6JpOq9XCbLEiCgKS\nwUCxVMTpdKJqbQTdQKvdxGCQMEgGVFVDEAQMBgPtdguTyYyIgChKNOTaS2VULGYz7XYbo0FCF0Uq\nxQJOhwMNAUEU0TQdAWgrCibT5T5dvq8RVdPQNQ1dAMlgAAR0VaUhNzAYDBiky6+BZJAQBBB1kVRq\nFUHT8IRDSCYbqAoIICKCeNn+mqaiaRq6pmMymzAYDMiyjFGS0AH0y21AAHQBQeByH4xGGnIDk9mM\nKBgwGAxoukq71UKSTJfbIhmoVauYTGYsZivLyyuUqjX6+zuRDCYUpY0kGQFotZpYrdbfPlNN0ZAk\nCdEgIYgGjEYQMbxkB5nV1TTxrhi6BqJgQGm3kYwGAFT18rUgoLSbNJtNLBYHgqih6yCI6uUyBglU\ngUw2i9PtwmIxo2kaICK+1Od2u4UkSfzPaFwUL9u32ZQxGk0IgoCqKAiiADqomvbbPl1+frx07eW6\nLxsSBAFURb38zigKJpMJTdPQNA1JMiEKIqqmIggaqqaiaxqVSpmVVCWr63rwP3LeV0oU/kMEQXgf\n8D4Aj9PC33zkdiqVEpIksbywitlqIxiNMX9plo5YGKWtUa3WcLvd2O12pqYuYDCYuP32W/nM//sp\n3vWu97C0tEIoGGdpaZ5Go4HH60TVFUaGN6EoGtVag2x6DaPRSH/f4EsW0HjooYfo7u6mJ9HHt751\nL5/5yqeZnLhAOp3huv27KRRy+P1BctkycrOErguEwj6qlRZNWcFsNpIrFLGYvPgDDowWM8nkGmq7\nTSTg5/zFJTriYbS2Rmb2LIV8g46BUbo3deBweUmn0zgsZk6dOMHpE+cwqSJvevMbCXeHkdUmiga1\nhozfebk/Ykulpmvkk2keu/8BbrjlIN5IkEqzQaupoIlGOiJRAoEA7XaTZrNNrVaiXK7idnuJRyO0\n5TImm5NCoYDT5+DE8VPs2bkdo9HIT771j7gjO9h8zVZ+8MWvs//mLWSLBm55/R1MTp1n55YttIBz\n58+wc8suCuUSkxfOcN3+G1heXGJkZBMzU1PYbDZUtc3p0yfp6enD7w3Q1dXFgw8+yGvfcBeVSgXR\nABPnL+BwuRAFiUQiQWYjhaqrBII+jGaRoe1v4guf/APe9+nP8dcfeDcOW5DBER/btm0jl5Eplkr0\n9XZjNBqZuzhF0O1mZX2e3pEB2oqA39PNn3/oY1xaneajf/QeLHYLs+urZFeT/P5b309Xb4yNTJJc\nuoLFYqPWqGMxGTHbdSSrkdZahUDEwyOPPc3pYyfZvf9abjx4PWanhKgbQBU5fPgwbU1l+/YtaJpG\nq9UC0YDdbqeQy2ISBQqVKh2JGIV8CY/HRzabJxhyUyyUsdkcpNMZAgEfitJCVVUi0SDlSpHcRoZE\nZz9ra2na7SbRaBS1VcPhdKLrOtOzFwkEglQqFUKhIOvJVfK5Itu3byeXy+H2OGk0GvTued/Sy/HN\nV2r4sAYk/pfjjpfO/RZd17+j6/pOXdd3OmwmlpZWkOUW2WweXVex2a1ks1li0Q7QRVwuN9FonFKp\ngslkwuPxoGkaU9OTHDhwIzPTc9RqNWq1CkaTBIKO1WrF5/NRKBRot9ts3boVu92OyWihVKogmS2U\nijXuev3dtFsqPp+PW24+QDK5RiwWY3Cwn2q1iihKJJNpNFXEbLbidLopl2QETFjNEucnzqEoCugK\ntVKRarlIo9qgp6Mbv8NPOBKn2mgRCAfYSGZoNmWcLjOSRSSZSdPWdboGBrj2wH7e/Pa72bt3L/98\n7485e+okrWYdm0UivbqOrus4HA4mpy5QLhTxeFxs3baZp554lIDHjd3mxGixYrfbOfLccyiKgqZp\n+Hw+isUifX09KK0mVrORSqVCq9Wi1VZBFLjuwD5WVhewmU2szyxy5+vuYmBolPf/+Z+wY88efvmL\nXxDyB+hNdFNr1Dl3/gwel5PJyUl8Pg9btowxMTGBw2FnYvIcmqaQz2ep1Spce91efD4P8XicCxcu\nEIlEmDh3jpYsk0mlGRsbQ9d1ItEQL7xwmGKxwOzMLKm1NBbFTFg08tZ3voND3/8B1123jze+6XVs\nHd9FLtuk1qgSjgQAkGWZcDiK0+mkr3sU2lZEwYQn0cfy8gXu+8H3KWbT7D6wl3e+9Z3cfPMN/Nuv\nHuHi1Dwuu49GrYbf7ycUCeO021HbGrFglJ8+8HPuuP1dfP3r3+Mzf/0xXn37zWSLKXKFHBfnZkmn\nM9x4400EA2EWly6RSq/TbDYxm600Gk28Li/JZJKBgSFy2QJGo5FCIYfL5SCdymE2W6lWy4iihs3m\nQJZbWK1W6vU6RulyhFipVEgkEjidTkCjVm9y/vwElUqVRCyOJIgEfX5MEvg9broScWamJjFJImqr\nSa1ceNnO+0olGiUuJxpv5LIYHAd+X9f1yf+/8t0xr/7e27fjdDqp1+tUKyk8viDBaIJ2S8DvdZLN\nZjGbrKyuruJ0Omm2GkSjIeqNMi57AFluEYtFOHXmML193WgqVCp1Ep2dHHr6Wfr7B9F0gdTaAsFA\nnPVUmgM3HiCd3kCWWyhtmXg4QC6Xw2A2oaoqTqeX1ZU1cvkUb3jj67g0t0E4HObkiXMMb+pBURSM\nYpH5xVVC4R5sJgMbhQoSArVaDV8oyIlzZ7hq21XIlSY2pUTZYCHWGaSUXGPq/AXc/iE6+4fweFws\nrszRquXxRH2IZoGIOc6n/9tfIjcUPvf3X6BUy5Mr5xke
3ITf7eLEiRdJr13CoMFGusANd76ZuaUF\nevp6ELQ2WlvB4fAwMzXN0EgvhVyero4uzpw9wdaxLZw4ewHRYCLeE6fVahFz2Lg4d4lf/sPf8+Gv\nfxmn5iCn5nj0Z7/kjz/6J/zxu/+Y/+cT91DUKnR2dtCSG9QbGk25hskM7baEprZxOu2YDBKFYg7Q\nsNttrK8naVYVTEYLTqeTRG838/OzBAIByrUqPn+QdDpNR9RDS24hNwQcDjuSw8zJI4cJhHsJRSI0\nlSxWsw9dqOHxxFEVA81mAwEFWW7QkQixeHEGm8WKy+fF43Vz/b7X8s2vf5FqI89vDr/IzXfcic0s\nYjE7UdpNTKY6jUaNp546xY5rdjKyZYTUUop7/uLzZHNlfnXyB2SXNliYm8cT7cZmMlEuZmnKKi67\nh+9/59u8/R3vQBcFjOY2ug5GyUpT0SmXCgz39VEpZpGsPhS1iSCoyLKMpisoLYlQ2Icg6ChqE7ku\noOkKyeQa1VqRSCRMdmOd8W27XhIQI1abGb1toFotk06n8Xhd+P2XP5KlcoZAIECj0WZ1OYPL7cBs\nlkill9n7xr85qev6zv/If1+RSEHXdQX4I+AxYAr42f9OEODyWCsa62DT6Ag9fb0MbtpGIjGE0+qn\nXi2ycGmNQqFAo9FEEAwoioIgCNhdLtRmgwsT50itL3Hk0LMU8y0k0c6FqTkkg4mgL87evdciGtro\nuozDYcfj8bJvzx7m56bxe324XXZqlSyNZg2L00JPVxcOWxhZ1ojG3cSjUc6dmqJSlZFl6Or0k0yt\nogsa5ZqR/sFttHUdg8mO0WCi2VYJhhJ4XSFG+odZW04jGS14+sZweXzUqyqbd+ylY2CEzr4YDqeR\nQ089ik2S6O3qZ6RnjHpaoa42ufP9d/OeT34QzS6QTa/z8H33c+jBB3nuiccopLMoeAl1D7P/lps4\n+fwLbN80hkWwEo91E48n8Pu9DI8MITcUeru6OXfuDF5/mI1Cle1bx1hdmqNZlUlEYigGC5VSnt7B\nQZR6i/VclrXZJB/68Af50he+yPv+5MMk+jtZnbqEUteYnbpIuVik1VSQZQG320M0EkcXoFGvYxAk\n4rEu0skcEX+cnp4uOrpjBCNBVlYWiMUi2GwWQn4fQZ+DaMhFNieTLykEO0J0DXZx/OgJbr3pIE6/\nke9+66u4PSEEg0C7aURptWk1ywQCPgTquH0SK8k1bA4Hjz/zOF/58r20WhJ/99W/xhGwMjQySjjh\npqcvRktXyeQWaDQzZHMNesYPkC81+eLffpF7v/UPdPWGueXOq3jNm64hP6dQ2JAJ+CNYkdFbFQSt\njdNqx+d1c/fdb+DZQ8/gd7sRjWY8Xj+IOiZJIeB30lSa2Lwu5i6d459/9F1EUcTpdGIx26jJZUwW\nK+vrG6CbSafTNOsN+nq68Lq8lIp1dNGO3Lj80VMUBVUR0DCgqA18ASs2h4aADVmW0QUzHm+cekMn\nU1zH4bFispkYGdnysv33FYkU/k9JRLz65/70TRiNRvL5LJ2JMEefP0Ys1kEk5uKpJw+x+5prEQUr\nui5Qr5cJhjw0ag2aSoVmXWcjuYzb7eXY8VO86tZb0HUVVWtiMzlotNoEQy5MBiOqqjM5cRaX243F\n5qanr5fkepqAL0SzUWN+8RJ2ixmXx0+5WiOzscJg3zCNRo1wvIOFhUWS62vs3befUKSD9ZV1BINE\nW1WQizlcXgeNRhOH3QPAwqUpduzeR2ojgyAItOQGaqtNu90mHAuzvJrEZnUiahp2s4lcZoNAIMB6\nMoPD62agv5O15DoWm4tqqYjdZqNeyfKNr34NuQrf/dGPOXbqKLVyhuzKOjomovFOPF0xOjs7+N63\nvsOffPTP0DSNWjnDs88eZt+1B5mZmWFkdJBquYjTE8FgNpFaT/LkAz9nfGiQ0Gg/WhO2bt3KenqV\npx9/nN3XXMfw2GYefvBB9u25FlVqM3lhHq/Xhy/ooVSs4PN40TSN2ZkpNm8e5aGHHuKNr3s9+XyW\nTWNbmZqeYmpqhv6hXlRVxe/xk81mCYfDOBwOnnzmaW6+6RbUpkwmm+TGgx/kzz54gP0H7yDRGaeh\niShqi+XFRQ4fOcT+/fuJhoIYTEYMRhPhUBSt1WbT4E386t/+AZfTSyAcYvL8KYYHNzEze4p4Ty+S\n6ABRoF4p8/rb3s3WreP8048+yeLSHHarj0ymhaZWCfojTFx4HgQJjy9KJDiA0SLgcTuZmjqPomh0\nRrtZXl7i0qU5duzbBYDVbEGSJMrlIq22TCTcgdpWqNdl/L4gP/nJT7nllltYSy0RjycI+gNMTEwQ\ni0VwWB3Iskw+nyUYCVIuVzFLRsrlMoFAAIvFwtmJ5wiGutBUCZMZrJKFaqOIw+0iub6BxWxD09tE\no1GKxRK1Wo3NN3/k/16k8H+KUTJSKuTIbqyzvHiJudklXE4fVoubfK7CyKZxMpk8VpuBWj1LvVEm\nky6QWl+no6ubixfnMJuNKIrMnt07KReKoIIkiAiiTrlQQm1pGAwGjr9wio5ECK/Xxuz0LOvrq4gG\n+OQnP4MuGPD6/Dg8NnK5HEajkVAwzupKmnZLRJbbuFwebrnpIOfOnuAH934Tt92E02YkubIEBhEQ\nWVq+xMXZCSYmz+D1+3n83/6VXDKJ2JSRtBaNaoFGvU56Pc2+a65iZXGKsc39PPDQz9hx1XbOnT9F\nX0+USiGPKIo0alVaDRmbw4vN5cYXCvNHf/JR3vKut/DBt7+FuRdeRMvXKcsK3/veU5x78TTDvd3k\n0uu8+fffytLSEsVikTOnJ9k0vBnJqKHSwu8Ls7acQhKaaGqTYMBJo1Zj/Kpd9Pb0UG+UmZ69iCia\n0IC63OSH3/0h267ayd9/7atMTExgNoi4XJcTXhaT+beZ+56ePpxON+98xztIra1z+vRZzp47zXo6\nRTASxGJ10dM9SLUuMzY2xvHjJ/n4xz/Jrh27SK4s8/RDv+Yzf/YZNvfbefu7PkDv4CCZXA6jGQyi\nzpaxEf7g7e9g25ZxVldXaTcbGPQW81NT3HD9rSysPovfb6bSLHB+8izBiJ/1VJItm3fgsAV48MEn\neNXBNzA7P8ezL/6Cz33lL1lda2C1RDEZnVhtXuI9IeZXphjoGSER6SYWjKJR+20uplorY7UZUbQW\nkViQldUFDIJEb98giqKg62aCoQQuV5BSucGpk+ewmG184xvf4M7X3o5Oi1i0C1mWWVtbweuzs7i4\niNwqkExfpKMzSD5fxGzVOHzkKbw+N41Gg2QyicvppVgsEo2GMZo0WkqJUjFLqVjF7XYTDIdoNGXM\nVgvNdhOXx/Wy/fF3QhREUaClKLjdboaGBrHbrYTCAWq1Cn5fiEg0SK1eRNc16k0ZQRIpNXI4vR6y\n62U6O2JYrU6GBjfTqLc5dfJFCvkNDj3zJA63nWcPPUex1KAsa2zaPE4w0IPN5uPW19xJvtAkn6vw\n8Y//KZlMGpv
Fjq4ZcTq99PcOIBp0wrEwXX29SJJEpVSk0VTYdfUeOhM96IKRcqmKx+2kUMojiBL1\nukYs3ofB5KAsK3R3DxOOdJDOprHbXGQ2inT3xGnIZZ595jCdHQnS2Qyf+ORfcvToURqyApLARnqZ\ndDpNOBzHarYQCvhBBb0NnZ1djI5v5ff/8PcZ2b2VruEBTj9+lDt2x7HpWbr9Qc4enaJQTWFzWtC1\nFkajkUhHHMlopV6tcfjIIYY3DeL3BclkclgEC5FIkP5d17C2kEQwuYl0JDh75gJ33vE6bDaNt77j\nbh554AHe/5GP8OzjRxjfNIraqmOz2SiWNqhVM+QLaWw2G9VqHbnRwukO4A3HCEU6iMcSjI+P05Rr\nVKsVNE2jWMxz08F93Hn7QZRCA63U4ImjkxydzXDfL+6nbTSTKedp0mbp0hpWi535+XlsNgfLy8uo\nkoDdIrC2sMHdb/kIh55+iEvLy5w5O4HV4qKvP4jX5eWBn/6Ia8Zei65UuenAVfzqkX/EJBlZXFwl\nl8sgN+tIkolkMkk+s0glq7B5aAe6aEeQrOiCitPpRKDFRnqN3v4xLBYvxUYZk9NGZ1cPtWaLZHID\nu9WHqjTZSKWoloqocpGbf+9GLs6c4S1vez2pzAomixGDpGGSjLjdbmxmC8GAg0pZJhYZIJ3K4nEb\n8PrC7D9wM063F7MDgnE7RquTTZsHOXrs15w/sYDZLBEMhJFEFafDgCQ26Ip1kN3IEvAFEISXPyL4\nnRCF5kvhdCAQ4NjxF2jIVVS1TSQSoViocmlujbHRbaAbETAhGazUq20adZ3pqYs4HHaazQaHjzzN\n8MgQ27buQpZlRjYPMn9xjrvf+hZ0ScQgmTA7LMzMzODzhrgwMUUsEgJdYXZumlg8zEYmidvjoacn\nwZmzxxGQ8HoiLCwssby8SDAYpFgscmFikuTaOmazmUajgclkwul0UiqV6O7sQW0rOO12OqMJ1tZX\nWVhYIBLtolgss2PHDmoVhe6uTdTlMnannUszCxx7/iSxDj8DQx1cWlwi0t3HP3/726zNzpDMbrCW\nTtFWVXSDxHoqi8loY3RsG5ValZWNFRJbO3H2hnFFE3z8Yx/j2j3bsEteWi0Fo9GIz+sERWUjlSGR\nSDAy3EO1VuDM6QtoqkCrXiMaiZBNrZNOpfB47Whai76BHmr1CkefP06pkWPTSD+dkQhXX72DL3zp\nczisDgyaxsBwP9F4Ao/HR71colUt8/yRZyjWigwODlEul7FYTRz9zXO4bFZajQoP/uw+Hvr5/eQ2\ncmzaNEJJzzG5NsU/PnqEe7/zeUxuC3MXZ7Fb7WzZMkYiFuXwoUPUGk0aLZVYohe3zYfV0UummOXr\nX/8Y1RZEPQl2b92HpDfJXpS5ftvrGB7t5bmZ+9BabWQ5j6CqjI8OYrOIxCIBgn4fpVIJm82GP+Al\nvZHk2LHnWViY59KlS6ytrKG2asg1GaPRDHobq11A0DVEBHp6egj6/LjcTtbXF3E4HCiKitPhI5et\n8emP/yUtXaUhtyjmShhEXpoybpJOZ3A4HICC1+tFEAyUSkV0ocXUhRl02lTLeTbWs5w7OYXDbmJ6\n4iI37r+FXbsGyGxU0XUDqbVF6uUc6dVLbCSX8LlsaK06kvby/fF3QhQMBhGj0cjs7CzX7ruezs5O\n/H4f09MXmJufxmgCQdCYm5/G4bBhsUoMDffTkQhgdaqkMutsGd9BV1cPiyvL6MY2mzaP4ff1E470\nYpB0wn4vhfU0CAbMdifFcg2jSSG5vnR5Hr23HwGJSDjK5LnjrK6t4PMGCQRCPPfcYcxmIwaDgXQm\niyy3GOgdoLuzk/VUklKlTLlSIRIOkE2vYpTatOQqhUwKj8OMqrYxWU2kNnIMDA+RL6SpVCqkUusM\n9I8hYGG4vwdFbZHNNimVNIYHB0jOTvHRT3yC+MAQM2dP0hHw8MLhp5ibmiAc9DI/P8ul5SW2bttB\nwB/h3X/4UW59/dsxBzroGRrg2JGnMGtNRuLd6PU2Y1vGLtsSmaG+flaWswQ8HVjtAomOALrWIhzy\nUShm6Bvoxmq1Ui6XiccTZPN5LFYPjz30PE5vkCce/xXxWIgPfOAPOfbCUaxGCVGQmJqcRGy3MRng\nzOnjbN+xg96eATaSKfp6eqiVa8RiHXz9S3+PWbSQTmd469veQSZT4Af33kduo8Ktr7qDF3/xJXZc\nM0JydhKMTTShjq5InD31IvGwj+GhbhrVHOXiOpHeAMNbD1LOpIkF/VyYOol1Uw8//fEDfO+7P2L4\nYA//dN+nuXrXXs68OI2qC+iahN1up1ZrMj+3xNLSCqdOHMNqNuL3e2lrbbq6uti5cydyPcvIcCfN\nRhWXN0C0I44s11HaDXRdZyOTIpVcwWQUMJlUKqU0SquOqlZQ1Qp/89lP0dfXxfv/8MOMDm0n5I8z\nMjKCrtTIZDJoWot8foMnn3yaUrFFW6lTLK9hMBtotSwE/F4ikQhNuYHP6yUejWOVPHQnOllfTVIq\nZ/H6fBgkiUCoh4kLayyulKnUGiwuLzE5dYGV9MtaogD8jiQae+IB/Y/esI/u7k6SqRUEUSMa6aBc\nlZm7tIDb4QG9jtsTxe8PsJFJIqgKzVYVr9+B0WAjn10nFOnAarUjCgZUVcfrc7CwMI8oGXG5HAia\nmckL0zTkCl6vh6t2baOQryCZrKylJ7EZOpAbNdwOJy63mWIpz/j2XZw9e45MPoeoQyLRRaOqUK8W\ncXvsmKxuSqUSdruVibMn6evr5dL8LPuv3YdgkJidu4RmcLBp0yZ0tc2hJ5/g4MEb0ESVSzNztDHg\ndftwW0SMbjeptQzdHV1cnJvBICqcO3uCgYEhxsZ28sPvfIuBkSH27r+OyekFApEYIwMdTM5Mky+W\nuX7H1cytrqO3Feq1Er/42T9jVdtMTZZ553vuomvrEA5nEJvNgUk0UCjliXVFyWwkKWZyZPIpRoc2\nkyk1aVTKjG0eRG60yBZr2K02Ah43Dzx4P/uvuw5vyM3D9z3A1fv2c/9DD/H+d72ParMCZtCLNU6f\nP0vPwACaYGV4dJxmqcz9P/8J4UCIG/Zfz9Ejz3DmzDk2bd6C3Cxitpi4/sbradXbGIwGbEEfG6tL\n1Opl3P4wlUKVoC/G9ORpIsEENUXH7DXhtFqYfP44Fc3M7a/ZzYXJc2wb38emrhv4/s++SKI7Siad\npiPqQ1VE5LYBxBqRcILpmbP4ggESnX0szF/CZDLRrF92dLs1QEMuk8un0IQK9VqL/p5BTEYnuWwJ\nDCqbNu8glykhSG18XjcXzpzC6vbT093D1OmzJHo6efSxf+OuN93F6sISIg0kswtEncnJ87idPvr6\nh5CVGlq7xcpimkRfnGZDw2Sy0dbqWKwCzWqLdkvFbrfTbLYoFEoEAl6yuQ1URScS9bGRLOLx+PD7\njSwtLONyusnksnh8Abp7+1heT9K35z3/dRKNOhqdXTEq1RLbtu1AQmf2
4hQWk8hAfycdcS/j49uw\n2Wzk8zlkuY7T6cRsdiKZHMgNlUyqSCwcv/wFTidpt5skk0lMJgvr6+tspNdxOy3sunobPp+NmekJ\n5qfnWFvOEPLGqBU1LFaRp55+FFWTmZycxOPxMDN9gXw+z/DwMJJkQlEUiqUsvX2dlEp5LCYzZpNE\nuyVz22230ZQVto5vJ5nKUinXiMTi9PV0sbayxKmTx9l59S5yuRyFfBGz2YzRaMThtFGTK8hyjcWl\ni8xfmiQWD9BsNuns7qLRaCAIAre/4bWkc1n0tsLQ9jEG+/tYmZzBaTIT8Pn4+a8eIBoLY7dbOXHq\nJG9569uxudzccPMOvvD5+9AVI5fmLzA9dYrDzz3O/KUJcvl1JKOG03V5OqxaLaPrKlvGNvPorx9H\nkIx0dHZiMBhYWlpi06ZRAv4QqdQGtUadbG6Dmw7egNNpp91uorYVFF1l77X7iMfj9Pb2snhpgYpc\n5X3vfy81uc4vf/UAkqhxYeIMPo+LUqnC/usPsLqexuoK8pvnz1PINXBabQR8HWgtHaPBBJpGz6bN\nlMUqosnKYKIXuVLiZw88yZbRMLIsMzZyHbt33MJTz32fru4oxXKGcNjJpflFmu02mUzypdWxVWKR\nPpJreaYn5+mI9TM/s4TZbMXhcKGobcxmMzabnXCwmy1jV+F2BXA5ffgCXhwOB5nMKopWIZVapFhK\nIxl1VtZnWVyewmhSeO43v+FRhYgaAAAgAElEQVTpJ58iu7bO2soKJqOZaqWGquhs377z8tJ3XaeY\nrxHwRylXS8i5JmvzSR5/+HEqqRqTJ+dwu92sra3TarUJhUIsLy/TaLRIdHSjqQZqVQWnz01LaSNK\nJpweP2aLg1AwRrFYZXkpiVFyvGx/NNxzzz2vnLe/TL72lS/cs3ush1w2j6apPP/8EUZGNiPLbZLJ\nDKFgiFq1QbVew2QysLQ8h81iwm4yYTSKNJttAhEXjVaNSrmGy+XAZrfTaMgIGMhnNshuJOnr7kfR\nIBSOceDAQXKFJCur53E5wWRwcuLUETq7OojGEjhddorlAlbJxDNPPknI52V+dga13WZsyyjVmkwo\n1ImsaHR297CwvExHRwIdDZfHS6VSJt6V4NnDT2HQNLwuB36/h3whh9fvpVorIYoi/kgYm8XG8eeO\nkcymuOnAftqtBiaTke6uHmbmpvEFfKyurNLZ3U3EH6RplDj666eYPnue+ECczmiYH337+7zxTW/k\nm1/5Mk25gslsIp3cIF8t8aEP/ynrmXmuumqQT/3FFzh99BTv/eD7UBSdZDqLroHfEySZWsNuNjM7\nu4DX4yGTSbO6vIxZEnG7LSRTy4wMD/HlL36eLZvHGb9mG81GiVa1wcljp7j+wI0cO/Qcozt2oCEh\nN2TymTXUZp5YZw+nT57hzMnjvPudbyMSDiJKBlIb60RCPi6cmyCzkeGLf/cDHnjkcV516zitJpgs\nXpx2E416neT6MgaDkd7ODmjX0LQaf/7hv+aHv7yXt9zxRhZSWfr7A7z1D+7CYrNhlAx4TA4OPfMM\no1t3Uq3VsTtsCBipFGtYbRba7QZ+j4tUcgWH04SmqhgkA6IBnE4HBoOExeymraksry1RlxsIkhWL\nxU2zJbO2skJnRy/JlSTPP3cMk8OHy+7jFz/5BVdfu4fde/didztwej20hBaPP/oUo6NbUDQNXzCK\npurYLEbm5mbZunUbc0tZxneO0zvYRTqXYsvWrchykaXFFfr7+5GbNSwWIx6Pk0ajhs8XoN4o47Db\nAAOtRoV6rU65XKJvaIBoLILTaWV64gzf+/nh5D333POd/8gffyciBUVVWFpcp9lUqdVqmEwmVlZX\nUXWdeDxKJBaj3ijj9bpxOp3s33897XabbD6H0+VhemqOWrUJmoFWq0W1WufI4d8gICJJElarnXqt\nQSqdIZvNMj+3xOzsPOFwmGgkRq1WJ5VKM751Mz6fh2w+g6JotFsqM7PTRKNhlpcXsVrtVKs1Usk0\noihiMpnI5bJMTU2RSMSZvHAep9PJ4uIiiqKQSSUZ3TSELMssLi+haCLVcpl6tYIoSvh8PpYXL+Gw\nGTn6/BES8TA2qxFJ5PKwRxS57vrrESUTt912O21ZZsvYGOVKkYO33kwkFuTE87/h2SPP8PHP/neO\n/eY53vG2t7CwOE84HCQYCbGpv59HHv4lu6++inZL4S8/8UFUVWVjLY2IEZfdy9JCCo87SG93H1fv\n2k0s1sHaWpLhTaOcOXseh8OFw+6mWCnTbDYYGOzB7/VRLpcZHd3Mi0dfYN+e3Xz7q19jbWWZc+fP\nMDFxDlmWX1pqHmB9ZZWHf/kIb7zrTbRaChMTE7jdLsKxCIFgkFwux2OPPcbZ01O8at8WQqEgDqtE\nu1Gn2WywMLfEQP8wwUCAmqJTqbZ57vCLPHZykUM/+hV3v+F2/uZzn6LRsGB1isjNKu2WxokTJ2g2\nZFotmUKpwvTFS/T2dVMslrG77GSzGeqNGqtrKxgMArIsUylV8bgDbKSznDl9DofLiyia6Ih3Ybf5\ncDrcKGoTk0liZGQYuVnG43VgNYvc+nu38fOf/5zhoRFcrstrVRqN+uUftTSdgYEBCvkMqfUVSpU8\nmWySer2G1+OhVKrgd4vo7SIep8TS3CSV7DoA/oCXSrV0OVlrsSAKEnabC6NkJRKO02w2MRpEWmob\nq92O2WpnI5enXK7SbLQYGBh62f74OxEpfPHzn73nbW+4jnqtxML8PNdffzsul4/fHDlMT3cfhVyR\naEeMVCqJqiq0mm3sLgeBSIhqqYjPbaNZa9KoNGnUqmwa28TIyAjptTQLc9Ns27GDSDgOZhGn2Uom\nu4zTaedf7r2fd77z3URjY7hcJi6cn6KrZwBRkOiId+L2eOlMRLFYXKwnN9i+7SpsVjterwtdU5Fr\nDRLxMHK9wOryLEGfm410ic54J2fPnMDtdnD1rquYmJwh0dGF3Wqhp7uLldVlzCYLF+fn6OiIk06l\niMZ6GEr0UpFl3A4XXoeXzMYGxWKZzo4eyuUys5dmeOLQE4wNDZNcXad3oIvp6QnuvOtunjvyHE8+\n+jAX52f5zOf+lq6uPr7xzW8wMjzG5MRJxrcPMrbtahRJAqnN4vwye3eOUSiWOHP8OTYPbefRh+6n\nZ3gbLbnK1Mw0W3fuoLO7C6vRzko2j0HVyKxvsH3vbo4fPc62zTuZWVigu7OXoc3D/PIn/8L1NxzA\n4LOzffMYU+fPo6oCmO006yVGtwzRFQrww699hd6hPkKDfex71e+RTq8xNDjGrr3bePi+p7nhqi4u\nbmS5cPo85469yPJKhvWNFEd/c5ijh45w/MUjSIKXz/7Vl9magD/4izehm3yU83X6hxKcPDlFxB+n\nkq8wvnWU1bV1YokOrFYvPX2dzJw/RygcZ25xGblWp6tjmM6uAWZmp+js7qCtaFycXmBkdIxSqYTJ\nDrViiZlzM5RbDYJRH+nVFCY
LVGsSansVp93E0888zr89/K/8909/HE1oIRmtBLxePC47qdQaDouL\nYCBIoVjAbDLgtBlwOlyoDZFCLofb68RsgaYMK4tLjI4N0lbs2GwWLGYjRslAsVCkVq1gMZuZODdF\nS2kSj8fYSCURBB2XM4Suifh8QWrVKkajRLGUB0XgS997+GVFCr8TovDVL33xnjsO7GYjlyPWESef\nT3Jh6iwHb76eeq2N1+un2W6ysLDI9u1XUa1WsVrszF08j8NpJ5vfYOu2XZw6dwy330ujUiW5so7f\n50UQNdbWU0gGCYfbydKlJVSlTaIjit9vIptNc+rUETwuN36/j3qtgs/lZyOZxm61MXVhEq8nwN49\ne5hfuIjDYWVoaJjFxUU8Xhe5XJbHH/81N9xwAF0zY3e6eeHoMW677dVYLDaqlQbBgB+rzcjK0jyt\nVotQMIyqasQi3XjdbgwGga5EiL/61D3cdvAG1jJZVMFIRyJOu93GZrOi6W02kln27b2W1ZVVpi/O\nsLS6ysimHditDv714Yf5q0/cw9pGnqC3k2ImzwM/e4J3f/BDBD0hYr0DfPNLX+ehX/ya4bEurr9x\nL088dojPfuZH2K1FwrEAXYkQ6XyWSlNmx86dlAtpDILKk488xNDoKB5fCLvNSqlUAARMJiO6ptBu\nyvzyoZ+zdWycp59+loOveg2ptQxbNo/j8/kv28pu5+iTT9BSWshKi517b6Stw9LiJYxGD/lKhauv\nv4Htewbp3buX4aiNJ/71QfzhbnbdsIUDB15F3+AW9t62i1tevYdX3/pRJlvTvPrWgzhsHfz4X77H\nwsIiPR1bcNskRKGN2+tCEzV8Pj8qVoLBCKn0CsFICIvZgqDUCEc7kHWZSr1AIh6jmGvgcrlQ9BKt\nVhOz1UGis59UdpVw1IVRt2C3OrHYDVglO1MTJ4iE+jl1/ByPPPIon/n83zIxOUmuUEDQFFRBpVyt\nYhCMVBsydqcTu8NFWxUJh/tZX13EaFEpVnIoBj+x7iGmpk8TDPoxmZ2UWjkqhTKpVIpYLEGt2qS7\nqw+ny8ns7CyZ7AYms0AoEMVmtdNWmjQaFVaW56mWizhsFlpyA0WR+foPH/+vIwrf+Nrf3XPDrmGs\nNhvlcoXuzgF0XaRer6NpYDRKTE1PMj6+hUuX5shmMyhqE6NgBt3EyOgo66t1AiE33d09qG0VVdN4\n8JcPsmXLZqamZjCbzKyl1vB7PXi9LtZWl5GMdhRVZ6BviEq9SkttUipn8Hg9ZHI5+vp7WV5PIYo2\natUG41tHOXHiBBcvzrF161ZMJiNuVwCH3U2pWKWjo4NGo8ATTz7K5i2bSSVT1Gstunr6kJsyqqpz\n9PmjZHM5HE4bzWaZZCqN0haw29y89z3vYvr8JJFolAceeRiXx4fXH2QjmwVdBAScTjeVSpnX3PF7\n1OoNLkxNEgh6aGtNTp85x5133omiaPzoR9/lxhv38NP7f0qqmkYQ2ly7bRdPP3SI4b4ert6/h0S8\ni/d95HU4zRILs0U0tUH/2Bb6+/pYXlymp7eXck3GZXfj9HppNDXsFivFYp7rbrmFQ4/+mtGRTfx/\nn/1bXvO617Gpb5BapcHg2DAb+Sxmi4VnHnua44ePUi0VGNg0RH//AC+8cIzNW8dYmp8nGglRKufY\nfe01HH70CL3dLqSmxPJCm9ff9V62bh1DKcYIhPy022VmXkjymhs+xPjoCMP9fgobBXTJQDAQ49Zb\nXo2oy3z5i1+mt2+ApqwwdeEcie5OVB3y+eLlX5M1Hcmgs7SwgC/oxWD00pRFVK1Nu1FEEiVs1gCl\n6uX81PT0CuNb9mA0erDZdarVDbyBAHPzMyQ6/Xzj69/mxeMv8tE//SgmmxOv18/gwCAuuxs0HZ/P\nS0tt4/OF0TQNWW6yvLyM2+NG1zXsFj92m5NqMQeagbXli5iNJurVNpIBSqUK3b1dpNMZRNGE0WSm\nVCzgcrnYs3cPVpsVva3SUtrkcmnC4RAer5dqvYiKgGQyY7MH+NI/PfBfJ6fQbissLc/RbtawmCTy\n5SQunxW314vFbkFuyzjdPlIbGep1mVisk4A/xKXFebo6Y2iqisNRQ1fqTJ45TaPVwOZwccudB/EF\nY+zYtZWN3Coeqwu/L0Qg7EJWZAZHxilUymiSCYvdj6a5cNhj2P0BRAP8/P6fYrdYiUW9lMopRNHI\ntu3jDI6MMjkxQy5d5PS5k5isEh3dcQIxD7lClfe+972gK3R0xlhanWUlmWLywjzBWIih0T72XncN\nzaaOIDhx2F1cddVVPPDIIxx98RjJXJovfOozvPtdH8Buc2IzGYkmOjBaHXhDcVq6RCQ+wOHDxzh8\n+AjxeJzk+ga9vSNcd90BVpaW+eEPv8PIpkE2NjZIzi5w7bZdbN08yjNPPMxr7r4GyS6CwUU5W+fo\nmQXWMiqf/eYDDA5dw+LsGpVaDUGyk8mXWVlOsvfAfn764x/yr794mOXFVbq7e1mcmMHpcvDU07/m\nmmvGabYMfPfH32XX3s2ceP4FbBaJE889x74bruMPPvwOCtkC6/Or3PvdH/KWd72btqIzumUnFy5M\nE/H4eezBX3HLOz/NsdPL6JoBi8PAamGVUqNB93Y3hlaBL/zt5+hK+JlA4V/u+yyl5Vl88Q4ERWN8\ndCuzFxfxh8K8/yMfRH5pOu/Xv3ocSXJQypcoVwq4vAHMJgelcpvR8euwmN1MTh4nEnVezn9EYtj9\nTjQjRBNDGExhOrvDLC6dJpNbxB2IohotLM4uMDi0ma997V5iYQ8f/NAfsnnHDlwOJ4uLy9TrLdZW\nL2I1KhSySVpNhY2N9cv7hIQjRKNhJEMDu0OirVcwWAx09XWzMH+Wjo4YdqcNi82I3Rpi685x6jWZ\nWCRKojsBBrBbrXQk4kxPTVLKFjEYBARNxWKx0WopSCYz0VgPfl8UXRVp1rMv2x9/JyKFL//d5+65\n48bdLC4uo2otjGYTZrMFs8WEgBmP24/ZbCCdWqero5N2q0mlXMBqMFIplVhanEfT20xOTvDa195F\nJpdlYuI80ViYtdUskUgQp8uF3+djI58hHAyTTmUwSy5URUbVmjTqLYJ+H2ajkeR6HotZ5IaD1yKZ\nBZKpDaqVGoVSmURnAslgxWK1YDabicTixONx1tbWKBVr2KwebHY7kxMTL+3/UCbkD1As5NBUje6u\nPmxWNyASj3egKC2OnzrBHXe9jvnZBXx+Pz2dnWzbsYVMpsRKag2Py8Py8hrhkIffHDnE7mu2Ewz5\nsdvtjI2MEAgEkCSRlnJ5B6Lr9uxmYHSMkfEBxgaG+fSnvwqais0tYhI0yrkCm7b0UK9nWZ6bYfs1\ne7j7tp3c9+172bl9M+slHcGuYJOc+AJ+pmemiccSvOqW66k3qigYEGiTz6WZmJyiWG0yMjLOa1//\nZn7w3XupLF/i+JnzvPq1t2NA4Zffv58PfOzP+PVPf8Jgfz9mh4Nms4msKQxv2YTPFeSD
BoEQklYuFlnv3t79mwfoDFsJcd\n1+9g0b9ANhuFioDV4uKW227n0sv/nYHeVhweN919azl/6gzLc2ME51fwe6O0dLbT0t1GOjLLpbGz\nTHtnUKBg7brdNLYOUC6BGBUzYwt0dncil0uJxZMUS395Nf5VIIWf/PCxhz50+x6UShUKhZLLV65g\nMOpocDuw2i3oDUbEyFCrZDjsZkrFLGqtlmoVlhZnEYlknDx5hLvfdydSmcDigheZXCCTi1EsVslm\nkpSLZfoHennydy/S1OxmanoUtVxHPp2jp7eH+bk5BoeGEEvEFAsFPA4XZqORcDhEKpXEbDajN5mo\nVWuMjo7Q2dXG3OwEoUCUSrlEKhVHb3IikytIpXNodRpMJiNVoU6pXCKbztDe2sHs3CKVWhWX241C\nqSERDxOPB6kJZU6ePolEJGF+bg6T1cKbb76J0WCgq7OHZCaBUKtisVupVGssLvuYmBglEPaRSIVQ\nKDX0dHVgMaqIZ5bJJiScOn2W5uZmNDo1wyPnEYvVeANRbr3tTpSyCv/00ENc22knsirB0dNGLS9l\nZuYK1XwNkTTPIz/6Om+98iaHXztPt0vP2Goau9HEoVNXuOX6zWSXJjkxEua667oJRCt0t6mpyhWM\nr/wP6t40SJPzqvP9vW/mu+/7Wu9S+9bdVdVd6m5tLbWk1uJNkjFgsA2ewbrMMFyYgQEMM4xYbAwX\n7LGB8YVhGBsMBu9YUkuy1K3eu7qrq5fa9/2td9/3NedD60YQc+MOihvzAT8RGfnkyScy88s5cf7n\nyfP/F3npFz6M0eXm7sw0JreLWKrAzOVFLs7M851vfpa/+e6riFUFbp2M6xen+K0v/A4PP3KKM+9/\nkmQsBko1zz71fmy+ELVWlb6+Pgr5Io36fY3IaqNDPJ1HrlDjdPopFsrI5TIiB7tUqmU83iA6nZ3b\n92aoFvMM9Pbw/e98B7PZyOtvv8nIocPIFSKdRotatUij2cLjdFApFalXy5gMBuKxGK12k57eHvL5\nPEqlArlMRKcxkckUSMfifPcb3+PJ979As92i1anRatYJdXnY3V7j9vQSgd4BurtHqJcKVOslugIB\n3nztVfxeJ6FwCIVCSWx/D6VKyfzsbbLJCFvzq/gDQaptBY9MjqFWy8gVCqQzWUwmK/F4ilJNhclo\nw2GxsbGzSb6UQerIKJYzZHN5RkaPsLq8hqjSozeauXl7mm+cvfXDkyk0mg1EhZz1jVUWFufo6gqi\nUqgx6PXs7e2RLxaJphMEQl1oNBrMFgeiSk+tVmFzZ5tSOc+//Bc/S6nQRGrflwQ36C1USm2ajSoy\nmUS+kCUa3ePFH3kOq8VJKDDEwGA3Dqeay5feAFrMzS1g0FvJZAvEkjl2DxJ09w2TyddIZco0mw0a\nzTIajQqHXY/JIpAvJiiV05jMWtRKDYePTGB3ODCZLHh8XcgFkVjsALPFxNy9WWQCKFQiJouRdDqN\nVqslFPDj6fJz4sQkfQMBBoe6iSai/ORPfhifz0XkYI+hwRFqzRb1ZoullTXC4R5+4mM/TTSewu3p\nw2xxAQLVSouFe+scxDYIh4PY7BZmbt9gcOAwjWYJta6BxW4g0H2IYmSP8PAAn/7DL3Hpje/z7371\n13nk9JOk61l+9BM/x/L16zz/zBN0VEpq5QqDASef+uRzlOptXr94CXW9jBwZP/4rn0Yna2EQKly8\nl2Sn3mRzZZ3Xvv8KPkcQm9aMqdHmrevzfOyxbn7uJ3+FX/vF/8Ti3Vv82Zdf41c+/RtsbGzwi7/4\nS1RLbRQaKzdvTHFrdgaNXksyXiOfa+H1hJEhIrUhGb//S29PuJvp6Wl8XT52d3cIh4OsLi3x+mtn\n2dlZwagTcVh9SFqR0+9/mlwux9HDI7QrZWSNJq1mnZ29XWZu3aSYSRHZ28Lv8dLpdPD4HARDbkQB\nRLmAQhApFnLcvncbk8XI3OxtPvKRF/jLv/7vnL90BZenG1HQki9UCYQHaQsCgiiysr5EvZZBKaru\nMyD97M/TaspJZwp0mi10WiVyQcAdPsyTP/FLaFUi19/4BwqFAmffOsftmbs02yKhsJtMcpV2LYHP\na8XvNeCyqRjq9iBKTerlHEeOHeXIxATLqyto9UZMZjvNlsCjjz3xnv3xn0Wm8KUv/MHL/R4dOr2O\nhx9+mEq5gCDIKZbKpJJJivkcuVyKcqlBpVInk8ngcdvQa0VEuYDRqGdpaZFo9ACVSsndezOYLUYk\nJDa21mm1agS7etDr9MRjuxjNamLxXVrNFqVKG63BQqGQ4+jRY9yYmmZoaBBRFGg0aiTTSQ6/y9+4\nvbXF4OAIyDpUKk2czgDJVJaD/QM2N7cI93Zx++YMzXYZvd6MRmvkxvQMXYEQi0vrKNRaHHYbY4eP\nUClXabfbVMoVmi0JJAGvz8/qyjIDg4No1UqKxRLxWBytVovVZmJ3b5vz58+RSsZxu5x89zvfZGf9\nAI1GTSKxy+TRR7l95y61ehWVUssTTzzJb/7mf+Sp00/xyIMn6Q7343YFmJ9fYqi7//7WabPKhx49\nwq+9/Ff8+1/8KTQKHVcvzqBVyrh44Q2MGvjEBz/AZ/7yAjZ9DZ/XzdWZZfSdCr/xHz/N2+9M86+f\nH+Hrf/EWzsPjLMf2eMwDn/zFT+Hw2rDYlUgSLGxcI7cbQV2vo/a7+NaXv8LOepyT7w9z6/YiL378\nx9FbLIT7e1DI4MjEMRxWN7fv3MTl0KJWyRHkkC3m2d7ZIBjoIhAIEYkcYDObaDVbpFJJTBYzbk8X\nDz78KNlcAYNWT76cwefpZXcnQn9/L4VqiYGjk7TaoFbr8Lh8GAx6kDr0DvRRqJTRG0zsb29hNRlJ\npuOYbWY6ckjlCph0BnweL2q9HqVGh9/jx2y2odMbicfWsDsdWFxuBkcnKBeL1NptBKmDzxtmbW0Z\nrZc6lXIAACAASURBVNYAkoDRbESl0VIoVujydVGpd2iqrbjdJi5dvMhHPvkvcXq9mAwqOlIVQRDx\nBwcxWtzE03l8R04QieTRaTQoFQIGvZ5iLs/s7TlcDjvlUoF6o4bZZGR7bY0/+/pbPzyt03/4ud99\n+YkTvWg0amq1MvPzs9RqFax2G06HB5vVRa3aQaGCYjGHTqNFFO7P7XYbhUKBer1KMNjF9s4GSoWG\nbDpPONxNrdmgOxgkmcyxv5/AarARiyYxm8wIYhtRrkGtVtPTHaDZrCAq5exub+APBokeRNEZFDTb\nNaqVPDq1hXy2SrWWQSEqiCeyLC8v4/b46OntIxnfI3YQZXi4j/6ewyATkHXqHD/+ECsrq/T2htHq\nDERjCTrtNhqtAkGQMOp05LMFLl5+hxMPnaRQKDN3bw6vqxuZvI1SqWBpcYV2q8WhQ4McnzyJDD1P\nPvUYLpeLtbV11tcXOXbsIRaXZpmfvcvzH3mBg/0IT5x5FqPZyCv/cJajExPUOy0Uoki1Uaery8Zf\n/dVf4gn38du/9Qv8x
Iu/yod/8nlsjiJBmx+x2WB9eYvvv/4mo+OjdOpRrF1DdA7WKDR0zC+sYtBY\nmF+8xu5WAVGZIZdu8qFHh8l1KihVaspNFasLG9QrKqLT1wg89Qy//vn/wLf//Gv8ypd+jY989Kew\n+Jxsb2xweOAQxXINvUlJuVwnnU9hNtowW0NkizWUeiNGVZsunwelQkkqnScQuF90bTQbeNwejAYL\n6Wya/b0dnG4LMnkHQVCysr7JyJExNra2kZpNNpfW6Ds8wezcbUKhPmx2N8Vyls31PVrVDoICGs0m\nDrsLSd6m1W5TqTZQqzT4u3xcvHSRcrGIwWhGUEoYDRoyiSyj45OkE2kuvHWJWrNNpd4gFPSRiMZo\ndepkM3GMFhtdoQB3Z26gUqvZ3thCo9Vx+8YMDpcdm0akiYxaR2R2boFULM+Jk8fIl7K8+v1X0KrU\nGGwapGIFtVJgL7aNSaMCmUBTUqI1qFGoZBhU9/1ke2uJiYkRfusLf/fDAx8EUaRSKRKPR9nbizAy\nNEF3aAiXs4tIJEkqlUWhhHa7ic1mweVyYTZbiR4kkCECIjabA5lMoFgs4nY7sTnMrK4tc+zocdpt\nidHRUfr6wyytLWJ3WKhWGnzx81/F7bGhUHQolausrKyiURsIhUIYdDp8Hi/FfAGFKJLJZFAo5CBr\n4vcFsNlsmM1GXC4HBqMas1WFyegkEArTbMtYWV8jn8+Tz+dZXllCb9CxsbEGkkS9XsdqsbG5voVe\nZ0SlvB+Yjh17gHq9SafTYWLiGHsHuxhMZvYiB5w7f4Gnn/kgkYMkSpURq8XF2uoWo4eGeeiRCRxO\nI+trK7z5xhuEw2FuXLvO2bNn8Xrvp8Jur4d6q0k0esD6zhY+X5BOR8mJk48yNDLE0soqo+MhOqUi\nOqMLvdfDK29eZH8nw2akzlMfPIlBAUGbjVpDwmcT2VnapZjPUsy30YqQideQC2AwaDCZLGiUatKJ\nOBMPHKcuaNmKygh5nNz4m//Ocx95DLPOzH/49G/THQ5jMBhZWrnL+sYKWr2OerWG1+VFFEUq1Rzl\nSpbI3iZqtUg8HkGjVXL54jmuXHobuRx0ejVyoUMiHsegM+Lz+1EoVNgs1nchZBGpU6WrK0gkHqFY\nr7K9sorFaEKtUtCoVYhGozhdNuwOM81Gi7W1Ndp0aCFQLFepV6rkiwVqtRrd3d3ksgXy+Tx6jZml\n9VV8fV6EegO1XOTRhx9CatfRq1S0G22Gh4fxhnqQBBGDyUipWkEuB4NJS10mJ5kvcuL4OK3YAt/7\nxleYmrrA0MQRVCoFxXyJlZUlnG4Xwe5+2jIFjWyZVqtFvVpDaMJGIsnK3h46k5nugVFsdg+lYppS\nIcOx8QnWVnffuz/+s8gUfv8zL//CT38Ui8WOxWzFZHWSzmZRiCIWs57dvW1EoUWj3sRqtXBr5ibx\nRBKX28Pe7ja1ahmD0UKlXEeS2qRTCcrFLCuri8zeXaBUTLGwMItcEHjo5MNYLDZUKg19AwHeefs8\n5XIWp9NHsZTH67Lz5ltv4HQ5sTocdFoyUokMXV3dKDUC27ubGPQOdvcP6HSa2OwWvJ4gmVSd0bFD\nqDQqRIXA5tY6MnkDr8dFsVRmauoqP/9//hzzs/cIdgWJJ2OYDBokqUEoFKRSrrK3f0ClXL2vAFWp\n0pEqqNUaLl2Z4vSTT/LFP/48bk8AhULJ7OwNPB4vSHIGBrpwOy2cfeMVTpyYpN5osbS4wM+89Cmu\nXbnI+soGuVyK7/z9NxCQM3F4gkQmTiweQanSkMkXKJZzfPSjL9KWF/iRD/wWff1Wjp06wSd/49/y\n2c/8DX/4n3+ZD/3mdzguT/H95TohZZWfeul5TGKLitePrbzPWlXgoAA/++JjmI4MIWp09Ng87K7O\n8tlP/wlPPtfLhTcukthNgl3Hsy8+g8mhJh4tIlMqcDocpHI5WnWJeq3A7u4+Fy9eoloqYzCaqZSr\ndDoqukJ9NNsSPb199PT1kownuPz2ecbHxqlLDRaXZknuR9jdPUBvsBHo8mM0W6hVyySie4yNHEGj\nFyjkM1hNVurVGkajgUa9QTDk5trVi3T3DJEr5FnbWEKqNjl6/AEajSL1fAq1RkmpXMBg0CNJDWxm\nKxsrOwhyLYGBEGa/l2whSb2URaFU4nS52d/bx9s/SrnaRKPUIMiViGoTgkyi3WjQ1z+ATG/B0zOM\nwuDgoSfeR7lUors7iEJsoFMrkDXbpBIxjDoVdpuVQrXKuXNvYzQY6e7uZXN1HZNCTjWXYXVxmcDg\nCIJKQySVYXZug2++fv2HBz78xZ/96ctPPzhMLBYjHA5TLNeIRPaJRqIcRCL09vUwNNiH3e6kWq1R\nrdXQG20EA35u3pgiEAiQyeaJxaP3mYzsVkrlAhaLlf39ODq9SC5XYGBgkHwmT7FcBFmTyMEWep2O\n/v5BBocOcePmNcKBAA6Xi1u3prHZrCgEAa1KxeUrl7HZ7VQqVYrlIlqNkna7jc1qp1yuoNPryGRy\n5PMpjCYjly9eQYYcncZEpy1RKpQoFrLIEGlJErlMkmDIR6fTQqlUUyxWmJu9x+BAP++cP4dCoaRW\nK7O1tU0ynuD0Y49ht5vJpHKsLC8QCvupVhscHBwQiazRbte48M49PF1+Hnv8CW7fusHJBx9k6vI1\nhoZH6Ar4iUei+H0Bwt297OxFqVWr2B0ODHo9ClGBIAnobRq8VhMXzr3JB59/ntRumum330RRjnJ5\nagevGjZSDY50KdGb1ZixEX7oMNvXpog0bVRaIiFTmofe/2GajSa5dBxRMPGHX3sL7V4WmanFS5/+\nJY709NEQZSQzGWqFKjarBTltAsEQr599k8MjfXSHexkYGrxf5BMFbE4bmxsbuFxOMtkkZouJWPwA\np9PB9tYWi8srjIyNMdjbi9RpIxMERkaGmbkzA/ImEhJ2i5Od7U1sThsalYpyuY5cJpDJZMjn8jhd\ndrRqNfXGfcgwODCEx+dhZmaaQrFEtV5HrdbTarbJ5grs7uyDVoVCoSSXTBBNJFiYnsZlMdBsagkE\ngyTT6wR9NnZ3E1jtdhRaDTJBgdHmRmpVUStVdJCRK1RJxTdwu92UCyWkFmQzGba3YmQzSRRqJQND\nQ3Tk9zse/eFejp56jEo+j6BWEe7tQ6tWU6mWicaj9PSNo9XY0GgMTIyF+a0/+toPT1D4/B987uXh\noIpcPo1CIUelFlGpBC5ffoeHHnoIm83G1l6U69dvYrU6yOXyOB1m0ukMBr0OhULk4CCCzWbh9Okz\n7G4f4A/0YDI7ePGFF1CKGnp7++jQYmV9gVqjRjqfx2Htot5oka8UWV1ZZ3h4gHqrhVplJhQMIJfJ\nEERAJiPU3c3K8gKTk8ewWazvynXpqFRrdOhQb1TR6XUMD/TjsNqYm50jGothshtpteucfPgE7U6b\nYuk+B+H+/j71Wge/r5u7d+4xMDBApVpAVAlo9Vpc3vvq17FEhB/98Y/QataJRV
MMDfej0aix21zY\n7Q7azTa3bk5z+/YdfvTjH8PttvGtb3+P8aEhgl1eotEEg8PDIEloTGo2tteAOkMDfQTDYRrNFvlc\ning8yu3ZFbQqFeVGko//9CcInvo3bO9s8Bd/+Rn2t7apJrYYGA3yuF/Pxa0G+WyF5x/p5jO/91Xe\n94CVv59NoxBbnAqoOSjVUKgMDJ+c4IUHf4YfGzfxy//3Zzh36TJqtQ7rcD/B3mHS20k8NgevvPkD\nfP4Q4cFxPGYzkkwgmkihVSkwW00YdSrktTI2u4XZhXv0u33cvnsDs0bN1asXOHnyYaxuOwqhw+tn\nf8DRyWMkE/tcv36FEydPIsgUvPbqG0w+8ADIJErlKqKgR6dVYDGr6HQaNGod9vei3Lw5w9j4ERx2\nO+lEnJYEwe4BHK4gxUqNxbUV9EoN/mCIYCjE22dfw2wycebMMzgcFg4iezicflKpCt39wzSaec69\nfR5foAt5s4Zdq+bcG2cxGzSIMg2CzozD7aYY3cRstGHQ66i1qlSaRTKFNNtb9+gN9aBSGinXm1Qq\ndQSZkrM/eBNdB2QKOQ6VjVq5xPUrl0mlDnjmfc8wv3CJWGyeUjaCJLn5oz//2x+emoJSpaK/fxBJ\nkohEIkQjEarlMt2hHgRRyfb2LnKZApvNjtFo4v3vex/tVhO1WoEgwu7uLpOTk6hUKiRJon9wEJPF\nzsihw+xsr2O3mokdHKBSatGo9YiiiM1iYmHxNqICFIKcq1evsrS0SCKRwOvrIp1O4/P5UIgqbt68\niUKh4MbUNNPT0+zt7dHpdMjlCgSDYXQ6A5nMfbori8VCMplCoRAwmvQoFffZfy5dnKJWg9GRcbp7\nBxmbOMb+QRyZIBLu7qVWuy+T53Q6cTgclEolWq0GdruN7e0tBKUCp8dLW5LxwAMPIJfLuXfvDgsL\n8zTbBeqtJpGdDMsLBwgyOXaHlUh0n1AohM/tIp/PYzZbeeGFDxOPpZHLRVrtDj09PfT3DzNy6DC0\nG7i7PBydHGZ1dR2XwcD3rm+jktXJ3VrAPzDE8aMDxKotdlMlssUSKGSYJIHF1QxGEWSNDulUCbUg\ncf7iebaX1xkPmMiUSqTzefL1Op/45Z8nP7fBhYvnGX/haax+G48cHaeey3L2a1/jzs1bWK121Go1\nbb3I6r1ZkFrcnbvDhSvX6e8epNiRc/zUszT0RjptObUaeJw+9Do1nWYHg9FGqGcQjUFPo9FgPxLl\nyaeeZnF5ib3IPi6PD38wgFxQkEil0Wi1zNy5jd1u5fFTj7K5ucnKygrz8/NUa2VmpqeQ0cKg0+J3\nezBazORicWxeJ0+deZYnzjzH1PRNZJKahx99gmwhR7NTp9Npk8nXCAUn8Ib6iSRyzC0tc2jsCO12\nG4vdht1qIp9JsLu7TSodJxKJoFEZcbpGGBl9gp/+pd+hgRp/oAupXWH+3hRai4nJyWPIDWrK2Syf\n+73PcPGdt+kJd+N2elhf3cLqGMRk6sPvGaWjKr9nf/xnkSn8/md/++WAReDwoXEEQYlK1KDTGAA5\njXods8VCrdJmZGSAxfkFpq7fQC50yGXT6HQalpe3qJQbtNsdLFY7ly5ewenysLW1RbWYoTsUIpNO\nkc4U8XlCyDpKluYWcfls7G/vEN2LE+r2o9GoUapULCwuUKtW0KhUOB0OAv4gG6trON0+Wu0OClHA\nbLHR290PCJSKJdqtFn6/i+W1LerNFmqlSKmUR61Q43C5GBkZIRlPEIvvUypkkItqhoaHyWSzrK6v\nYTEbsDncFIplmg0Jl9PNOxd+wMjIINl0ilKhjFKjgXaL18++wtChYQ6NjvLtb/83XK4+gsEwkf0I\nB9F1PE4/balOtdEgnkzR5ffwre98m1Coj6GBQ/R1D1OuNylXazTaHWKROGtr69jsJqw2B+WKgMes\n5dO/8jEu/cMljp14iDe++3e4zB4e/8hjfP3Pvs9uWYdW3WZsKERxa5lYS49O0cLQUKE1Vfk3X/gi\nvn4PH/vIHxHSZ9CFellcWefPv/F3XHv9DQbHhhkeP8Te1G3+5A8+h0It8tDxY1Q7LVxuO9Rq7Kyu\ncO2tizgCXmxWG/6uIGNHJzH7HMT3YyyurGBUqtGpVbQ6KqrlMteuXWR48DgGswVrVxCrTsP29jZ+\nf5Dr16/jctooFor0dA+SLsTQ6a3I5Rq0eiMTE0e4NX2DVDpFoVjm1KOPs721hdfn4OBgD7VKjiiX\n0dfdh16hIpZNMXXpCoLKgMFgxBv0oDW6WF/fxOdzIUltOrSRq0RqxQ5KnUDY66dezuL1ONne2WV1\ncw1dR8/W6hqjE4MUyxWqlSYKhZ5SqXaft7OSo6e7l0yxSlfPOMGecar5PDKZQGJrj8FjE/QEfBht\nOjQ6GyqFBovZjEolx6hUsjy/iLfLzef+5Bs/PPDhj//z//Xyx148jb+ri0Q8jkYrw2DQ0pEkKtUy\nFrOJUrGATqfEbLbidHQxMNCDSqVFLgi8+OHncbvdqNUaSoUyDqeFdrtOo1VDZzIzdWOaQLCHE8dP\ncm/+Fja7DqvdgNvpo9Vp0T8QZn52nv7efhwuJ31BH8VikfX1NbZ2t5mbn0cuishFOV3hbjLJFHt7\nezg8dqLRAxYX56g3KtgtFgRRzu3bd5k4egKXy8/lK9ex22wYDUZKpTIqhYDH5aJVrzMy2M+lC+dx\n2u2Ua1WK+RyNWhWH3YzJqGX6xg3GxiZRqLXIZRqysSyDA0Oo9Vq6ewJcu3qBZ848zVvnXid6kGV8\n/AhPPnGGcKgLl9NDNpNibWWD1Y1tOp0OOp0GmRw0eg1vv/Um7UoNn8tBOpvEYTTRbDTxdwVot0pU\n621alQyTj4f5V//2izjtA1y/PsvkxDD/7lsz/B/vC3N1Oc2zh92UaynCbg25uBZBLKBVSzzz0r9g\n6iuvsjj1Dr/zp/+Jjc0NXvzEj7F1b5blhXlGDw+xv7JJvV7n8IMPkc5W6aDE6fOjVeuot+q05XLG\njp9kf22FlZ0obleYL/7+Z5mdusED4w/QarWxWNQIZLB2ewj5AxhVJhLpfXpHR0jsrnP1yhVGDx1B\nZzIxMTFBOZ/C5TSjVOjuN9K1GwiyFu1mjWyuSLMlR6U1c3RkmOmpaUbHhqmW6ni8Pjy+Lrz+ATa3\n91jaWMVoMjIxeRK9QYkotjCYTNy8eYVsOoFR5yCTiVIrlyllqoR73FQLSZRqE1VJzkE8S5e/H4/Z\ngL1vlI4okq80KJYErBYjayurHB4eY31zFZXSzX40QXePj7npi1j1sncz1QzZQg6r0YjGaCC+vU8k\nWsLS5cHgc9Iul1Dr1DRackwWL7/3x3/1wwMfBLmAKCiIReM0Wm1q1TqiqEYuE4nHkhxEYuTzRXZ2\n9igUCnSkBjabhUajRk9PD2+99RZLS0totVpazRpWqxWlQoHNbKGYy+Pz+fjmN/+ee7N3qdebiKJ4\nn0RTo8Fh99CRFPT0DbK7f/89pUqRo
8cOM/nABIJcRTQapVzJ43Lb2Nvbw+t143BauHHjOr293Xg8\nPlpNuHzxMrl8Fq1Wy+zsHPV6nVOnHsFo0mK26FGpVJx7+yKpdJ5mq8PO7iaBQACb1Um92WJubg69\nTk0+l6XTud+enUplcLk8WO02RLmcarXK6toyGo0Gn8+H1WHDYvKyvrpLpdwgHA4jE0QajRaVSoPu\nUJCB4QFOnX6MaqXEwX6EtbU1XnrpJdbXVjh/7hw2qx271crAwAClYh05OpxuG4lsmnZe5ENPjXJh\nepZKBzqFDgDxRI6DWpPXbq2jF1TIFAKCQoXOAKq2EZlaxZXLUwiATKllb3WNlavXWF9a4NGnHiOV\nLuD2B4inktgdNtxeD6VqjS63C51Jzc7mDjqtCVEBdo+LVrlAOpfmIx/9GI1Gg1e/+y3SiTROlxeF\nwc29W3dZmZ0jmkpgtuu4fO4tOu0Wp0+d4rXXXuHgYJ/V1SUcDgvJRJRUfB+VQkQul6NSqalU6zTb\nHbqCYdxOJ+lEGrvTQTyRotNpodFomLp+nVKpRN9AP06XB5PFilyhxGr3EYmnuHDxLR44/jjHT5xG\no9Vy+NARzAYtXo+JbL6E3Wzk+vUbNBttTCYTVoeVq9M3SKfT+Hv7WLp3j7XZecw2K57uIJIox21x\n4rJpMZl1dCQFmxt7fPc7r2Iy6KlVK/h9bkxmPaJMhi3g5qGH7mcRlWSRQi3D+uYc9UaJcrX43v3x\nn0Om8F/++PMvPzrup5jPMjTYR6l4n5m2UqtSrOTpHxigXKlw6PAoly5dYHBwiHw+T3d4iPPnLmAy\nmSgWizgcdnoHeqhUGigUKubnlgh0dZFMRDn9xCni0R1WVpd59NFHuXr1GicfPMnO7j7ZfIFHHn6K\nu3fusbK8RCqXoVKSUas3SaS2KJfLIClJJZJEIzvM3buKUa+h3WqyH01gsThotWU05TIG+gYwmXTU\nm2USiQTZdInevgGcLhfRaIynn332PobV6clkc1jtVtK5FO1Om3q1wtNnzjBz6zZqtYGZmTt86IXn\n2dnZoNGooDVqaclaDI4McfnSNFcuT3Py5APMzNwiGPYy+cAEBqMepagkmcrwla9+FW8gzNjYJE6X\nmy6vC6/XTWR/n3uzt0DRRKNRIjUk5KomSqWe6Zk7dJDT0zuAUjBgMfp535lJfvu/fJcG0MqvsHXQ\noN2qE61LiMU6E04FBbWSaF6FRV3goCxDfnCHr74yx1NPjzF16R1+9Cc+yon3PYfN7+Xst/4BmyeA\nXNRgtfnIZwv09PQhl8s52N8iWUpjtXjx+ELUqgUGHnkSg6jCaDVjMesQNUosDh2Lc3cJ+4O8/soF\nfvSljyMq5XhsAaKJFBOH+7h47nVcFitDQ4dIxaM4nU4k5Nhdfow2O5VGlWazzcb6Oj093VTKNRw2\nC+lsgpbUQKMSWF9ZZfjwIRqNBsFQiFQ2SSGbIBHdRSUK1EpFNtYWkXdETjz2JJVkhquXznMQWaP/\n6CPUGx2ShTwDAxOsbu0yf2eeY+OjtBs1tjZ2eeTZD7B15wYzUxcIdfkZHB2h1a4T7A6zMDt7v8hq\ntLI0O0Mpn+DRJx/DE/Bz5foMh8cn2NraQd4Bo9WMw+WmVszhshmoFhKotTZm57ZI5VNsb63zrdff\nW+/DP4ug8MUv/OHLD08EKVUqaPU6LCYbUzduMjIyQqvdoNlqYre6QdbB43OjEOVE9mNs76wT6Aqh\n1ZqwWtzIZUoSsTh6rQ6DWoVa2cHmMCMIKpDkHBzEGBgYIxpNUim1kCQwGHWks2kSiSix+A7hoBuj\nzUut3OIgesDw8DCjh8bo7x8iEPRiMbtwO21UanVCvQOsLC+QSEQo5TME3C62dtYIBXoQZGrsNhsz\nt29QKWWplIoIMgGNSkk+lyIYDjA9fZ2DWILd3Sj725t05DAxeZRiuYAk1UgkUpTyeQI+H5sb62gM\nZpKJOEoZ7O5sEo3H0evvw5K7t5a5dOMaVnsAp8vPxatv8amXPkWr1SG6v8X+foS+oVE2t/Yw6g10\nJImxkaOoVFquT10mnShidrmwuf24fW5m5+bxBsMszN/FbNIg6mysrWxyfbPEB148zcydDWwKOT6V\nlqBZRqkAjUYVo6GOXGHCLKXZWK5w4vggH3j+cVLJLHvJHAaVhtHJE9y7c5fdvShuvwen3Ui9UqLV\nqRHqGeCNV8/x0JkztOtNavU2a7O32Nzf5nBfNzKUBENBZhcX+eAHX8RgNvCdb3yDYw88iFiXs72/\niiQDv6+HYHcf+WKFciGHP+AiFotRKN4nqSkVU7htBuQSXL5yla2tLUYPjRFPRCkXK3g8PehtJorV\nFmurq1RqNUbGJkhFDmhJClRqHaVKG6PFjkZrYXV7B4vNQbHRoGd0jJHJh2jnEwhSG6Wgpd5pI0gt\nJk+coNwosLW9hVZtpJiN02qVOX74MHvROFqlDFdXFwsLSzicFuw6kb2deTRaDSqtmZtTd/C6g2TS\nMTR6HTaHA0kAa7gHsaFg7u4M9baA0++jlMswfuQISrkSo8nFn/3ta/974INMJvtLmUyWkMlk8//I\nZpXJZG/JZLK1d8+Wd+0ymUz2JZlMti6TyWZlMtnEewkKgihndGScvt5hBLma1Y1dnvvA82QLZdot\nGT09PbQ7NZRKNZVig52dPZKpKBPj90Vpa9UGnQ7k80WMZhuioCKezGAwmGm3O4iiiCiKjI0dQ0ab\n7u5ujk5OYLZYKRbLdIfCGPRqjDo91WqdcqbE/u4iPSEfXncX29u7rG+ssrS4TTZbZ3D4JC5XHwIa\njoxNsrMbZXM3wtbOLt3dfdycnkIQBAYGhhjs7yUajd0XCTk2TiGfxazTsTx3j/GJY/i8ATweDyqN\njlOPPsr3v/UqWpWWZkNOo97C4XRzcJC4r8C9tonD5iabL5Er5lEoIZNIcuPKFBqVyNGxIerlBNub\n84yNHkKtUGM2qXjiidOEAj6uX53CYDDh9Poolaogqgj19vPI46c5PDnBzPQt9nfXoF3nwWNHWZ6b\noctv5/zF87zw9GGqhRJGhYzpV96h1AG7AnK5IjlJj16vw6tuIaq0CJUmwYHTaDoSkrxONlPBPzzC\nkYkhFGoJq07FM8+e5vHHJ6FTZOneDO1GkXoxx7VLF2lUc2wvLBA/iFIt5xjoDtDv97GwOM/8/CwX\nzr/NB9//PvLlClv7CX7iZz7OO69+jz/90h8R8AQwaTW8/uZZZm7d4SCWoq3VsLy+RiaTQS5vkC0W\nUQgiO9sHVEptTj/+JD/+yU8itdqIgkAmEWPuzk1WFufQKgXqjQ7HHjjJ5tIyDUnAoDfRaYs43A6K\n1RptQeKDH3o/VCtkItvkE9vsrc0R2d9iK7JBRyijpkytnCGTSFItV+jp6aVareIK99EUdZRVOtK1\nJsVqhVqhQNjtwW2zMT1zF2fgEF5PF5noBqODLjTqMkcPDaFpt2ikU8T2drjwD98imt0kX05S1p
MU\nvAAAIABJREFUk+WQiU1qlQTf/Pp/w2U1Mjwafi+u+N6CAvAV4Jn/yfZrwDlJkvqAc+9eAzwL9L17\nvAR8+b18RLvVolgscunSJVxuO33dHnY2l1AKLY5OHGZnY4t2p8ns7CzBcAiTwYlareXW7dvY7E5M\nFjMmi57uvi6KpQx3792gI9Wp15pIiKxtLPOnX/4TDqJRhg8PsrE6x9zcNEqFiEGnp1quIchV9A2M\noFbbSWU3KJVSFPIJzr31JiadlmI+RyBkp2/AzdrGCoGwn/7BHmbu3OaJp57k4UcewRcOcuP6PO0O\n3Lpzh29/91UsDjfZQh6bw0U0lSPYO0gsU6KnfwhBDjqdQKWSwed3cvv2Aj/+iX+FXDAiteQcGp9A\nqdeTr1QxO9ycfvppJh58kGg8gVIQOT45zqWL10in8nh9TgYHRjh8ZISZG9eROm3efPN1RKUZpVaL\nweREp4XVlRkuXzzH0fFjlMtZ6uUiA4PDiGoFYyM9SOUyX/nynyM0yhSTSWSdCpNHH+PunZucPBLA\nKIdej5mqTElNUCHvQHQzzm4+RUtdI3JQQ64v8rk//TZHh2QMPjaJ2hdmf3cHtaggncwxP7+M0eyk\nWKxy7849NEYtFocdpVbHQH+Yob5+1Ko2u/uLRPbWWV5aAbmMYNjP/NI8Gr2WnZ09TAY9aqFDfP+A\nnoEhPvrTn2Bjd4NXvvc6zz77HPlsikatSLaURy7X0mqrGBgaIH6wRbOtRNQbyBWiSLTZmVuh1CrT\nkjUZOjyI1apFo5BjtRjRakSmp29QLNUI+N3kKyW6e8PkM1n0GitKpYbltU3aCjUOu4fkbpzf/dXf\nxKL3USrC7N0ltAYTagWYzEr8AT9KlYDVpGJ7bh6P2UYhFsFrUKC3WllZ26BUrYFcxGi3k03vka8V\naAoKNHon9+6tY+oZwR4eROfq4uhDZ+gfGEJeK9E79CBCy4ISLU6/n2MPj3Px+lvsLK+8F1cE3kNQ\nkCTpEpD5n8wfAr767vyrwPP/yP5X0v0xBZhlMpnnn3qHIAjk0gmOHzvCzsYqC/PzjAwPY7fZOHv2\nDar1BnTaeH1OSqUiSrUCrU6BTtdBLm8gCh0KhRTLS7N43G4UCgXNZpPrN2+QyaTotBt86mc+yeFD\nI9BooFCrOHnyJDs7W8TjUQRRQqvV0pFkeH0uwuFuxicmqDWaHD8+iSRJdDqQS5e4cf0myXiSN86+\nztLCIlq1inNv/YBSMYtKlGMyq1FrZLg9FkJhD9s7exj0Jvb3DziIxDh37hzVWoV2u02pWCGbKRIM\ndXNs8gHarQa/+/Kv8/ff+BoNWYPXXv0ehWIKQZSIxiIYdFouvf0mrVqZoaFR+nuPkMnHOfXkCbb2\nNpEJCmIHefoGj6BWq7Hb7aysLpHOV9BotWysL5NJJ+nye9jc3EQhqlnf2qRUKKHsqJFQsrq1zNFj\nh/nSF/8IkQrJWB5Bq0apgQ+++DD5uoRSdb/6Xa7XqNKhXGxQKEikck1sTgcWUUOXVcI9NIJKMOFx\nuujt6aNSB683RFfAzfSt67gcdp57+jk8/jDNjgByBXs7W6jUAh5/kEA4zOGxoxRKRYxGA/V6lcH+\nfgxaPclkivm5FSTUpNMZOm1YWV0jFArx8KkTpBIpxiaOE+weQi6Bw+Vk/NhRdvf26e8dQC6X0x0I\n4feFiGdLOAb7ULRFdCoD5UoLg9nFvbkFUuksapWK/v5+StUKMlFONZ9k9tYlVAo1CnUFKiXUrQZe\nq5XZ5RVChw/x5W98nVgpRjjsJtjlZXVlntW1JFPXZrkzvcz8vS2KlRY6nQ5XV5C1rW0UCjU6hYrh\n4VGSqQwHsSTNFsgkHSqlHae3l9XVZdwuK1dffR1Ro6JQLiCvNtk7qCJTOqlXWqh0KqqSQKGuQxLd\njI4/Tmjw0P++oPD/MVySJEXfnccA17tzH7D3j9btv2v7fw2ZTPaSTCa7JZPJbuWLVaCD2WzEbDbj\n93UzN79MOpPjscdOIwpKmk2JRqNKs1Wm3qgw2D/IyuIO9+4uUq/B62ffxmnzUquUERUa6i2JkdEj\nOO0ugsEgKrWMO7fu3N86tPvQ6m2UK3nKhSy5WJqFuTtkU/dZc4b7DqHTGbDb7czNzTI1dY1YLMZT\nZ57hwQcf5vQTjxAOB1leXkar1+Hr8tNqtUhFM5hterKpOjq1mpmZ65x58hl6evoYHBhBa9AzNDhA\nyO/jb//277DZHNy8Nc3de3MU8xVcLgeVygHdYRcrS8ucfvQRqvkCerWKrq4+ovtZJg4/yO1bd+i0\nG0Rjuzz7gSc4NDaA1aFn6to19vf36evrI55I0tMbosvtZ29zm0a9yvjYJGNHjiGTCZTyObq7u3G7\n3VSrdQSFmnpHzk/+1Meweuy878UfoVDrcP7cVZJbEY4cOs6DZ17A223EZDIxOeBgYCjExKQP48le\njGEPxuA4ksNCWtvG4oATzz1HX2+Y/dQO3mAX5VwJpVqHymjCpFeTzWZRKTWsr+2yvLKBw+0kkytw\nZPwozUYDo95APJWlWimj1+spVdpMTk6i02lxOvRsbi9Sb1R48plTlIs5RGTs7+xTq5TZi+zyzb/9\nJqG+IawGM81ylZWlZVY3tyjk8tAss7q8wNLCDuGBfqSaRK1VoVWuohVV7O5FeerpM0idDnabA71G\nz9j4OOVGi4nHH0bvMNJEZHdjh3iqzPzKLqtbBzz44Bh3r7/BV//4ZcSmgMMRQC4Y6R89gSvgY/Kh\nSfoHwpQrebp7Ajh9HpZXV3j8uQ+wuhVleXGF1ZUNDo1PolAo8XhdJBLrXDj/JsVCipvXL/CFz38W\nu93MjWvX0RiM/PXf/zV+vxdRZUarl6FSdNhenifQFWaodxCTwUitlH/Pzi2TJOmfXiSThYBXJUka\nffc6J0mS+R/dz0qSZJHJZK8Cn5Mk6cq79nPAr0qSdOt/9fzR/oD09c//a6LvysI1WyWWl1dp1Fu4\nXSGkjoy+viDQYX1zA4/Tz8WLP+DYsYfIZdNINFEKIjabDZ3WxPLKHNt7u5x+4gwrS+vML83j8XuY\nOPQA2xvzVJsdlCotmXSCRr2ARqvCbPRg1BkJBF00WjIWFu/RqJXZ2tqikC/j8Xjw+MLIBQGf18nq\n8gLDQwNodUYuX77MyMgI5XKJSqtIsGuYK1cuUSkXQRIZHz+KUimi1Khx2t0YDAb2Izvo1QpSyQQO\npwtJJrCxtc3+/jaVcont9TV+7Mc+ypvnzzN59AGOHj3K1M3zLCwsInSU/Ptf/nW2t7eptdosLS0h\ninLWlucYHRom3NfH/Pw8E5PHiEYyqFQ64vEIp0+dYHNzk729CKcefxSVSsPy8iI2k4FKpUJTLtDl\n62V5eRmfx4wCOY1WkZvTt9nZTTB0KMB3v3eLs1d2yAsK1O0mOiREAVpqgS6Dh8//+V/wyn/9Av7e\nHtxOCZO+Tl/fEJVakfm5NXr6hnA7nKhFGTu76xSqR
XzBADdv3kKrMfHs08+QzWapN2u0Wi36hkZJ\n7m1TrtVIxRMUKyUCXWFy2TI+v5N2p4FMpiEevb9DE+ruxxUa4sa51ylksvQdOcKdW9PM35vn4VMf\nQOtQc7g3RD4Tpd6oUCk22N/Z59jYJJFKDqvJTCqVwWiwsrezi0yS4bTbmLo1zcQDJ3B6uqlLTVzh\nXi699l1Onnk/N7/zVWotCbPDgys8TDAYRJLa/Ncv/gGHjxxFrTPj9tiRBCWx2D59Pd3s7OwyNHiI\nuXs3GTv1GNlkik6lgk5roN6oIpNBu5anlE9Bp4SgdCM3mKnlDnA67UhtNffmljAb9bTaGUw6GVZb\niLrYJrKbYmt9lzMfeIrIwR5GnR6F0ohj6IMzkiQd+6f8/f9vphD/f2DBu+fEu/YI0PWP1vnftf0v\nR7lcZmlxnWCgl9m7KxQzDcwGJxqliWQyxsbOKnOzC2xt7tHlDVEs5ent6WdtfYVcrkCz2SDcG0Kp\nURNLxjAadBwZGSWbSSEqBZ568klsFhPRaASn14fd5kSn0zJ+ZIxgMIzT5SES2aPeKLO/H+HgIIbX\n5cVp76Kvr4e+/m5m7ywzNXWNRr2KTm8hmkyzvR9BrlTx8U/+DF3BMIIoIxMrcf7cmxj0KgKBLpaX\nV7l+/TL/g7r3CpLsvO48f5k3vfemMrOyvDddVe0bjW50NzxBghyKFEUnipKG2tWGdjTSxO7saoSZ\nkKU0FGUpzlIiCa5EiQYkAAKEaXQ32tuqLu9NVmZWeu/t3YdWTMw+7A5jYx/I7+nGjft6zj3/c77z\n+ycSMUr5HDqdlv39fZQKDasrm8gVSra3t5C16zitRsxGG/v7cUYnZvje91/h2WeeJpNNcfPmbVoN\nLX5PHx/+8AtksnEuX3mHTCqGXqPm8sX3+IVPfoqnnvsACpmcz/3qr9CuNQgFN9nbWeWDH3iWQChB\noy3H3zNIOJIiX6zS2TVAIBilJUqpFGqUC0kMOoFarcZuaJ9gqIjBZEIiVFhZ3KBn2MHRI71o5XUk\niJSkoFLJsBtVPPGhMf7gdz5FR4eGYnUbh9uASmXhIJam0dLx9LP/isPHj7MdP0CqV9NSyDn9xFni\n4QBHJ4cZ7fGQjKRQCCpWF5fYXt9gZ22NveA+vo4O7HYbk2MTaFVa+gb6uDf3AE9nLxqDlGs379Hd\nM0GjDuGtRfoGB1AaTdi1ArJ6mRcvfBCHvs2Iz0ImEeQgmkQi1WO02xk9PIPZocNj1JPPtvA4e7Fa\nrczMzKBSCzxcfIjH++hnlUjtEQ8HKIXWUCMSX53D5BvF2zfJ6OgUifAm9UqEWj3Or/3GFxCFGoPj\n/eyvLqNXaRg/dILVnQjJVJHdjRXGjh4lsrRBaHkNk9VMPHJAsVikUChSbgh0dI6jM7jIldoENvap\nVRusLy9RqVfw97sQNBJGx0dJpirsBsJUS02ajTaB3W3i4X1cVidWo412JvkTB/f/16TwGvDZf3n+\nLPDqf/P+M/8yhTgO5P4bmfH/eOQyGVqtBhDxeF1I5QqSqQzbu1sIUuj1dyKXy0ml0sjlCtRKFdFo\nFJfrkcHF4OAgly5dYWVlhc7OTow6I6+98io6nY6RkREqlQrDAwPEYlGKhSoAEhE219ZRKJTs7e2j\n1SlIJA+o12tIpbC1vUoiGeHa1dtkUlm6/S5ktHj1B6+SSaUxG01cuXKJYjFPLBYhkUgwNT5Gb4+f\nwb4+vN5OPB4PXd0daLQqFhYfMjQwgCCVksvlaDQaBEJhqo06jz/xOMVqjp2dJfYD6zx1/jHCkR2a\nzQYarYJ8NkG9XkeulGK0Wnj7nUtkMiV8Xb2oVVqmpqYo5LO4PT5KlRp/+ZX/Qmh3D7PVhkQC0ViQ\n+YUH6I0m3B4vMoWSbL7Enfv3cff1ceLxJ0CmxmSysB8Ko9XpcLnduNxeWlR4/8ptfJ1OuvvsdHps\n3Ly3RbYGSqkCtQgeowmDYGFxcZuDeJX/89s/ZGr8EIurm6zvhChUcoRiITY2QoRCKY4cmqFUTCGR\niHzr5ZcZHh7FbLUjVUrJ5nPU6w1OnTpFb28PiWQUi8lAPp0hnUqRzKUx2a3U6i26urooFnMsPFjm\n9OMnWV6b496Du2QzFTR6Hf19PchUKmZOHWdvZ5Gurk6iqQIHkSgqeZu2WCOXSJBIpvjmP/wjpWIN\nv+8Run9/b59w+IByuYzP46bdbBHa36deLjM2OUC2mEQQhH8xGLLQ2zeAwWZn+tAEyUiU9fl53n7j\nTXr9/dQbbVQaJZVyjpWHd5keH8Zs1CIoFbz7yg/wTPYimNXIyg2sZh2ZdIJAYId2s8n2xiZW9xhq\npYYT545htugxdzjZC2xDvcxwXw/3HiwwNHGE8UNH/gU/oOff/G+/jdgWSKerKJRW8qX4/0sE/t/P\nf1c+SCSSbwNnARsQA34X+CHwHaATCAAfE0UxLZFIJMBf8WhaUQY+99+TDgDjAz7xY6edXLhwAbVa\nzbWrtzl16hSpVIpM+hFBKZ4I0un38cUvfpGTx08wPDxI9CCE09VFuVrD53WxtraGViaQL5UZHZuk\n3Ggyd/8BR44c4eqNi/h9XTx94RkKpQivvPo9nLZR7j+YZebIKFq5ElpNFpfmSWUKOJwuJCJYzQak\nUilI5bz2+qv09vkYHTvEvbuz6PVGTp48jkwmQxDkqHUqBLHJ9Rt3eOL8M9gsJtbXFpmfn2d2dpZj\nx45hc9gxmUz0j4xx6/pVdGo9KsWjRS5BriAaSXLpvR/zS5/9RaLpKKeOHeYrX/kKTucAb75zmYFB\nL9OHxtnd3cXjdTMycpRgMMjlK+/gdLo5e/Ys3d3dvPv2j9BotJRKJT71C58lHA4TjYdQKBSAhGg0\njrfDx5HpGSQqGdu7O8RiCWxWJzqdjq2tTex2F+VyEUGocuPyRSbGD/PW+5e4ezdJLJ7HbZHSrjXo\ndmqRax0IaglOsxF3q024mkfUKPj5z3+adC7D3Vt3+I3/4de5du0acqWCkx99gWuv/xiDSoevZ4iD\nvT3q1RpKjZx0MkGHx0mzXsVsNpLJlalWmrTbkMlGGRsbZ393l76hHqLhA/Q6G1/+86/ysU99iqmZ\nCdKJIImDAE6nB1Gq5u7te1x4/CjpUhm52sT+5hLebh+tloheqUbncnDvzm2GB7v5+//yDcqZHEdP\nP8Pg4AQCIvFUhHazgclppVas4PT7MRhMbC09WtTaCe3jdXow253INSaaLRGZVo1GLZKK7ZPPVfBr\nHGTaDbQGNWKrTqFQxOHvZ2X2KtcvvopKq+L048+RSDSwDvhZmn3I1OQhurw+4pkI+VSJiUPHCAdW\nKZcy5HI5+seOIhca/Obnf5F//du/y+CgD43SQTGXpNkusb65g8Hgort/iIOdB/Se/fX/f+SDKIqf\nEEXRLYqiXBRFryiKfyeKYkoUxfOiKPaLonhBFMX0v3wriqL4P4qi2CuK4vhPkhAA2mKb6akZlhaX\nSSQSmAx6
GrUquVyGarVGOByh2WySSqX4D//77zA+PsGDB3OUKjU0GgOlYoVyuYxcKoAgxeV2UygU\nWF1e4+jR41TrNaanjtDR0cHq8grbW/uceewJcrkcTz1zAbPVQDQeYWl1DYlEglymQYKIu8OFyWoi\nk8+h1WnoHxig2RKplOt0+fsQUbC9tUk6maKQyyOVStkN7NPX10ckHCQcDiORCEjaEgZ6+6iVK8Rj\nSZQKNVevXMNiNbGxsU5PTw9OtxOJREJfXz9KpRKny0GhVMLm8nDsxGlyhSxmixlBEJicGKNaKXDm\niSdYWVnja1/7RyqVBs899wxHjhwhk0xRKhdwdzhw2G3MzT0glUlisVtwuuxkcym6/Z1EImFS2RTb\nm+tk0kmcdgcmg5FGrU6H2023vxOnw05HhxeJICccDvPcmVMIQpIuSx2LoMCkE6iUG0jkCpzdXZg6\n3dRNBloqCaceP027LqWUajExfoj79+/TbLcoVWpkN3exWHSEE7vIqVIo5UAK3g4PvX3dZPM5/N19\nFPMljCYLap2etgg9/kdUqa7uTnZ3t1GpFLQEkXPPPsHc3eu89fr3UShl+Lq6WVtbYz+4jdtnIxCN\nUs6VePf1N7A5nZSrFQS5lODeLg/n5hgeGSWVKeD2d2FyOnHabTy4fY3o/ha1UpFqtUq1VOEgHiMS\nD5PKJImlkhTKFSZHJljf3GRnc4t6o4JaLeHd177L4oM5/L4+HE43hWaOSr1GplTh/cvXaJTr5HMZ\nhgbHaSMnH88zNDBC/9gkw90DDHX1MnJkgrdee4Uubx+dri5m79zh4ew9HDYrMqmchiii0mo4c+E0\n3Z0+2q0G4cAa1WqWarlEvV6n3W6RTCcwWOw/SSgCPyU3Gv/iS1986Zc/+QEisTgymQylXMXm5gZj\nY0PkCzEePrhNKlmgXCxjt7u5cuU6p06doqenh83tTXLZBAa1Do1ah9Vup1wuUa/XsFpt1BtNNGoF\nkpaMbD4DMh16tYlyoUrkYBepVIZUokSv1TIxPs7wwDi5Uob+/lGmpg/xja+/TC6bJ5VJc+H8ecZG\nJ7h16zZanRa5IDA43E2xWsft6yESTSARldjseg7CO3T5u9kPHuDzW2k2q8w9WEGj0VJvVtFptGyt\nB/j0pz7Ltevvo9KqsZjVRCIxrt24h1zZoFGrMzl1DKfbQbmcIREK4nE7+cbXXuGXfvkzVKsV1teW\ncDn1lPIFisUol967jCipcfLMOU6ffxaz2crVK9fx+XtQaY1o1AZi4RjeTh/xeBKFQsPswhxT01Ps\n763RblapVQtAi6qkRjCww/KDO4gySOXyWI0ODsIJ1kINUmmR3l4HkyOdFDNJbJ3j/MIXfocfvXmJ\nX/zlD9BstlGp5Jg0KvLFKrdn7/HsBz7M2OQUa8urpFIZnnr2g+xvHnDrxh18LjdavZ5mU4LL7SOV\nzJKMp8hnIngcBtTKFmqZiFIpw+npYm8jRK0M5WqOsbERurweQqEDquUKBrWN3e0AZ84/RbHUwGXt\nwd7Xx+hwP//5P/0xH3nxY1y5dgOnuxO1QkoksIe7twePc4Ch4SO4vS7cLivlSoVkPsPcg1nqjRbD\no324rCYEUUCvNZAvlQlmizx+5gypVJJStYkglXJ4Zhp/TxeNap1kNEIseIDZ24UgtnF1eWgJAq2S\nhGazxGPnzjBz/CTFUpl0LsL6wgI6rZZkNItvYJxiIUil2mBodIRCuUax0sbf3UkyliK8G2Py2Anq\nxSKbu1uYrD7kcjWB4AGJnUVcZh3LS7P09I7yR3/9k/EUfiqSwn/+4z946cljg2hUanLZPKGtDfx+\nL9HoAYV8hg889zyVehlRbFJvVNDpNSRTCYJ7Ibp7/NhsNtLJFKl4EqVaiVRUoVSpqTcruN12bt26\njUSErs5ecqVHNu0KmZT19Q2arSIrS/fQGJWolQaKuRodbiuVchG5XEE4kUAqkxLc28NqtBAOBdFo\nVQDoDCpqlQYedwc+r49SsYLZrKXRKJNJF5HLVHi9borlMvF4hlK5isfbwaHJSZQKJdVKGafbxfT0\nFG1EfF4fV99/j0w6isVqpt5sUiwUkUqkzM8uIpG0UClMLC2H8HVamJic5PqVy9SbdZwOJxarjlI+\nh8Xp4CAYx251kskkKBQzVGo1otEIU9MTZHN5qpUyNouNVDbL0HAfa0tLXHj2KeKRCJ3+HmKxGOmD\nA8qFBBKJknw1R6PRxKI3s7a5wdp+mVy1ikxSRyYT6fUbaEvaLN6/S7uQ5MjhGYq5LDKdFrHWoIXI\n+SefoJjLEdjdxW7rYG15gVQsxMjEAAcHQQLBMGazHp1BQ61eQm/UkIweoNTr0Wm0BDZ36B0fw2Sw\nodTpeO+dtxgbH0StU5MvVzAbLHg7OugfHKCYT2PUKJEoJFgtNiqVHPlMgrm7t7F1uBicOIwgBZVW\nQGs04/F6SIUTeHydVIpFJIKAFBGzxUxfXx92h51kOoEgClQaZdQyBZlSAblSylj/ILlMgkY2Te9o\nP7Vsiu3lh1gcLiq1Ms1qHZlcSSqRxO60oFIoiUeT9I6NopBLyKZSiKIE5ApcvmFie2EGJsfwDHSy\nMz+H2eNAbNXYC+4yOTqEVJCh19spFFKoNEoiiTAeq418tYrZ60AqVdLr8XD5ve+TzWXp6hqkLTHw\nF1//7s9OUvjKX/3ZS0eGOqhWa7hcbsqVDMeOnaBYLGEyWtkL7tNsgc3mIB5NIQhyRkcmUMillKtl\n1Eo5giAgSCWo1Aby+dx/dd/d39+n0ahjc1iJHARpS0Qa9TL1WgWL2UJPTx86tR6pREUuV8Rmt6Ez\naBB51BDs6+3h0OQEvX3dvPbaWxQrCWamx9la28NmMVPIJdBrVOxsbEC9zLe//U1OHjtFW5RTq7fQ\nm8zkMxWkEinFUh6Xy0E6nWZldR6dTo/b7aVSqdJotJifW6DVAKXiEcQ1EY9jNpiYu7/A6sYKOoOM\nSCyCxaTh85//NLdv3KOjw02rLWV1dYN2u8lzz38Qt6MDo1aDXCYQO4iQzWYYGRpmauoQKyuLdHV3\nkk3nee+999jb3cNqszIzPc3m5jYquZp8rkhgP8CZC+e5dfsm9UaDWrMKLQGtXk+pXuPHyxF0TRGj\n0UYgmIVGkQGnneDCbSwaAdtIN6VqFYtKT75VRCmX0T86Sb5cQZQKdHf7SURiICoolFqo1Fpe+MQv\nUCvkaTVFNGoDCrmadlNEa7Cwvb3LyccfZ25+E53ZgSBVUGs16O3rZ283xuTUDJVKFY1Wydb2JtHE\nATKVFqlMQaOVo5KJU2mU6R0cxul2srGyRH//BPVGiY31debm5mg1BTw+L4l4nEQ8QrlUBiR8+U++\njL+rm5GhYcRmnQ6XD7Vag7urE5VcJHYQZ35xkbOf/nkkuTJWewee7hEePrhNh8vN/MI8Tp+T+M4e\nzUYFmaRFOp0iHgrT0zeGTm8mm6ugVhlYfHgbo81KKp7k4mtvMjY6h
FxpwCRocLm87O7vYXe7ENtV\nkvEo5ZrI9MknkEhrhCMxth4uUS9EmFu4zszhE1y+fpOjjz3Gm2+9xo+vrf7srE7n83kEiYDX40Yq\naaBQaVhbXyEWj5AvFZFI5Rj0eqSSR/bcrUaN7/zTt0ilo7gdToL7UQw6OwODk0QjQRrNIqlUmFIu\ng89jQyFrEYvsodNqcFhcvP/+FewOM9FEiOB+iHZLgk6lRi5IWd9cQyZoKZUqIBHxOFzUS1UUEgGd\nXqBWhVdfeQ+rXYfXb6RUlPLa63e4t7jGwOQ0L7z4cRBkJBLrLC1eprujA41WxuEjU9TqBd588212\ndw6YnjpOKBQiHo9w9epl5udvUKmkiCeCeH1OdAYlPm8PbpeXZrvA+NgQY+PTZLNVmq0KV65cJVfK\ncuv2PBcv3kYQ1Pyv/8tL6LRGcvkSUpmSZrvB5uYmWrUOk0HP1voKRo2OfDrN/PIsVrsRj9fGkZlD\n7OwHsBvdqMwOvIND9PX0Ew8kyURr+Dp78XoGsTvdBMMrxDIi1maLkQEzemmSj56xYjC4SNuUAAAg\nAElEQVSa+cBvvYzMN8bMhz/Kj964RClXodwuIKUNUpHw7h6hYBC9Xkc6l+aJc6dpt6o0GjlOnjpF\nIVnAYjOzF9ggEFhna2uZSrVAs1Gj099Ls63g0KEpotEo87Oz7O3scufOPebmFpibWyFTqtFAjlar\nZWL4EP29I+SiOZw2Pw+WFmkJAtVqmWzsgImhHm5ffhONVs6R6UOoZSBIyo+ANqU8GqUSjUqNTqPn\n6MkjmCwGNDoN0WiUhYf3iSVClJNZYvEMhXKbycOnKOfrCCojZbFNJB1hfPIECqWZU09cIFurk4gH\nGR7sYXlnk429XSwdfnbX50Bs0Ww2CcejuM0G6s0cUlkDf18X9v4hoqEAsUSKVKGBQtZiff46tJq0\n60CjRi2XJBUpEtyKYXH7mTn1DINjjxPJqvm3//7P6BmY4ROf/PhPHI8/FZXC3331b14a7rACbR7c\nu088GqFaq+DrdOF02JBJ5dQqIvfu3CO4v8+FC+dwuuzs7QVoNlocPXqEu/dv8GDuDiND44RCQRKJ\nJBq1DrlEjtlmoVjIEwzs4e5wMzY6xvzDWcYmRqhV69SqFdwuD+sbWyRTCQ6CAXa3t8lmUijkaoxG\nE9ubW/g73dTKUmQKI3qzDEGmZntrE7lMoF2vUM7n6OxyUyq0kcoEfP5OHszO4uv00983SC6XJpFI\nYTCYaTRLGE1aLGYzu4Ederp8KAQ5D2eXQCowfeQYu7vbuH12otEokraETDpCPJzhuScvcOvGfeKx\nEFLBTCiYwOW1ceL4YdqNBsVKCa3ahEqpwGoy4/X5iUaj2O12IpEY62sbPPf8CyTTSRr1Ot//wWt8\n9le+wL27N4mHAqwszKE0aFAo4Wt///ccmhrh3p0bmHXg947zd9+6SK0i0O9s4rfq8Vo0ZIs5GpUq\nQuwu6laB42fPodHoqDUbPLh3i3feu/oIi6/VYNCo2U9EiEZj7OyGsNnN3L17F61OT6tZwmTUcfX9\nWxw5cgKJoObb//yPVBtNjFYr4cAKeplAh8dJPLTPoclJBDkcPXOW1eUNeru7KeZz7AW2ePX73+b0\n2eNs7+xx6uh5BEGFUS6nTYPtgwOsej3pRotiUWRwcAhBAvlCEZ1WDyL4fV6uX7vGvTs32NhY5tD0\nBGqpmkQySbFRo7NvEKVOSzqdJlmIc/2VV5k4fo4bd66jbFb57qvf5eSF86wsbtLVM8Ze7AC1QoXX\n7ePUuScpVhs4HW6SuSwup5NYbJ9aQ041ncVotTA6eZRWTYNv6hCzV99CJoN6XcTjG+AgmUKncyK2\nWiTSKZQGPZ5OF8P9Lt54821y8Ry93Ra21pbYXl9BaEv462/9+GdHPvzx7/3uSx9//jBI2sw+uM8v\nfuZTqFRqdFojrUaLZq1MOpNgaWmBz3zm0+zs7KBQyJEJIoJUQq1axmgw0dvTQ7PRRiqVUSmXyRcT\nbO9u0NfVj9iCkZFBdve2SKViuN1O3njtVTo8PmqNNlK5QG+fh/GRQfb2A8wcmqKru4tGS4IgKFGq\ntAyPjoLQotEsEtiLsLMdwtvpAUmBeDTCTjCA3qDBarMjlahQqqQ4HSa2d/ax2a0kojEiBxFikQMk\nkjbHj51gLxDAYrayu72LTqenVi9hMBrY3tpjYmiYcPCAequJw+UkFSmwsbVPT38XsWwQg1nPwvwm\nTreec+eOE42GqFVr1BoFSqUqiUQMd4cHQYBcLs9eYIepqUkGB/vZ3wtjNlqYGD/EyGA/NOpImw3i\nyTTdg6NYLY/8Ep7/4ONcunSF7u4epPJHBOK/+e5NmlIpPoOez33u46wHI/iGTvKZP/xbaqKAR2cl\n0iyhEODiu28hCBJGxib40Bd+k0g4ysb6Om6THqvWyKGZaZCItNot/B4v6WyWrb0A588/xfb6GqVS\nmVOnHmPm2GGi4V3URh07u7ssPFgCpZIOXwe1WpGNpXkezt2hVm2g02lx9w7i6Rmn1pJyd/YSO4El\nKqUC2UyFpqyJw2GmWSpTSFXo7OuhmM/i7xlAq9VRKZdJpjKAlHw2zzMvPs0TF84T2AuzsrHNyOQ4\nof09Kk3I5dI0C0Usai0XnnmaaquBUaUhny1STAcoZdJMDE1Spoa8DV5/D81Wk4PVbdQ6G3duXKF3\nYIztnQCSZpOt3V08Hh+hcIxcOk5gZ4l8OMrh41Mgl1Eu5tAp1MhoUCqnGDp2GIvBAtUcYqNJIllg\nZHiQgWE/6UyCfLHI2KEp7J5Ofv/PfjLy0k9FUvjSF3/vJZeyzq1bt3nizGMszt2m2aiTSKbx+fwY\nTWYy2Rxd3V5SyeijCyuzc5h1FsrlGvVGi0AwgN6go94qEgrF6O8fRa0y4fV0c+3Gu3g8ZuQyOa//\n4MfMzJylWGwS3g8RDK3T2++lUmqxtxsguBvEYtYwvzjP4uJDLjxxnkg4iFoukMmm0Wkf7To0KlXc\nLhd3782iVeuQSbSoTRrWHu7RbudJpw6oVauceuxJ9jbXuH3zKr5OM9du3KUplSJtqlhaXoSWFK1e\nz7kzT7G1vcxuIMj04SPMHB7j4fwCCqWCZrVGf+8Ab7x+lVK1zPMvXmBlfZN0poDH10GH24LHY8Vi\nsSOVylGrtYSjAeqNKq0G7O6F6erxc/HS28RicZQqNUuLKxQKJWqVGr5OL8FgCLXWQKlSwN/lw+30\n8errb3PhuY8RCuzSFnUIyjYdDhtf/94dbDqBQ1YpM4NlvvTKOqePTvNXv/pZfHqBd669TzybQqNW\nMTI8hKfDg0RUIuZyVPMZrFYbrs4eVFYLCoWK1fk59DI5Oo2CTDpDqVhDrtcwOj2BXq1jaf4OhVIa\nnVpDq6LE5+tGVMo4feZxLBYnm/sRhnpHOP/hj6BTq1GqNNg8PVitZgrZLcZGjzA8eQqns4N8Mo7Z\nYCMaToBSyz9962scnT6C
tNEikUgjEWVEwyFmjk+wubGE22UjmSzgsLvZ39tHKshwO700Wk0c7i7y\npTZ3b76FTKlFpbXSEOqsLDykt8uPyeGkqVDgmzlCNVaiXIZccJFitsnYybNcvfoGp06cIB47wO9z\noZC0iCazzEyOY7RoeeP7r/ORn/s0HUPdxHbC5CoC5USI2UtvkRVFDJYONIKMfCKEoDZhMrpoSGXs\nBPeRtAVWF3e48InPcOv6XVw2K3/0lz8Z4v2noqcgk8kYn+xhYrybWCzA+MQUp0+fRmw3WV1b4NqN\nS+xurbG9vkYxl6NWKnDy2BEqtSwSocqZs8fJ5vPUm23qTTlGi5NAOEymkECubPOJT3wOicSM3e5H\nqZVwEFthaeUWx09MMTw4RLvRpt1uk89nCYX3UMiUTE1MopTJ+Z9+/d9z+dK7vPra96iU82i1aoKB\nTaZmxhCEBuvLFdbW0mjMOvL5Ij2DHqYmD7G1uk+1XOdHr32Xxx47x7/5d/+Brt5hHDYTCim0Wm3i\nsQQSKURDEdLZMA/nljGbrQjSNm/9+G1cHhsLD+/TbtVw2W1kinnawPL8Q8rZJvFomcX5GLfv7HHn\n3h61ukhnVzedvm4GB8Zw2H2YjDbOnTuLSqXg/NknkUkVpJMZhob7gCqhg23+5E/+mFqtQTgcZmbm\nCG6Xi1u3b/Or//bfEdjdwWKzo1Er0GgVBA6SKCQtOu1ynAYtW0shQpEaqmKMp547QbtZIl0qcWRm\nBp3RgElvoFapINcpqQuwGw6j0ai4d+MW89dvIzaaDExMY+jsZCcaRWPQsrW1RLezg437S2xvrWK0\neTAorJRLNdZWrxKLrNDvc9MsV5l7cIdDY0Mkc0l++I2/JxraYmXuHi9/+Q+Jbyzw4Poit28+QK1Q\nEtzZwGyWc/PW+6wsP0QmqaNTOVldW2B5bRmpsolMUcXps7C9nWR8+ix7kRR6vYGbN26wvDiHTitj\nYfEBaqWKxeUlOrv7OHHmSbRaPTcuX0LeUnLo8DGqNLCoHYhleOVLf0J2ZwGnCYYPn6AsFvi7P/8D\nPvD8h8mnM5RzJaqFyiOHs0QKTHYWNqJ87te/wJ/94W/xG5/8OTLVEgf5IoefOY/MaabH7aWaKxIJ\nhYlFs8SjOXL5FCa9gj5/D97Obs4cn+E7X/5zguvrZIs/azTn3/uPL3Xo6ng8Ph7MPkSrF4hEo8gF\nCYODo8jkShqtAg6bE6/Xi9lk5uH8Q1ptKJerJJM5JAjks0UOHZpBIVdwEAkxONDN2so6JqOd4EGE\nQjHPqWOPcxCJ4fN6iSXieDp8zEyPg0SFyWRgaLCf61cXKZUafPTnPo5GreDKlVl8/g70Ri0PH85x\nEAqhlAt0d/nJ5sIUakVCB4/KN7lMRn+fn1g8y9zcIoenTvK9V/6Z/Z0tOpyd6LRqnnv2PDu7u+gt\nKjbWD3C5LGyuzaNWOukfcaJRGhjsG2H2wS2UCiVatRZBruPdazcxmfUE98OMjfoYHR1ne3OL6clB\nPveZF6mWKrzx+qvcvXMbi9HM8uIyj58+gUYpJ5fOoFFraDUa5LIZFuYf0mrU6ersRCEXWF5aQJDC\n6vICsUgcu81Ou1ogfrDD/t42UkGKVGzyxg/vsxDIMe3r4PiMjmLayGvbUc47lSytrWIw97K8uMbo\n+DAKQYpBb0WpUCGjhdhoMD48SCR8gMlmod6qo9Vo0akVyNpNNlZX0RidvPCRj7E1f5OhsT5q1Tqd\nfj87u3t0dnbROzCOw9LBxavX6OnpRZTJUcnkmA0WnC4n7VaDwYF+YskETqudi+9e4ezpwyzMPcBh\nsSG09TTqEpKpGFMzY1x8/xq1cokLF57GaDWRS6RRK1TksiUiB0GcVieZXIJOj4tGrU5XXz9KpZqu\noTH++eX/A4tOT7GQ5MzJs/SMjbAfDGA3mFCJah7cu8bEkdNYHA5uLq0SS0VpSeTYDRo6XBa0tLly\n+zKWjk7cLg8ys5FaYpdWvYJZr8VmsXPs+Gl6hwf5y5d+h1/97OcIB3YI7YQYOHyYYGib1dmHHDvz\nBIHgBtl0GaPFSi6R4SAUxeAyMzQygsFkoFAr8NVv/OhnRz589a++/NKvffIseoOSLr8PQSLl8cfO\nUCyWKZVrJJNZ+nr62dncIho5IJNKIJPKH3lKms1EE0mUGj3HHzvFQeiAh/fv0unrwO3xIBXb3Llz\ni3Bom0Q8gEzeQioq2VjdwuPtQq5U82B2nnSmyPDgGM1Gk/GxcXr6+ojFYricTsrlHNPTh5BJ5Ths\nTsYnxtjZ2iaeiNMWFSjkSpLJNK2WCrHdpF7NolIJHD9xnB+++iOevvA4CoWE/f0dLFYTqXSMrp5O\nllfn+fRnP8ra6jqxaJJ6rY0ga3Pi2Bni8X021jYZHhrA4bCwsLDFXjhKo9GiXm4ik1UpF8pk0nV+\n6zc/T6VYRq83sb6xyMbqDvVmhYmpSZRyBaXSo9XjbDaL2+tBrlDg6nCj1mqZm31AW2xSKRfx+Fzo\ndAaS8RQer5dSKU8hnySZSKDXmTFbZLz8/ctUi1Ka+Tyfe8bLD2a3qcXadNkKLK0UGDwyxpX353nq\nyVNoTHoEhYJmvYFMraFnaIRwJIS7w0oun8Hn9RBPRElG41RrVSamDpFJx9lY2aAskbA8u8LwxBTZ\nQgmZQoHJYiGZCVGvVTn+7AvkEwmKlRwqiUguk+Hm9asY9XouvvUup06dYWNth6efPker3WB/M4RW\nrWJ1dQmjyYJUlHD78mVUUpH9nT1SqSCDvTNojFo0GjWRWITh0UEK+TydPi8qtYZquYJcrqV/YoKW\nIOPpx07g7fZittiIZvNEd0L0HDpMqSlSlchpSqtUSlEy2QqPTQ0zduIx8vE8XYOD7AZDbK2tc+jw\nGQKb2/RNDrO3vorT5yabe8TYqIkCMq0Zs9FEb083rVYVs6+LdquO3+eiv7+XVrPJwu1b+Px+Jo8d\nJZutoLQYsVnMvPbtfyIS2OcgtI/X4+avvvH6z05S+PKf/sFLfdYG7WYDs8nA3Ox9ioUy8Xgcr8+L\n1+vl5a9/k5MnTvLg3l0uPPkkpWKFpaU1avU2ZouDoeGJR25P2QxDA4PkcgXkghJBKsXusOPxuOnr\n87OzvU5Pdy/1Zg2r3UKzVaVULJNO53G73Lz6yvfp7OxEpI3ZYkShVDA0NIDb7aBWq6NWqwmHwkwd\nOoQEsNpcKNUqlAolKytxhoY8HJ4eQqMUMJvMbG1tUy5X0ek11Gp1Wu02xWKJp59+lrWNFRwWN889\n/xSpZIa52XVcbgMb6xsoFRJabSmNep22WGd5LUAyXUUiFVHK2wz0+Wg3JaytxVAoq7z44otsbe9Q\nrVXQqAwo1BKarRbDg8NIBTnz8wvU6zWUCgXFQoGHCw/JZDJ4fZ3YrTbq9RqDQ4PodAbi8SThYBid\n3kAmkyKdTmEwWFDKm7xxaYNKsYFVZeDFE13805U1rDIt/b42m3ttOvqszC0FGO53o
zLq0Rr0lIoF\nZCottVYTsd2gkMvQbjRwOOw4nC4aLdgLhnjv8vs8duIw3/jW13n+xU9SyCUBCUaThXg8wc7OLj5b\nB/VGm8jeHkuLs/R6PSwsznJwEMHpcHDr+g1eePYF9kMhxsYmKJSzpFNppsYPk8/nMNsshMJRhkeG\ncLvdfOsb30GjlZPNp1EpLGgtGox6LVub65SrFdQaLalMAp1Wg0qpYm1lnYXVJSaOHmF9YR69wUi1\nVebu+xcpZpLUW036ZiaoZWNoUZHPxYkE94ntRajXa3h9HWw+vI9GJiGbi2B3dROPRcim06gFBS53\nJ/VSnVazRVts0iiXMHe4yFVLpBIFfH0jGHRG1u7d5+HCHIVsCpddz7Wb90lGo1jNDpz+DlaW5zl1\n6hQ6m5nBkSGUKhl/+pWfoctLf/z7/+Gl81Nd7O8dsLK0js/npdPXzdrKNgM9fr7+ta8x1NfJE2eO\n4XLayReqNKR1RkYHsNgcNJpN1lZnyaaSdLh8VFvQ1dUN1AiGd2g1m+h0WtqtNiMjh7hy6RoatZxm\nrUmrUsXvdlMp5wgEN5BIGqRSaXa3dvmHl7+L1arj/t377O9HMOjl5LIpBKmG5eUVLjx5nu4eD6Hg\nLpVKBkGoEo3EqUkVdLo9mPUGAsEgY6MDqBQtpG2Rew9uopBpUKkUuJ0uHtx5iEamBlkZn9uKyWBk\ncmwaf5+Tl7/5Q7Y2D6i14Mdv7TE44EChFNnaKHPysQGOHjlGuhDhox/5ELFEiA53N8NDQyi1WjRa\nFS5XBz6vh4sX36HdarG0cA+f102pVMLhcGI2WOnq7MbtcqGUqbDZ7JRLNexWGw6HnWI+T7VaoVwq\nIJVLufneEg/nY/yrsy50Bg1by3He2ynzB//6CF5dF9+8u02fzczsVhqHpILOZsbn9lBv5ZFLZeRz\nRXK5Mj989S2MWgsH8QxWpxer3c7w+Bhjk2Nsbgfo6e5Dr1RitlrJpUsEdncYH+lHKhV5/bXv09Pd\nxZ0bN3nqqacJ7IYxGe10WDowaPT4R/sQpUp8HhcHyTB2pwOtzohGp0Jn1JGr5Jk+cZjt7U3S+TKH\nJob5pS/8GkdPnsHb7UEik9BAyvLdOabGJtgL7JHPF1haWkSllKE36fC6XZSSKbrGhqmWa+TTbewu\nHzNPP4ndYGDhnUuU0ll84zPUC3Bv+T4//z//Nna9ht3gFiNHThENZejonaCzy0etUqevr59IZAuV\nWY1voB+ZIGdtdZeB/jEO9law2y08XF5D1Uyj1FT46t9+mYGBCRwdvUwfu4DRYWd0aJj7d67QSB0Q\n3lph7OQ5TFYzuxvb1PKZn3gk+VPRaJTLlZSKZc6dO4fP5yeRSKBSKzGZDBiMWj70wjNIpCJiW065\nUGV7Z4PBviF6u8e4c+cWSqWSnm4/EkmdVqvE1fcvsbj0kEq5RC6Xw2q1k4wlabcaXLr4HnanAaVK\nglQQqVTKIHlEferu7mZiYhqlus304UGefOYI3d3dzBwZwW6zYTZb0ev12GwWZmZmuHLlCqvLa5w6\n8Ri0RbQqNbmWgoWVEJffn+WVV9/B4LSxF9zD4/eyvLJGswFSqZRCtoJCrqVaa7C4ssjEyAhyQUGl\nnMfr9VIolDFadZTLbeQKGY02+HoUKDV1EAVkMjkGvZn+vm52dtcJBw/QmZSodVo0mkdWeyqViuB+\nlOGhMer1OgNDg+zs7RJPRKlXa3R0uDAajWQyGQqlIisrKzidj9bRZTIZcsUj6VEulxAECdF4jgIi\nYkuFoi0hVi2QFVu0UKDQmhHkctQ6HfV2HUQp1VwBJE0aIhRLOU6ePs65p89z9tzjDI32Mj01xve+\n8w+E9oKsLa6hNdk5fPg4yViU+/fuEcuUiCcO8Hq6uH7tHlq1hRdf/AgSBOwOK/FokOGRHqCNWq9i\nbWMVu81GqVzmypUrCMo229tbtNtSsvkyMrUBh81PO1vG1+ngyY8/R6WQJRKLgFqgUi+gUqkwmUwc\nP32KwEEIm81Gd3c3Q0NDSCQS7HYncrkSlUHH2toGOosJ/9gQdq2Kaz/8DoVSkYZMRN/vIrJ6h1R6\nmaeOz/DqX3+Rm9du4FIbySYjGI06OidGSYR3CR4E0NttyHU24ok0777+NsuLK7icVsrlFAq5jla7\nzosffo5iTcRi7uI//uk3ufDzn2f63LOshg/QaY3sBULolGq6u7ro7xsmEthk7tZ1GtUKconiJ47H\nn4pK4Utf/P2XPnKuh929DXRaBRKJHKPJzODYEAsPb9Osl0EikI5nmJudo1avkc3leOvHFzl//jTl\nSpnoQRinzUkmW6ardwi/z8+PX3udFz/0Cd594wrXr91kbLSXeqWK22PFZDLSrNTI5/PU61WWFpcp\nlitk0lnKhRiCXM7i0ioKWY297T2SiTCCVE4+m8dg0iLIwO12IRPU1OtNfF4XN67ex+kwsr1XZj+Q\nYXjcRWgrgt3uolQokI2nGeobQSrWSWWilIoNwuEoMpmccDCAKCpZWV0ml4/hcjhZmN/k8dPTGI0q\nlheiuJwqZIKUZr0JjSqBwB7bgTWS0TzDQyOks3mkIlhMVurNMjqtmcuX30Eml2Cy6Gm1BJ5+6nmc\nzg6ajQa9PT0szM9jNpuRSCRIBAGDwYTdYWdtZQWVQUu1XCZXTKPTafj2D5dotlsoZQ3iiRzJspxY\npcYRq4ZiPcXV1SgOuYxavUquWqBcrzJ2bBhZTUq13sDZ4WF3Zw+z2YpaqSMWTxAO7DI63MOX/+KL\nbK4uItTreNxeXBYDldQBOoOW3e0d9Got2Uyadlvk9u1bfPCF52g0ylRqBdZWdzDYLQwN95NNJXD1\nD9Dj96K3O7lz4xZdPi/VaprtjQX0BjnvXbtEh9PLD/7xH3F19XP4sQvsbgfweFysL69j0RuotKoU\nyxXUSjUqjRKpAJl0gnpD5JUfvE7f6Bi/+9JLLC+vMDnUyY13L/FgbomnfvlXaeQLJDa2iberPLx+\nhyMnf4HJ54+S31pkdnaLfn83m8vzBJcWSaSSDA76+ebf/CUffPEjRJJNpqaGUCpF1lfXcNp7sbhs\nvH/1Ov19I2gcdi5dfp9qNoXZoGFzaQ6nTs7eQYi+0VEuv/MjlAYlequFeCxJh7cTtc6O3GjlS3/z\nrZ8d+fDVv/7SSw5lFXeHH7vDg9ZiQVDIEcUmu2urNKtVxqbHyaRTNFot7A4XY5PjqNRSQgdxhkdH\nGBwc4etf/y5LK8tIqJFOhvG4rSjlVQLhOxw9MsDbb77PzMwMtaZItd5GJZPTarWoVGuoVFrWVjcZ\nGR1FLVfx4P4iKrWGXLKMQoBOjwMJIulEHIW8idlsZf7BMqFImHK5yt079/nwh56mlM8ROYgzMWYm\ntBOnLK2wuRml1ZSjNtZpS7T0j/VTLRTo9Hnp67PgcuqRyrSkiylCoSL1ep1GrUI+n2Cw10ezHmdz\nPcPERA9IpLg6zJjNLqwOPY1qlqeffI5U
4oC9wC6NWoXQwRaFXA2ptI5SpqS3Z4BarY3b00Gr1UBs\n1whsb9HT7cNmMVOstHjz7bd54fkPIpXLaTYaHEQOKJfTyKUqkoU0NpWZ7723gl4F2Uobs8FJsVDg\nE0cd7GXK7ESalIQmiUgcj0tHS2zQlAg8/+wZUrksyCR85c++yNLcXbp9HYQiBygFKVa9EbvTxeOP\nncFmsmI0G6jXK7RFGBgZQy2XYTLJkCpbZLMxPL1jnHvqPKH9Hcw2H0ZTB5FEnMHBIZRaBWqFQCR4\ngMPqpJhL4fe7iUZCfPNbf0s0maR/YIzhrkEUOiXThw+RCQV5cPc2CoUUtVxgYW4Ru82KTGhiMRqp\nFku4vZ3EY0msZitGow1PhwelSuDTn/4kA91dyFHg6PKi0isxKaTIZNDf10diZ5+nL5zgb/729+hw\ndvHD1y/yK1/4Za5eex+5VEENAU9PL0aNAbffT3gvwFCnhUtvXWTq0GG+8c2v0hCjXHznx5w9NsnO\nygI/+udXOXnkJJFEDKXcSFdfP3P3ruE1WYhFtlHqFSgxMjp2EplY4Y9+7z8x0N1NpR7mb19+9/+i\n7r2CJDuvO8/fTe99ZmVWZmVmedNV1dXd1VXVDt0NNADCkiBIkATEoRElUSPNaCe0oxlNzM5gOFpp\nJIpOlFmKRiDoQBIg0DCEB7qBbrQ31eW9Te+9vXn3Adx9mNgdYjdiIsQTcSPud89n7ss5ccz/O+c3\nRyn81Z//l8cfu+8wba52VlbWaPN6yGXS6LQKSrk8xXKZnsFRNndWsZhM+H1etnZD6HVm2pxO5hcW\nyBQzjIwNYLdpqeRK+L3t7N83xvyNTUZH+ylXWxw6fIjQ9jx63fspy4A/QCYTo6OjE5+vg4nJKUZH\nx5AJElJLTl//HrLpONlsmmw6SzyRIJfJkMpWeO70BbTWGp997Hc489YZ1tbmcbh0tGp1spU0qbjE\no4+eQllNI5PJaHM66Ou2kEm3uHnlKh0d7fzkp6+i1+hoiRqunL9Oo1xmdi6D3qjHqNcSS0YZGx0n\nloji8zuJRotUihIPPXwXhw4fQ61SszA7x8LyPHfedRf9fT0UyznefOMMjz76KR31Bw0AACAASURB\nVKLhOEqFgWg0hFIlw+3toFSqEdoN4Q8GmJm7hlavQaXU0hnws7q5ya1bt9DIBKLJGKjUaFVNiuUK\nYr7I9ekQHp+exWiNUq6GzyzjU3eOsLgeo1Bo0dVpJxNvMBA0UG22GBrsp2tPF3JJoJarc/dDD/GR\n+x4gnkrT09+Ly2LFbDYh15lJpjMk43EWFufI596vBVFvFLlx+RpjB4+QyOXoHhqhnMlz4/p1nB4X\napWEVqvg5oUL2JxmlILE6uIyDYUCeatBdCuJp93MwvwSB8cnUMoMHD31AE888W08The5bAJaGvqH\ne2nJRaoFiYC/A4VWRXgzTKtRx2K1EU3FiIRjqGRqZhbm8HhdqHV65pYWcDsdvPrOWZ75yZOM7dmH\nKIiYdSp2565i7RxAptNycM8B4rEaH//EYzz38x+hN9npG9yLO+jDuXeYS2+dZ2xiCrs/QKlcpmd0\nlJZCzp33fhynu59DJz6JUq3E0ebC6PVQyZdYX5lhcf4mKqWCof5RRFGP1uamr6+TUq1AqpCgkM8x\n3NeLAglJqeD/+MGrvzlK4e+/+ZXH+20qvO0eNjZWyOdLyCWJpflFbjtyAr3eTDgcps1hZWN9BZ83\nQKFcwNcRBAGcVic3r11GKYi89epVTt5+iEQ8RTyUwOwQkQtqMqkM0d1ttIKGWkHG6EAf0XgeZGbq\ntRKetjYyqST1aoWtrQgtQSDY2YlabaTD34nb66JYqFIVayhlVu644wj5dJO/+5sn6Oxq5/4H7iQW\nS3PvvcfY2Yqxup1mZyeEzShjYuIgZ9++RjSZR6xHOHzoOLFQlGyxRnvAx+LyOquRMiaXjrWNAsjV\nhOIhdGoV5VKDXLmKXFMDBNLZMvd++G5MBgdirUUkssu9992LTm9gZWGRWCRKV9CPyWQmlU4hUqW7\nb4DJyaNEoymy2QytpohMaNEdDLK0skLv4D7qzRaxcJRquYpcJqCwatHQwqzREN3Y5daNaSTJCK0G\n6bzIh4bbsHe50OQiFBoKYsUKoiSREfMc8Du481OPsmegH61cQBRkKNVq3n3jLHNrq1yZm0EhkzNy\n6BDT125SESV2QmHsLicdwSDjBw9RLRcwGnWY7XbUDi9Wi51yPo3dYUQhk5NI1ShWqyRiEdz+bqwu\nN8jlCBJ42ryoNTLeO3+dy5cvc+G9a7zxyhmuXb/JhXMX+dznf4vl1RU6B4ZZ34xgd/nQaU3sbM2R\nSmcZ6O/B7m0nlcujUOgQlBKdwQA6jQqlSk2rCU/+7GkeeuijKFpy9u7fyz2PfAKF0U7n8H6kmsh2\nKEStAuuzGwzddQd6g5btpTm0qgZyQSSVimI0qVg5+x7Dg/1UMznefPFFHK4OJJmAQa/jnTdexB/w\nUojtEI9usxMOMzI0RiVX4MT9x1mauYG33Y3VpGH+xjvMLM0grzVpb2/DZnOit9kpFmvcujnPwJ59\nfP3bv0HZh2985S8f/+idLja2ZpApWiwtr3PnqZMUC3muXrjGwvwy/rY2Lp6/Tm9PJ+ffvYzN6sBq\ntbC1tYZMVGJUy1ELLZTyFtubu+g0SjY3bqFXK4iHQ8xOTxNLlIlnEzz30jKvvzWDXmsmk1kmGolR\nyGVoNovshteoVRpIjRoCNQShBDQwGi3sGRlkfi5CPpejb8CDWi7QP+yjVK0gCRIdvnZmZ1bYDS+S\nysLySgW5Tsn+Pb30dndyc2WdXEnFK68t8q//4AjxRI6D44OYzCZeP7vE2mYeg05BPluiUBa480OH\nQKHn5Zem6exxoVGaiIRjWKw6Ojv9VIp5sskkVy9d4tb0LQ4fncRk1FEqljFaPKRzJUwmK6VSkVsz\n04hihWa9iL+jnXg0xezCIpWayM5uCKVMxqFDE9h9HmQKBQ6jiZe+/R2WLi7i1shIJ6tU8mXIiyia\ncvqlPKOdLt49t8VRbwc1s57fPvlxBrr3Y/QPgaeXUl1HRR8gXtxFWatx5NRhPO1upvaOUatm0Aoi\nZpOG0OoyjWIGoVFmaX6WdDyM2WrnqZ8/g16p4ZmnfoLVbqHHH+DypZtcvXiRwT4vVpOWdCTK5QsX\naZQLGDQKNBoZZ99+i7/4s6/z9qtXCce2Wd9Iky00qVckYrspenp973c4j8bo7Q3SaClwejoxB/rQ\n6FXshiLkC2XcTgvZdJJMKkelWMNsMXL54hWUGi33f/QBVubmUes0bG0nSadyLNy4yrWzryJWyyhF\nLdnUDkvz19HWRd745et4Ot30dXfynW99i717hpHqIlemZxHkGjbWN0imthGFBh6Pm1azQm9HBzen\nl2hztWFyuFhbXcKpVSJTtigVyty8PoPD3kYuVyAaT+H1d9HV1U2z1WJhYYFstcjMzDQjo0O8/vwz\
nvHxh9TdHKXz1y//58bvGx1lejPPwQ58hHEtx+cpNTt5+B3qVEqXQolgsYzK3k8vn6e3u58b1GVrK\nGp29Aa7dnMPn8yEpZNBoEdmJUCvLiURFItEiizMRBHT4Ak42N+PsneojnE7RZlUQ3YkRTuTo6+7G\n0xGgVBVJhAvIlRIb62ECAT9iq4YotqhVFExNHaDeqJLOZgjHIrz84iIKpcjszDzedju5dB6DUU/A\na8HrNjG7FCOa2WVlZZ1GSSKSaGEyCdy6ssZnv/BxEuFdKsU6i2vbCHI9MlmLlijQqKtY29hhcz1C\nOt3A3SYjtJVEJq9x/32nOP3c8yyvLFEpptBrNHjc7QgKE7Ozq4yMTfHTZ04zODiM0WjC2+bF296G\nSiWj1ZB4/oVnsTlcdPd0I1MqaXc7KeTzxGNhXj19mpHufmqSyLVrS+RqIDrbyW7pEaMNRj77JcYe\n+SM2Vq9SSKgJNiU6K1t0uydp32Mn/urLxJaX6bjjU0RzVTL5CgFrgHq1wMbmGkqNgbnrCxiUesql\nOpsb26i1Jm7MzYFaxf79Y9jMBurVGuMT4/QNj1CvpvBabbxz7iqirMnk4XFeeu5ZFm4uMrhnDwqN\nQF9nkKW5WZaXFjl39l0OT+6nf8BNKBqnUmvSEMBoUVHKi9x771Emj40j1UQEgxW93crCrWuU49sE\nO/34gl3YDQ7UGjmlWhW7uQ2X00MoGqNUKKHWmXC1e6gXimgsRqrZNJlQkkqtzL2PPoxOp2V7bo1j\nj3yKyQ/dTzyao93fwZ7RfjbX1pjcN4E30IkkgqsnQK7S4NTDD/L6S8+j1mkQkFOrllldX8Hh8LMT\n3eDGzRuMjgxjM3uoKvUohBa70ThGo4ODR29DbbTT0e4jlozy3nuXkTVaVLI57r/vPjLpBGMHevj6\ndz6Y+/CB+j78z6Zer1P6z787TLVaZqB3kERGIptO4m13YVYrWFlaJl8o4bR70GgFdtaXKWdE4rtx\nbE4dF25FcbuURLMSfUN7cOokOjqcvPLqm0RzBmpiFbNRwz13dGEzmAitzmPQ+giliujMJRRNgXxF\nzvB4H4sLa/T0dVMt17h58yZ2u53u3h6a9QZvvnmBvXv3kslEGBgc5rlfvI6nw4/JrEallZOMxun2\n9xBP7BLscKFQanni6ctIDZHeHgPdnXYu30jj7bCxG0miqIv0+l3IlTV0FhlnLxbJFquYzFqS6QZm\nXYvhbh1BnxuL08qPfnSJ++8f4+SRY1y6fpXXX7nCo48dJpVOo9fZkalU6LUO5AqYXV5iYuIgjaYS\nt7uds2+/QbmU45FHPonQauDv7OHChQuUC1n8bW6WlxdRWWwILRGdQkdZqPLS90+TzZWwSkYurMXZ\n6xAotXsJZLc5LxxkrCtIz6UX0XV4aVeraTpcuLQGUoUo2WgKu91OPp7BPjWI78AoC+UtejoDpFNh\nrO4uUskMbo+LeDyK1mgCBZi1WvoHR/jej3+ATiVn78gYbpeHmljBbDYRDSdBkuMJBEjEIzRyeaxt\ndi6++zZHTpwCUcHM7E1q9TxBb5BQNkktlyUejWE0m+gd6EUQBDxeJ2889wYNscipD3+Yl0+/zORt\npzAqJBYWFjh64h4KYoXVlQ26nG5AQC6XMz17HbPBzG4sgi/Yj9PZxvnzr6Ns1NEYzIzf/mF0RgvZ\nTIyF2UtoFdDj96MzulDr378fI0dOPp/HaNQiKZVMv3MGb3c33YP7kBAplErIhRZSI8/y9CIygxFf\nwEMimcbp8iMIAuVMDLurA7lMSaWSw2AwsLy6hNWqxm42kYjGCPTsRdDp2N5Y4/XTP+QLX3rxAxVu\n/WdhKfzln3/p8Wp0A5XChNsdZGHmOvVKiXKxyPrKOul0lnvveZDX3nyNzY0wNmM7sfAatUaBrVSR\n3t6TvHJuDbm8RSWTZHc9SSadRqKFQd1Cp1RTSBdJ7sYoZjPML+RQKCtk4mWkQpX4co62TitGk5mr\nF29hMCjp6u4lnc7S2TuCUqkhEU8jVwt093ZQK9SJJjaZmNzDHXccQqOGrdUIjYZEPJnA5WynUWtS\nqBQIJ6sUq3W0ahVL8wnaO+xEk3FWl6v07rHR7rOztZ1A3tKyvJGkKbVIRGu0hBYOq5aTR6ZYXg5j\ntSsJR5McOTrCofETfO+7T5KIlbnzrkNk0jnKlSoDg6PMzy8xcfgwzUqFhblpytkc1VKWlaVZ7jh+\ngheeP825c+cYnzhGe3sHNbHKjZs32D95kEYpyfzMZeRy6O70cPmXZ6iWWrgUeVwKgRJybBWRhYyK\n8d/+IvLn/gZLuxybwYJjagqZXcfm8lWMK3E6/9MfYv7k/ey+8Sb6dj+ZapbgQIDlzRWCfh/Vap2g\nv52dzVW8/iD1ZgNRFJHT5Bt/9Zf8m3/3p4z0DHLuzCvMLC8T6OhAIZNRLJX42S+eRimv0NveQ7KQ\nolKX2HP0CGsz6xQkifGpI5isRs6ePcPkwWP0jezB62kj0NmDQmEln6vg6wgyePIONhaX6Q0M4bDZ\nUaner6bU4fdQbUElk0XM5shXa+g0GlZXlujfswdBrkShVDC69wAKtRq9RUsyso3UquN3e1BILV59\n/mfc9qGHsXu62d6KsLE4z8VzV9k/sZdiNYdOo4KajCtXz6Mz2dlaXEOlkVHOJshnk6g1CmLhKG12\nJ12BALFIBJfTQjy0Tjq6xUBvL7MLt9Bo5Tgdeq5ffIuh/m6KxQKRnQ3y2QR//Vf/DYvBSKNS49Tt\nx/mzb/7sN8d9+IdvfvXxf/u7H8PjaScWTWAzGjAbTTRqDbztbajVKhI7CeSSjAcfeIC3z79JtqjA\nqFLzsU8/xJGj4yQLST7+kTup13cZGuxmY3OLqaNDhHYTZHNV9AYZRpOOSKxITSZnfaNFKlHGZtFi\nNLeoiTraOkzY7FYOHx4nly+hVqsxWsx4fV6cdivFconh4WEW568zsmeYVkPg6WdPU682cDscHDy4\nD40OLl2+iUatZ3M7yU64RgMRo0lNJinR06MhHKqQLLZoNFrUslUyiTxKHZSbasoVBZKkwOIwYFRV\ncThNbO5kkSOjWqni82sQRCsrq7PojQJqhQy73Uk2l+P4iTvxB4PE4xkksY5Wo0Fs1EklI1TKRUaH\nhxk/OI7L7WZxYZOu7h5KpTwdgXaq9Qob68tkMml0BisdHQG2pi8jCjWMLagIEoWSilqzQalcZcDq\nJRjZQqdS0h6LkFVCn9FBbi2CZnKI+PmbaFQaSqkKht4RarEYyl4T7UEfr738MscOH0cmNWg26py/\ncpXJw5NsbW1i1ssYHu5jZnaBbDaNx+0kV6lTLRURWi06u7vwBXxYjEZsZjsuv5/tlQ1yiQxSS8Cg\nUZMuJlEKMDo6ytrqKp6An2wqirPNg9nVjs1s4rlnn6WwnSIQ6EJrs+Ee7CeXSrC5tkE4HCJXqGK2\nGpAEiZvTC5TKRY4cniQcS9KoNwkE/axs7HJrZp6RkWEK
6QxLC4ts7oaYPHoYhUqOoLagNljpaHNR\nTUUxtwcoFxJsLs3hbvOwuBnBM9DJgYkxDBolncfGSawucunyZTLpDONTh3nh5Vc5c+Esgc5u/AEf\nckFEhsTy2gZTRw6xurpMJBRCJodwLI2/oxu7y4XeaKR/cB91sUlLJrG4ssMPn3vnN0cp/O3X//rx\ngLGBUa9neWkBhVqgt78Pj7edy9ducPT4SZK5BLVGje8+8XM+8eiHGd3fjbffzvKtFV56/gwPfnQM\ns1lNOZfjyMkpPvaJj5KMw+e+8FGkWh4aeTLJGrW6gUq5SjCgw9tjJV1rkshVkQQFQ0MB1AYtK7NR\nLl2+iNvto1AqMD19i7EDh6hV66ysrtHp81Mo1SmUK/T2deB02NhdT2G0KlBKah788G3U6hIul57e\ngByFIJLMFDEbBAwqNfFImf4eBStrNdKVGi15C5VaweJGkWodRGoURQmZHG5e2yYZreBpb0BdRi6b\nYvrWRVxtNhQyFR+6+y5+/tQvicYiuN02vvrlr9JsNpmdm+XVVy9zYGqA4dE9fOhD9xBPJJhfmEGh\nkHHoyGF+8pMneOfM6yhqRQJeL62mnD2Dw3QPdPPCD39MNZJAgYBckKOXyVFqRORyNXajF/PcGUJy\nJVWtg0Ijj9waoL2pIZWOY9zeJqOoIBZStGIxik4fxok+sqk1lC3Y2Y1x9NTdJDMxrl2/zNFTd1Or\nVhFrdQqFEqOHjxFeXmc7GkapN7B3YpLAwBDNhkQhFKFareLpHmNrd5WV+WX6x8awui0YtXJ0FjW5\nZBizXs3i8gIWsxl1q4XL7SGXzfLcz59m7PYTFJNhfF0BlhamkSsavPbSC/R39yMWc1h1esw6K/FQ\nmtXlVY4cP4bH7aZer9KQlDjsJiLhbeQaK0dP3kuyWKTN18WJ+z/MQF8fc0tzPP29Jxg5cAKXP0A9\nl0esN1CIBSSDEo3MRLlQo9ws42lzsTW/Qlewk+nzF2gpDOzbO4lOaaStrx+Nwkj34BgqAer1KrRU\nVMotZGoFmWQcvd6Gy9OFyaKmXq5RrVSwOztoyVVsLExjUqnZf2QSqZLnm09+sJjCPwuYsyATUKla\nrC7PYNQpqVaz7O6sc+XKZQaGhkmkcvT09GC22jl6ZAydWockSSwtbvDWW3MM7fMjSDpC4R0mDg7y\n/C+e5t1zb2K3NnntlV8SiWaot8pUik062gRkMtAbFdjNNra28wR6HDSbeWTo8Ni6WVi8xZ133EVX\n0M/w8BD333sfN65e4tDUBB9/+GOUqyJiS47d1o5KriGVzPDRj79fot5utzN/axalIBLwugh2+tg7\nEqSjzYhOoyaaSiJTtlAIKqxWGQq1gnIN0pEG/Z0qtIYGKq0MqSKyE61Rq8nYO+pDrCmIJvIEuzrY\nu3cEnU7H5kYIj6+dT376IWw2G0/97Bc89MhdBINu2v1ejhzfi8WoIxNPkk4kaTTrpDJprFY7i3OL\naLV6ms0We/fuQ2w02FnfRq1VYTLb0GkUlFvQUsgoNbRsbDSgIdEo1EhHN2jqwKbQYrSaEDU6NIkd\nQukQBnmL3XITo9ePq7efDo+N4vUr1FY3uff+++joDPKv/pc/QGyU8Xd3cvj2kyxMTxPw+9EaTaQz\neQq1Og6bncHBfnoGe0iuriGILYxWK7FcFp8/SGJrjWg4zODICCa9md31bUKxJCqFHqfDzc7yDqVs\nld6R/czOLLGyMI/RqEctSOR2dygUClSlKrVGBafdhowWoqxJQ6PE4uvg5XNvMjC5n+10lkwqTaPR\nYHV9A51eSTSRZmjvOE//5Pu8/NxPUDXqtHJlqpkCG6EYY6fu5ot/8r9i00nUM7sodUqswTY0ThsC\nKowOP0NTxwkODnPxzfNIMi1vnrtKh78HvUyJ2mpCZzezdO0SalWZUnwDu1lHW5sbpc5MQ6Yk0DFA\nT9c+Au1dNGtFLl28jK+rl914nGSyiEHnZGBkPzK9id3NEF09Ax9YHv9ZWAp//l//0+P7glrkChmi\n1EIQWuSyRcYPTJEtpFldWUFQiigVCvoG+tnZ3aFQztHZ1cncXJiuXidqtRqtysPszAonTt3Ge+dm\nkZVTyMUWgrKFs81HX38fuXSa5fUyWq2JNo+Mb/zT94hEsmTScS5ffL+e4vJahLNvX2d1ZZlAZwd2\nix2bxUwqmeLWzRk0ajn5QgGbw4XFZOXg+ATPPvsT1BoTfX0+kskkraaeS1cu4Q/uweuzo9HCxkYc\npUpErzcSjRbp7G2nJTaRteqcmPBy5OAQ12/FEaUmWmS0JDBqWrhtTdztZlpIuJx2Bnv7KFcqbG5v\nYzYbuXL5Ots7YexOC5tby2SSOTq7upk8uJdUeItaDXp6+vH3BXnrzFv09PRw5u1X0atVdHcGsVv0\nbG8uMzw4xgsvv8Drr51DzDVoZGoolQ1uLdTxdyrYqluZGHYwdPhuknUlk9oWhcIGFsmC/sQpwqIS\n8nkihzuY+si9tGIVrkW26H3so2QcStbn5xkaHqMhiYR2trh+c5aJQ0eZvnIBk9mEUq0iEY+wvbGK\nRqMhHE9wYHKK62enqTYVKFtKhkYCfPPrX0evVWPW6akV85x+7mmGBvvRa7WsbO4Q24lSazU5et+9\nlLIpwskdhkcP8qMnn+RTX/w9stE0gyN7KWbrTN3xYcSaFofdh1auxWiwkUokGB7cw+r8Ldw2Pe1e\nH/l8AY+nnWQ8gVYj5wdPfJ8v/sHvY7ebCIfXefGXP6Un4MZkctMsNVlZjfDWCz+gWslQTudxu+zs\nLM3REeygUK5y88YVujutOEx6jHYnIwfG2ImESeWqdHjauXT+PXp7utFbzKgULb75zX/g9uOnWFuZ\nZW31FvsO7ePc5bOUK2l0GiU+jxeVTIXdoGB7YxqtTsbpl19HEMBqt1Mu1/nad37xm+M+fOubX3v8\nDz/3IAqVCbXWiNhsoNWZSMQyhENruJ1u5FoDcrkcvU6HwWKmXGtx5swMd903ytZymKE9e3jnvfM8\n8omHWd4O4233sT47Q7pQ4cGHH2Zs/AANhcihOyaRail6+uxMHJngx09+h4kjQYxmCwcmRognwjh9\nLhLZEG6fE6VQ49LFS8zOLeJ0+6hU6xyeGCWdyaJV61lZm2N9YwFJVKJTC1htFrRaG8hKeL1+Dk1O\n8df/+4/o6/KQKZSZW2wgkwmUSk0KlRr5TAWbSUV/j5ubC+ssbxSRK0Eug5ZMjlIGKq0Sn2eA8NoW\nXr9ENh+lVmsR3syj1jSYu7XMwICXkZFBGlU5SHq0jTL1TJ6mVMbT7qJYKvHUj58mGc+jVZu5/0P3\nEmj3EA/tMrtwha5uH+9euEE9H2dowMvFC/OYjQok6mxEYTSo4MTHP0f/iZM88Q/fwHdgH1dtAaQ9\nB3DuD5J8/g0m7zlOaOkCGpcbrbefks+EfeQohYKIWBWZGu6gmc/ywlM/4sRd91CpVrly5TIf/cRn\n+fGTP+alF07
zhU9/nj3jB9ALcjbDO6RiZVqNGLemr2Jus+Nq72Z4+CBqHXj7ehGVYLeZmLtxEwkB\nf2cQpUKg0xsgsRGhqVFiMzhIb0Yxm81cv3COtmAPtWKRerOMVtkiGd6gUcthdunZ2drC5XZQqlXR\nag3YnS7mpq+SiIURm018jjbEBjREkVxDzvnz1+gJDDI+dZxbN2fRG9So1DIsOoGBg8cIBrvJlwrU\nKg06e0aoUcfp9eNu8/Hu22/Q1z9EOrKLEpFMNkVofYveOw/gMRmZX5tHp9XSqtUZHttHLpfn+s2r\nGC1mrEYb2XwJg70NpVqOKDVAUKBx+ekeuwutyobdocFoUHP16lV6uwf562/9BgUav/znjz8+5FOy\ntLyCRqVGrVJw7epNTpw8Sb1exWZ1EPC7efO1V7FbLQiSHKvJidWiw2JRIpc78fg9iC05uUSMUi6E\nUqlGYbbS5u9gdmaB69ev4HJaEGQ6NMoU9UYLpcXIwpVb3Hfqw4R20sjUdbwBGzSLDPaOIKOFTNZA\nkkT0BjMeTzv5fJx6tQ5SC4NZgcPqw91u5a67jlKtlinlmzSkBivrc/R09vH33/ouf/Znf4TRIqdV\nFzl/OYTRCoICKuUG6I0kMgLXb4aRFJAtNlHrBVoNOfmiiFYjoURGPL5DsMuBSlDjdraxE4mQymRx\nu2wMDvuYmJjgW39/mnCoxmC/E5MGXG0ujt5xnFItCwjcujHP7PU4K2uLGBxabl6/ioTEgdH9BDp7\niId2cflsRMMi67NhLFoZqXCTgSEHxUSTn790mbdeeI2JgJ6E0sFbL7+MphRh4PA+grePkFKIFDYS\ntBsc1PvtyAQVC7tpREGFxWamd8TLzOIcPcN7+f73f8jqxhYnT54iXyowPjVJW1sbVaUGg9ZMKV94\nP8U70Eln1xhTU8dALlGIr7K8Pk17m4/wZhin2UapXsWot7F/7wGuXrvCwMAQly6cpXO0k1ZdoL1v\ngNmLZxgeGyASi9K/fx8bM7cwqWRcvTXPvvH9CDJIZ3OUFQrmF1YZmzxKfDuCLdiBzWjG7e3AFwjy\ngye/h91lp9EScbg7OHb0CEvzM/h6OvB3+tEKAltrYVwd/dTT2+hUIqlkjnaXh8W5W2xsrLA4PYNZ\np8fn91Gv1HF5fWxsbDB37SaTU6MIzSYLi3Pvu1bnz9HmsCPXqGlUakzddozOYDdymQ23x08xnaLN\naoKyRLFYw6ATSCe2UCkkavkqO9sh0skkwd4+vvqtD4ZoVPy6CYIgfA+4H4hLkjT8q2+PA78DJH41\n7T9IkvTLX/H+FPhtQAT+tSRJr/66MzQaDR0eF13+AIX8+6CNw0emOHfuHdRKFd2dQ7zxynn2jkwR\nCado98p5842XMBht6PVGxvfv48l//BkjY+0M77+NHz15AZkixuDYOMloAZe3jXPvJjl0zEulVKam\nciA3lnHr7OybHGdpa4VsPoau1aRR0dMoaTlz/nUOHBxErXTSprPg9gRZmF8mlQzx6gtnOHRbgNlf\nJrjn7jtYWprhpz94jo4eG4cmJlE3RF6/Fmbu3Gk0bTLeu7LJ5PExEs/N8+lHhvn5szOIeiUqhYhQ\nElDJJIoqOaubVfRWE+ViEZdVQGfSkQ6VCOxxYtDC0tIux3/nYexWEwsbReIBowAAIABJREFUeYql\nCN3dXrQaC2+8dRmNXsbRyQGcRg25Sgq1WcPPfvJT6vUai4vbqLUafF4lHZ06NAo5ExMT7G7vIKFk\n+uYCTreX9u4geyfkrK/PsLpa4pFH7yWWLPD92XeZvLOT+++8HZfLwb/8k3/k0IEefD47BoOSlsWP\noZXF/PnPYJPX2Wk20OgM9HRI1Mo11K0WZrGFUG4QLcX4/d//Y8wuO0szN5mbvYXT6aRQLGO2pGmW\ntXj2DLIT3UUll5OvxDl39jrh7R0GR/fQMzqCRm/C1KgRDe9SrVUw2CxcXLjM6vQNYpE4+yemmJ9d\npDsQYPvSRdo796C2d2BwbyNvNOgc3Eciso7ZbufV06cZ7OlhO57AYDXT6XNTT6ex2C3Imk0SySQa\njQ4EOcNDg0TDIWQKJdV8jgvvvo1EHavZRTFT5dLCNfYdGGdpdZ5UZJWeQA8KJHKVKlq3nfhqksNH\nJ4nHk5TqMpw2J1qNEYfdRVd3B75gB8trWwz1jnDrwi06Bg+gNluoZZK4u9qo5qooNFrUGjnVco3u\nnn7CG4u0VHKaqhb1fIlSqYFer6KuNRCKphkY7KOSTP86Mfy/6YMEGp/g/S7S/z19TZKksV89/5dC\nGAI+Cez51Zq/FwRB/usOkJCo1htYrFaMZgPj48MoFSJqtUgkts1rb7zA4HA3Wj0Eu9vJ5TKMj0/h\ndlo4fvQgf/u1J3n0sY9y4MABzr33LlOHb+PkqSOEN9fJpDZpVAt88uHbKRfiREO71LIl8pkm//Hx\n71NIpXj37JscO3qSZKLE5UsLqHV6fuf3HqPd200uW0Wp0GMyK9HqarhsXsb2BVDLHAwNeqhX3g+K\nuWx+gs5uFufWWdoJM3n/RyjKW/ze730GlaHBZz/5p/zRf/wCNouDjz0yRrPRopyXU22WaNWrnNyn\n58iYgXIij1QFsdokEavQkkGx0iKWqGF1yFmcXaZSrlMo54hF5cg0ChY3N7l+Y5WBrmF0BgGzy8XG\ndoKvfe0pItE8DUmDo93AkWPH6B+14na78LS1o1UqCW2u89xzz2EwmensHsNhbyOfbzEw3ol3UM+F\nmwsshVZwWAw43Q609iroauQSORKhCHPT1+npdFNthlA2KqhkKejowCgvoG6k6DSJtFmqCPIUb0zf\n4OgnHuXwPXdz4fwrPPPEP/DO6y8Q3dnCbjZht1kIeP3UajWSW2vIpQbpQgm50EBu0vKxL/w2V2/M\nI5eUxNYX0MirXLtxlv0HprCozByaOMKjn/k0B48epJ7PcPT2O1hZuMozz/6QjvEeVEh4VFr+6e++\nzOrVC+i0Cg7sGWLi6BRtXR1MTe1npK+TjjYjzz31bTxeK4ndNZwOC3aHmVariVajo7N3gKGRfZjN\nVg4enKSY2eXaW8+zuTXD8ZO3s7a0iFenIegfpCxK9B6cYDeyhdtpYXzfOIVKHafXi0ymoCRBrlbB\n09PJ4Ohe3nnjPD5/D9lciUBXJy6Tle2tFewmE7mdEAa9kko6SqWWJhJb5/w7b1CvVhHqcmSSjGR0\nC5dJT6OQp5VLcPzYPtpcViTNBwcpfpCu0+8AH1TNfBh4SpKkmiRJG8AqMPHrFlWrVXZ3dwmFQmxs\nbBCJZBge2U+50qQuyvAFelidWycVC6NXy8hkcvT3j1CslTh77hLD+/y89OIzFPN51DotMrWSWDSF\nwy7HbGggFxsUskkKuRC7mzuEY7uUS0k++sAAFpuaTLLC41/6Cwb69nLPvXeRSYZYXV0mn42j14us\nr83z3M9Pc9fdd7MV20alMnHt+jTZvMj69gqpfIme4U4OHtrPxKET7N07
SWfQxDe//r/RyFbZWFjm\n9Et/x/PPvsGekT2YtVqCdhGxKUNrNNCSSXQHfUiSQF+/lcF+Gx1OLXZzi96gQK0ZY2UrwakTdzO2\nvxeosrISoVwVSeUEfvLUNNGIjGO3dxNNJnj2pTfINcuMHOjisc88wonbb2dw4DCSUkEyBSJqHE4r\noUiUrUiCBx95hCoiorKM1eFFqdbR5gmiMsoJ9BsJdjkoilVUkgmz3oWyJuFxm0kXStz3kVPUC3mU\n6TQ1g4jWJWNrboNaK4nBLFCrlxDLJZxaJYsXL1Lb2aSWSvKhBz/O8Yfu4+P/4jEQ82TjOxRiMTbX\ntrHb3OhUcjLZJCvT72GyBXA5vTz9ox/wyG89SrFWwuFwsLC0Q3fvOJlCnhdefoXt5S1i6TzJzV3c\n/b1ENsMcmLyNBx56kEeO38PnP/U5ypKS8anDuHr72NzcZmdrk3/87rdJ7MSoCTC9vI1M7eTUqbv4\nb1/6r+TyeUKrWywvL6PXaJk4dRdSJc+tS+9gNunYDIcZGJ5kJ5pEi5zNpTkmj9+Gsb8Hn9dKs1Fh\ndz3M1KFD5GIRzp55ne2FFd46/Qq+djdCMoZRJefGxfPk4klsPjvLyzNYTSoKiRA/e/r7hHaWiUR3\niKdTpLIJZFqBC2++RYfHxYF9o+RyuyjUZXRKEYPdi9FmIl+M0Go1iMcyNJsK1mZufEAR/oAwZ0EQ\ngsCL/5378FkgD1wF/liSpIwgCH8LXJQk6Ye/mvdd4GVJkp7+H+3f3W6W/v3H+sgWsgwN70Fv9GAy\n61ldWoamhMNp591z51Ep1BgMBlpSjf49AdRKC9cuT7O7vcbevXvJ51JUmwIyQY7NYiYejRHo7qVY\niGGxGtley5BKr2A1WzCZrSws7tCs1VEq9AyNutAqnaxvbJIsxRkd3EsqmUFo1dFoVNCSSCbTJBMF\nWgoZGq2eUrlFKicikwlk0zm+8uV/xfe//QSjo3s4edeD5AoFQjsLVMtFUpkCe0dvw2TTsrY5x9vv\nXOPVNyI0FQ2qJdBoWzTqoNYoKZcbaBSgM6lRtWqoNJDPKnnouA+LRU4mVebstTAWixyfV89DjzxE\nvVagXi1RrghEw7t88ff/Dbl0jrnFOXZDa4iiyIX3FhEEiXg8yTf+6kvk6iVmZm+htbQx2DmA1e0m\nmUhhM+u4eO41zp+/RldngFwmSksu57ap2xE1VZq1Kv/h3/2Yzz92G0ePHaSl1BBJhFAqjWjVGpQ6\nG4VcHotJi0oFiXgag1bDgduPkU5nadVr1BpqdHYHSzPTWPQyMqks/mAHcoWBWCKKWC5RzOYo1ot4\nvV72jo5Qr1SZnlnm4rVrBLu6Gez2kk1GUShleN0eGhJcm77J3NUrfOQzn8fj8NCo1cmnk+TyKd58\n9TW++Mf/np//+Gn2799Dm0WLe89x3n3+pzi8TvwdnSSjETQ6FxqnBYUISlmLSjVLqVTEYjQhtgRy\nuQwrq8t4PJ1oNQZWFtY4ccedxHM5rG12NBoNyXSK9OYCt6Znue/+h9jZXEPvsrJyY4XgvhEEOcQW\n15m4/17EfBaxVkSUgVAXWVqdY+yuO/iLP/wTHv7Q/RSVKmwOKxaLhUbz/XtAnX4Pm6sr+IMBtrc2\ncThsqDU6lFoz2xs7zN6cJtjtxt5mR6PVo1BbMARPfiCY8/9fnMI/AN3AGBABvvL/dQNBEH5XEISr\ngiBcTecrVBoqDh25g/mlLULbIW5cvU4g2IHWbEauVfOpTz3G1maY69fmyObKJOMl5lZWsLlsHD56\ngMvvXqFVVdFmsTFzc4aN9WU6e3zIhSbTNxdJJytkimmmbrufQl1Gvtri0c9+jK7ePgb6R9CbbVTF\nCgcnxnjongfp7e3G027n0G1TdA904+v04PXbGRnr5tDUbRya2Mv4aBd2sxyhVae3z8aXv/IUS1sF\nfvqLi/zZn3+Dn/38F6yvbXP46L34fD3oLRIXzl3gnVevcnJyHIVUQyF/Pw2rUbx/LbdZbaBVgMms\np1iqISFHrbJjc0q8+84Gu7s5zC4LQyNtWGxa7rv3AaK7W+yGtnC1DdJsqTGZfEQzeaaX1nG4daTS\nCYwmLZeuJEhmW9jsTjY31zG0+/jUl/4L+48ewmyzIigEsqUUv3jxZ2j0AhqtDEFQ05CJVGplWqoa\nQkNOLlNEJ8nRyGRUyhJSq4lR70AhNsmldnj7lWcwKnUoFQqkBpgNehRik3yySj4rkkpX2drcJJ2I\nM3XkKFK1wp5uP4V4iEwihqzVxKRW8t6ZM2irYDQaSWXzXJqep7dziIce/hhiJo0gUzA4dpBYpsLM\nrQWsBgt2q4MDIyM0c2Vef/lVTO42jDYL8qYCuUKHRqnjkcc+haBWUlDouH9ykkIxSYerHUEQ2I3H\n2dyeY/PKm7SyIZ756Xe5fmuaZDxJqVQmG48T3tri5Inb8Qe7OXPuPGPHJ9lM7+LpamP22gVim0tM\nX3ibWgWOHTuBTAkmg55MOs/BySE2VubxtjkwGOD1Z54jur3NO2+e5dqZC6yvbDKyf4KVi7M89jtf\nwOTVIrQKyAUFxVKdldU5zBaB1956DafPw+LqGlqdnnK5TDIRIxTaIpWPceD4OO0eD8VMDo1MRmzt\nf7Kl8P/G+1WQEUmS/uJXvFeBxyVJuvA/2r+r3Sx9/0ufx2Y3I9Hg9DMv0O51EU+EOH7kbs688zoy\nRCrlJmq1GoVahtcXIJuuoFKokYQkgtTCZDDS2zPA3PwyhXKJweERmhUZPr8DlVJLoZhlZ2OX6zcu\n4W/3YG9zY7Yb2Vib5+SxB5ArbZx55yX87UaMOjNqtZrN7S1cLhcr62vIRIGV5S0c3i6KhS00cjUK\nrYm33r5MIi6iNWrJ5yso5SBTgtmixu00UKs16OnxoTcaaOTK9AaD2H1OLl5f5OnnLqPVKYil6qSL\nEgpAqQSdXka10ULdkqNUi5SzAr/1UICjU0d49vnTqM0OFIoGRw4dRGoJlCp1tGYzerWFoT0BPCNH\niW9t8p2v/RUqpcT0rSWWtyTUmjo6Nfzu7z5CJlPA5XJhNjnoH9lHaGedubkZenvaWVu8Sngrg0av\no9pokM2lOH7sLtRqNdH4Fj/99jm++MWPYQtqaTW1KDRq4rEYFp2KhYUFxvYfQ6mVIaCgWsshoabD\n10YTAZfHTWwnxHp8i75AH5fOvc2BsTGu35whGBxClEHA5WTx1iJd/YO88PLz9PUO4OvrIxeJILcZ\n6e3rZmt1E5PNSluHn2tvnQGZjODgCGIxQ75cRYZE0OulWsuzuTxL39AIZ8+8x4GpAzSaIogiWr2d\nWjGLILVoyUoks3nMmjau37rE4akJitkMdbmGYjKHxWEnnUvS3x2kWCwj1/lQKdU88f3v8nv/8vcJ\nhUJYLG5MdivFaoXo2i5dPUFytTRnf/kag6P7kUlhAv5Rzp65wO0nxtiNpOnbN0KzXCUZiuL2B3nx\nly/Q0zfMQEcnX/nyv2V0+BC
j+w+RyOWw2rXotCpkgoLt7R38HUEMKhmh3W2a9Qa0Wjz/4os89plP\n8/Y7F2hztjMysofnT/8Tn3v8lx/IUvi12Yf/JxIEwSNJUuRXw4eA2V+9Pw/8WBCErwLtQC9w+df+\nhFzG3Mx7VKolJElk8vBR5HI5sViM1Y0L9PUGsDr9lEol0uk0DoeLSCRGuZhj7PhBIjEdYk1GtVIg\nlSywuRHisX/xabZCO9yaf4+B4UcQG02uXbrIqVOn2A5v4PS1421rJ59O0O3t5oXnn8IXGECl0mFv\n70Is5VhYXaZQbiEoyiwtbuDp8KM2Ociky6gUNuQqHbnkNmqZyH33DgAy6tUi12fCiLIWuWwNk05L\nOFRha2MJh1ONRlUlmtzhD/b/IdXMGcZHnSyvpbHrJWoNUOtUVCoNcoUWSgVorWqkpkhnZ41IPEQk\nvsFmuIgiUaKUl3jgwYc4f+Fdevp76B6YQKJOVYQf/c3f4na3sbW1hFFnwGyQoZFXUcpVlPJ1csks\nJqOGaHiTbKmEr6cDwaSmv8tLOBwnm62ityoRmyImo4Ht3TBqtRaF0EJoyhBaBVAVySRbGM0WqtUa\nOkvb/0ndewdbkl/3fZ9Ot7tvzu+Gl/O8mTdhdyfszg6wu8BisQBBAgRJMIi0JdOkKLpki5arRNMm\nIVG0ZJdoizSDQYUyBcEgABIgMnaxGbs7GybthDfvTXg53Ptujp27/cfQLrvKJeIPygWc//pWV1fX\nrT7fc37nfM/3MDTrdDsmnjBEcDVE0SUU0hgaFj0B7rx3g+TqPeaOL1FKjlIYm2e8sMr+1joPH18i\nMzaH69j0Gw3iuRj1fp0f+/FnqdQb5NI6iZFl3vneKxQ0BbPVZmZmio0blxk0duh1DXRFJZkLk4iE\ncQMI5afg8D4TsxPcvn6F5sZtjGOTZBeW0EMpbj73Fabm5tjYb9FvW1hBk1DW4Pj8cURBB9VH6dsc\nX36Im3dXSEUTDNp9qq02py48xuH2Lj//yR/h3SvfI51Os7NRp3nT4t6t+zz1iQ/T7VeJRFWOLM3R\nbTU5ffIYu5VD8sU8nhcF3eO7f/EcqXyWdLHE7t1Ndu5tcWTuCLutGr/yj/4ZbmeAI1p4tS5hOU9r\nt8btjet85KM/ye72Pd65eYni2CwL89O8fekGP/8rv4IsCTz11JM06010XWVkdOr79++/LlMQBOHz\nwBNAFqgCv/VX1yeBANgEfvn/AglBEH4D+DuAC/xXQRB8+697iVImFPzS0yMUi0VmZ2ep1AcIAQx6\nHY4tHcU0fCzfJZ/PEwQB42MTXL9+HcvpIkoqzXqNxblJ3rt6jcW5edrtLnokSm/YIyQriLJEo9Fg\ndmKK5174LufPf5CrV25w5FiZRCxKJJzEGPQZnyhxsF/j6vX36DTqxCNRGvUKyVSO/tCiPDFOdb/K\nwsw0O7ubNGotnn76PEPD4LP//otkMined/5x1jbuE02NEAQBr718ha7pUm8N6Xfgg09Os3lvk4ju\n8+SF87z4ykV2az59ScHoeMQ0EVkOUesMESQBCQnP8ZmbUOh1LbJ5lZ2KRVSFn/zUU5TLReqtIWfP\nPc63vvNtZmeOUi5lWbn+DtPTk9xducN3XnyVoRlgWRZj5SzNVp1cNsX4aJ7jx5f50DM/zgsvvcjE\nSIlUMsTFS29zeNgGRCLhEI7bZ/tgj2c/8EkcweL2rTu8+NU3+M/+3k9jBw6l/ARhVeKwb6ILNvfu\n7rGwPINlO8SiOv1hD0dSOLJ0gvsbWzx64iFef+MttJDC0tEFItksjWaDja11GjvbzB9d5OJrbzA7\nPcfEzATpiMatG9cezGBkCuxtbXD82ALv3dlktFhCVGQ826RT75ApTdDqHTI2NkemNMrh9gbddpVE\nIk0mleTG1YsUshla9QGvvneFZ95/nma9RbVa47GnLjDoiCRzJRTVpFmpMOgNeWdlE6d+wM/+6t/m\nysU3WZye5WBnG02PI6oRDFdhdmGa7337m5THxhl6IsV0jmpjm/L0BNXtA3L5AmHBZWjZ9C2Lbr9H\nuTSDIPU53N+jVCoRTRW4ce0qEUUiGo2iRpNcvb1KRBoyOb6EFch8+5tf4cQjp4gmi2TyOTLJNEar\ny9b6Lfp2n5CcwBh0KI2VuHXzNkLgoUV0FufmGT33n/7NZApBEPzM/8fP/+Y/cP/vAL/z1z33/2ma\nFmX++AdIxFPUOl12d9eYmZ5AQKfV7rOytsLk1BwpN3jASrt2hZsrlznYafNLv/gpElqMlbv3+fAn\nforP/OEfcXRhFkcIiMWSVA52sWyb8dFxvvP8d3ns8fO4vs3j7z9Do97nt//pZ5iaGyEV0xA8n1Qy\ngxqTmJyeoNVoY5sekbBGIpEgkU5gmD43795nbKyIGs7w+T/7ImPjBc6efZS9gwOu3b3P6FiZdDKG\n7/vMTue4vrJFNikRFjysfp2l+QwDR+C9ldtEoxFmYiq393s4uoArunR6QwQJEtEY9sAikYbDigcy\ntLsSpg2pkMitq1e58e4a05Nlvnjz9/lvfvOf0e13CAKXvXWF9Xt3+PMvvchgGCDpHoEis7HVpteU\n2VhvceNGi05PpDg5QWm8jCMr1I0m3f4BQkjHMFpoQZpYXGNGO0NI19AFjedeeJXRqWlGymME2Iii\nzthTH2UxfIpm9Qr7tT/C9n18REzLIazHafYMIm5AWhAxLYtyKcfqvT0GWhxzr4KnhlicmONzzz1P\nTBSZHBlnaf4I165cZHxujrGxSXq9Hq1eh/HJCaqVJicXj1KtVcnl89y/dxddDSG6A8JKQFj2+O6X\nPsvpc+fo1mrocoKa02F07ijdVptEIckvX/gQ2zeu4UoDlh85y8vffIORmQKCZGB7MJZN0LUGzOaS\nRGYnkGyJfCRLrdNm/+CQy+98hac+/CzFuZOsvfs24XiU3tBlenQUKxQg+gG+6+GYFtduXOMLn/kj\nfvJnf5qFxeOM5gtcv/UWF578EWp7La5eXmVsXiEWjlKv15g+coytjV2sXo9wXEUOR2hV94lEwoQF\nmalCls997t/zoQ99kGQyzezcDC++9hIf/ZFnsHs13n73NXbur3Hq5HESyTThkPJ9++MPBKPxf/4X\n/8Onf/XnP4KqKriuy/zcGJ1Ok6vvXcI2LcanSoyNT+F5HrIscbC/y+OPP0alske700AJSZRHp2nW\nmzzxxJNUDmpEY2nmZmYwDJtTp05QqdY4fuIYI4UStuXgew5j42PMzJYpl2Lk81ls1+dDz3wECZtk\nPInrWpw79ziVgxpbWxvousax40tksqM0mm3ubdxncXEOx/UZmgMq1S75/AjhVIyDvQMCJ2ByZpL5\nmTGWjswyO5MmpAZs7ezR7giYvkenN8TxfRzTpl53CUVkTNMHRCzbZHRUx/RdOpaIr3g0uw6yCJl8\nnN2DDs1Ol5XVPfbqHUaLEp5jcu/uGlcuXWHl1ibmUELVBWzLAyTi8RhSyGJqQkYOfNZu
1Lh66QZT\n0zlCUsD6vbtokRRBICIEMqZhMDRFEuk8mqZy584dBl2TYj5Leaz4YNdlNIY5tNBVCdlps7W5Sq9r\nsLO9x0h+BMcHAgFRDrj63hV63Q7rW1ucO3+ajKahZ2IM9ncZ1Js89bGPsLOzy+j4BLV6nYWjc7QP\nt3j+G39JUpe5d3eVdqPK2TMPsb76HhHZx+s0ubO6SiYe4+7Na8iKQjiexncDDisHTC6M8+ZbbxGP\nRQmHdZqNCoVckt17d9mt7GE5Fs1mg7HRApoeIqRo5NJFQvEUdmvI/fUVJkanqB5UGJ8ax3NdFpeO\nkkomyGfTZPMJ8qNlSsUy8aiCJ5i8+b0XWV4+jh5SiSXiiGLAj/zMz+IFIpFUjkCSUEQPWdbZ3rjD\n1PQkiWQW2xPIZTM0Gw1EWaA8Nc1oaRRdl5F9j7AeZeHIMaqtPk8++yOEtSjdQZNbN24yPjZKdXML\ny2oRzcUYSSVxHJsTJ49zcHDA7/+77/zw0Jz/+H/9Xz69UJTIZtOsra4yUswwUhwlmcjTGzSIRCLs\n7FRYX79PgM/YeJn+wGRg9ZmZnkJVH7QgQ4rMlavvoaoRGq0WtmdRGMlgWA6aHuONN17joF4hJOn4\nQYBp9Xj30luIoo8sqxw7uozreNimQb8/IBzViUSiCKJIsTzCsDfAMm3u3LvL3btrlMsF4qkEfiAw\nO7+Aa4HlGZiuSTqRRJQkJiZniISThEIa6XyKaCKM6QzZ3DTY3BsQCAK+5zKwAgYWSHKA54HjBAiB\ngicEdNoCsxMJhv0h8UiUXBaazSGBLxONK4RjCtGIQqmUZnpuFsFXWVu9RbXS5bDaI/AdYhGFdFJC\nFQIU38e1Nbp9i1gixK9/+r8mEk3zyNmHuX71XWRZpd3uEYsm8AMgkIjEEkQjOulUkpF8CT0cJl/M\nEVI1PA/swYBh+4Dm4T6HhxUc26bb6zA69mBeJBRSSWfyzM4tMjpWZmdzk1QmRTyTZdBqs3P3Dv1m\nnWQ8zltvXmRqbIxELIJjDanu7fHImYfpdrro4TCe6yJLEqosc2d1DUVW0TWNXDZFJKywubnOydNn\n8JwATwiAgEg0RiIep1AsoaWibNy+jdXvoSgJzpx/FNd3mCiWMX2FRL7MwfYaoaiAKAnooTDXrt5m\nbHySamWDSrXFrdurZNNJ9qsVwuEYl9+9hGmYBL5Np9PEdR3W769TLhbY2d2kUC6hKCFGCkUi8TSy\nEsIY9CiOTWIZfW5cv0aAQ7lYAt+h3+kSi0ZwLINWq40S8tFCEroex7I9UoUCg1YbKQTOYEB5tEhI\n0ciUMgSOgSDC6s3bTE5NE4klWd3Y5n//0it/MzTn/z8sABxHIZEocPnK58jkR4gnAiQtii/m0CMT\nnDg1z1f+8i+YmV8mGo3Sbfc4dSxLd9BjdGKSL3/5y3z0ox9hZ/8NHjs/ycCxOHb8PG9873WOHJvi\n1qWrIKuEozk29ncZKxexkTnx0OPsbVd4+KGzZNMRVtduIeoypcIUiqzzyqsvoesqh4eHjJUm6Zge\nsXSSn//A00SjSSxrQDgc5hvf+AYPP3qWRr2K5zvsHexTq9UwDYveoE+tUeMjH/4I/SYcm1kkqe9z\n9foe97Z7NNvgCiEM18bpBAQ+JOIqvmuheDBeCNFuthEEAXM4ZGq8hEwDc2jQrbukUnFsR+DNV9/m\n5W+9jCYpGAIMDIjEdbzApda0UYcwGD74zxUxQFQhloSZsRFkTeON198hmiriOAaJdAIBhRBR4uEk\nuUIeXdZwXY98XmdkJI+AwnDgACayLDPotQmFQpRzeQgkRksTBMgM+l16fQvH9bnwvvO89trLLB+f\np7Gzwc133qF6sE6+mOTsiRP87u/8Y86dPUu7ukaj3aF62OXE6YfZ3a8Tj+fwe22MocWgNyQcDpMt\nlDEDgdHREp7n0W4NiYd0Nt99g8rOAdmxNNF4Bk9PM7p8lOrqOpXdLVq1LsW5BZLRCJWdLTRR5Nf/\n21/j7/4Xv4rgyLTqTbK5PN6gz/31FU48PIsoNlldvcvkVJELj57k3u01zOEQVdE4snicXGmUt958\njUGnySsvv8Cv//bv8vxz32RufIxkbgwsmzdf/w6SkmLp2Cmm5hboHe4Q00Q+8OTj6PE4u7fvs7O7\nztTcBO++9RwH1UMeOX0WOTLKO9euoqgyS0tL3HvzHSxPRlEUjj6yxkG8AAAgAElEQVTyEOt377G/\ns8vM7CxG2yYUDzOSy7O3vUU2k+Ls8dnv2x9/IDKFz/zh73367/zUB/F8F2NoMDk1jSgq3L1zn6Wl\nIyCI3Lp5iwuPv48rl68gCCK27ZAvFOgP2vi+xMTEJJub6+h6mKNHlxEEhV5nwP7uJrX6HrF4jNur\ntymOz1LM5dk/2GdhYZmQqqJrYVZWb6OqIe7cuUO+PIZjizRbbSrVNj4SkUiM0fI48XgKSRHY2zug\n2WwiCFBvNtB0jdn5JRzHYn19EzkUIp6MMjKSY3enyvz8PC+/+iqyItFqNxibGmPzfpX9XQNRCGM7\nAo7rIosioVBARA+RTIrYPQ/PF9CzGrqsoGoeiujTrAzIZaJYjo3t2rS6Ju9//xSFVIl0NElhqsjq\n3X1838VzHrQ5dU3HNl3iUQ1J8wGfcinDsRPzmI5C/fCQbq9Nb9AlHA5jmDYEENYiIAjoIR0CEVGU\nsGwTURJxnQBflBABTxDwfQFZEvCRkGQZQZQYDAcMDItoNESzcUC5OEJI8DB7bTxcHjn7CLbp0Kr0\n+NGf+HEOK1WarSanzz6OFlI4ODgAx6Zl9LAck++9/j2effYZKgdVLMvC9Vy+8+2vk0lnKZTHqFYr\ntCoPNm+1BlWCICCWGcfsDpBDCTLpDAo+omBQr3aRdQ1JligXxkimMtxcvcejZx/j9VdeIJGME08k\n8DwLc2gSjiXY3dsnmxthZ2eHQqHEzv4Ot26tMVIskh/JcWRpCUHwCASFx88/xt7mHVzbpT/oszA9\nytrKHVKpDC+98jwTxQJ3bl7HNUxkSeK7332ZY0cX2dncYGZ6gonxCfZrNZqHFY4sznHxjTeIahHW\n793n1NlzGIMBt+6sEsZHVXQqBxtsbK6zfOoY1y+/hWubhMMqhjHkX/7pCz88wq2TpVTw7/75r2JZ\nFiFFo9nuo4VkBr0+0WiUqakp9g52+fznP8+Pf/LjOI7De1ev8amf/gnu3d1AFB8gpmPZnHvsDF/7\nxjfQVZWJ8RJ/+m//FeFIhOnZBU6dPEfXNBH8gH6vQzqZIJPJUC6XWbl9g1g4whtvvEEsGcNxAiQp\nIJ0qkk5nUUSBen2XQqHAwBiSzeSRJIWQGvD1r3+Dp576INt7VaJqiMN6FU0LIQowMTpGqVjm4sWL\nFItFkqkYW1tbrNy4xrVLa9TaXVpdBycQ8SSBXsdDCCCZiqDgoskCtuNhugGOGTA5qdNt9Immwhxs\nD4kkIaTIuL7Ax55aYqyQpdne480b+xxUujT
boEohDMNGUUCRBSQpRCatIEh9SqMjPP3RTzEc9jHM\nIaLoY1kemhbCNAboWoRkMkM8nMS2BPADPM9BCUmIkoLvA/iYjo2maQiCAB4EYoCqRTGtPrKiYhgG\n5tAhlUogCh5HplMMOk0808USRERRpLJXYWGuyM7BPpXNPVx8kjGdnd0aH/2xj7Ozu0+xkEeLxjjY\n3SOaimEODWr7h0xNTZPM5Hj37bc5dfoYO5t3abYb9BsGE2PjXLtxnfL4BLfv73DmwvtIp/Kk01la\n/T7RRJjLb79FRJIpjo7gGEOatQ6JTIaN3Q3e9/hjfPHPvsT41DyV/V3OnDmN50Eml6NTraIk4tRq\nVY6dPMv/8YV/zQcvPM3+5jae7OP7LoHtEwrLrNy8TyKuoSoaKysrPPvs02xt7TAzOYXlOmxtbTFS\nLFCp7JLLZmkd1slm8wwDkXwqgSYLRFIpGu0W9YMaqUyane19lo8dodNqkEgk6BmHNGqHuE7AkcVF\n3rt6DVkK4QseT/zdz/7wCLf+8R/83qc/eHaRdDpNrX7I5sZNNjfvcHvtGhOzMxiOSb3R5rFHz7F/\nsEdxZISpqSnanQHJRJwbN97DcSwy2TSNZo/KQR1RkvG8gGQqw+TMNMdPHCcRT3LlyjXS6TQzszM0\nmw1UTaPb6+IHHtt7u5RGx7l/dwNZkZmZmeP06VPE42F832R1bY2Z2VlKxXGazTZf/cuvMzE5RiKe\nJhKJs7OzS7XRYmnpGL1On2w2T39o0u0PUfUo7e6ASvUA13E4fe4hjh1fJJXRSaRl7t1t02l6xMIK\nmgqC4NHsOdQ6Pn3TY3o6Qq9l0h/aaLEMPaOLKMscHoIghRkYHpmMyJHFGUanlnjttZu0mxbhkIIe\ngnjERw2B5wqEIzpDT+Ef/MNPc3T5NNXaFq5vooZCdDp9REHCc30yqRE0LYEkRCDwCQIXRJdAAFGU\ncT0bWRHwAx9ZEhECAde0keUARRFwXQdZEQn8AAEZJaTR6nVxBRGra1KpthBDEp7TYiQbRZPDbG7e\nQ5UkoqkciUSWymGL8ZkFRqenSabSPPftb5PJ5TFtGx+LZusQJXC4du0S5rDL7u4Bvc6QxYWjDHo2\n+3sbeJLCyeWj7G7v8PGPf4LvfuvLzI5lsfoVAsfm1soKyWSS8ZkZvva1v+S17z7Pp37hP+GrX/4y\nTzz+BCt33yEm6ZSKZc4++QF2dva5vbVFSICRsVG+9MWvcPntF7C6Dd7/oZ/g1ZdeYePeBkvLi3ie\nj4BLoThCKqJTa7UpjBZ56OGHGB8tY5sDMsVRUpkM1d09itkiiVSKcrmMLEFuJI0mi2D3ETwby3DY\nXt9kZCRDSJaQJQHL6HHp2ru8+vLLPHTsKWxTJZbIk0yGsTybeqtPpjjJn3zx+9No/MHIFIrp4G99\nYJb3ve99ZDIZBj0PxzbJ5VNcuvIumdwIt29c5+zZM7i+h+3Z1GoPCpBz07Nsrq/T67fodttkMhkQ\nFFqtFgI+D588SyB6VA8qDAcWR5aXcG0HyzS5s3aXer1JPB5nfHKMSCRMEEDgwdAcUC6X2dy4h2EM\nEHmwiXlx4Rirq2tMTU2i6zpDo0sinqLRaOB6NoO+hWk7zM/P8yd/8ic8/YEPkMlkSKVSVOuHmIMH\nZ+FWq8EjDz/Mb/3j3+T22gGdnk8gQDwi4jg+jheiZ7r4yMiBTLs/pJhPYvk29fqQh5YyDHtDpkez\nDAdVfN9lcWaCDzxxASSfb774AvVKl+FwiO+DJAn0BiJaRKZvWPyT3/kNDmqHOI6FZ7i4nocohtB1\nHSEAwxiQjKXx/YBYOI7rWUiShCQL+L4PgYjneYiihCRJOJ6LGtIJggBJFjDNAeFoAj/wEH2JQFCw\nXQfLsXE8D1WSURSVkCKQiRpYQ4Neb8jU1Dh7G1scDg181yObz5CMp4gnEwx6fVKRCBu7BzhuwJnz\n53AMky9+9k85c+4hGu0WI8US7XYbCYnjj5ykvbfJzn4dMXAe7FNwPQr5JLdvrVAojhCJpNCTaXxJ\nIZUr0ul3UZWArdsbHP/EJ7j+/KsMa/u0ej1GCuMM2l3iuSxTR+aJp3TeeeF1jj32KIF5wNf+7HN8\n7Gf/IdF4mM271zisNSiM5Hj7zVd59Nz7cG0HPxAZnShjmiZf/fOv8+QT50hkspi2S7vR5L1rV5ie\nnULXdVRJZHX1NlPTE2TTGd544w0ef+w8W1tbHD2xTKvegUCm2aoTjccol8Z4+ZWvc/ToEVrtLkdP\nvp/NzQ1yhTg793Y58mO/+cOTKfzh7//up//zT32UkXyBTruL6Q54/oXvUCyUKBWyqKpIKCSQyWZ5\n5613UGSdE8unuLFyCdd+8DFXKnVcLyCiJwmFIhw9cpx0Ks97199DDSUpl8dYWJxjdWWdfr9Ju9vm\n1EOnicV0RFFkfHwMwzQpFgtIkko4rJHJJuk0bVKpDLblPFg4g0tIUfFdj0hEw/clLl26RG84pFGr\nYzoOihLCMIecP/8YrgduEBAIIpGwzurdVRaX5gmHdbZ2dvn8F76DYQXEEyH6wwBFFAnEEIcNi5Am\nko4GxMMyuuJgdE1U0WGsUOT+dp3mwKFS7dLoetRb8PBD0yRTGS6+fY3jJ5cYKeQZG5vBFxSS2Syd\n4YDjJ0/xc3/rZ6hW99FVGRDwXJFwOIrneXRaXRzTRpYFYjGdaCSKIAgEnoMiKfiBgO8H+L5PEAQI\nAgiCiKpp2LaDJIiIkoymaYhI4IMv+ODbRDQdTdMISRIID6ZjXQ+i0TCO46HpEUTRZXxyCgnIpuJs\nbO8gyx6Vg11iEY3s2BjrW9scWZpn9cZb7Gyv88jDp1m9f5diqYhp2cSjSfrGEGNgI3ki02ce4vIb\nb2AMuwSBSbc7ZGpiHsOwSOSKmIMB8bBKq1sjFo8Si6bIFIv84W//U/LxOKcefgjLNJmbmSQ7Vsa0\nB5iDDna9Tb/XJxZLc1ip8uKLrzIxs8TAdJiYPUpjb5dCfpRYIkejto9lQW/Y5+DgAFmQWJiZZf/w\nkEQiiuP6JDM5knGZpeUTbG3vMjk5h2naHH/4HP1en4P9feaWT7B1cEjgudiuz+jUJL4Q4PkWX/nq\nl3n6g8/i+xLrW7tk03Fqh3vcv3uL3fV1vvDS2g9PS/Jf/W9/8Omnzy3RH3RxXAuBgHNnz+A5NkpI\nJZnM4LkBiUSUWFzHMDtEogqxSIZut8fU9CT9QZderwu4bGzdIZdLE4tFH4h1qhIBLpcvX+KJJz6I\nYfWo1+vEIimarQaSqBCJRMjlc+zv75NIpjGMPi+99CJnz53DMHrEkwk2tjaoN5pMTU4gyRL1Zo14\nKsleZR9ZkXj09Bl8D1RFwzINUok4oiDQanWYnprkzt07hLUokqhSqeyzcuM2V6/cR1VDWAMbURUQ\nEXEcgVRUZDynUcpqJCIihuWgKCJBAJ1Wj1RUQwdkfGKqSEwPSGZCfPyTH+fJpz9ANn2Uw8MGTz/z\nDO
fOXGB2ZpZiKYEeVhAF8P2A3mCIMTRRQyrdboduo41tDrDsLqlkHE2NIIkCw+EAVdNwPR8pBOAT\nkh4UEUOhEJIiI6shNFUhFJJRZJmQIiNLIqIgPZgr8DxEWUTAJ8BHFlxCmoIXeAwMC1nW0CMx0tkk\na3fukcrkiEQT2A5UDqqMTs6RSI1wf6dCt2+RSCaRRYWwHmHoOSSTRaZnjmG7Pg4my2dP4/kereoh\n63dX8R2PhSPHyBUmMYYDev19onGdysEBvgu7O3tEEwksw0aUVe5ubHNq+RjZRIrOwMBxAxrNDtVa\nE1lSEEUFx/V4+9LbiILBjWvXKORGePSxR/Adk9rhIY45BN/DGPRZWJih0+6QLxTI5gooisrt1VXm\nl+dwLRMRGTdwqVX2Wbm5wvbmFrlcnkw+T+WwRTQW4+Kbb6GrOiePn8J3TGzLotdp0W426A+HfPRj\nP4rR79Nut/FdD4QBkYiM2XdJpZP8yZff+eEBhX/xP/7Op3/hJ59iZnqabDaN5ZosLC7iOC62bRJS\nJeKJLLbjksmMsL1VpdMx2NvfJZ/PI0kilmUTjUbRtDinH3mMqak5Dms1QopCEAhUK3WefPJDvP76\n67z19qssLh4hEk4wOztFrV4jlx/h1q3bjI1P0er0KJcLnDr9CPVGg1wxiRSS8V0BPRzDcmyqtTqm\n4ZAvlLl86SqiqBAEIo7rPIhWyRi2a5LNp3BtA0kMyOSSJJMporEwuVSaTDrDtffexDV9AiHAtQJc\nz6dvBviORCQcIsDFNBw0JYprmaQSIoEIouiTSOn0BjZeEOAGEueffIrlU2eQ0OkM14mnYrx58Rqu\nP+B7r79MMh0mGtUxDQPH9Qh4MPbdadbpNht0u22M4SHxmEKxNInkq4QUhVhUQwvr9AdtRBEkARRZ\nQQnpgEBEj2F7IoSSaIkCvqARyAqCoiH4Lo7VQZJ8ev0Ojm0Q+DYIPiFZQpIEPE/Csmx8IeB7L78K\ngoCqqw+AO51keXkexx7S6jRRfZfjS7Os3ryKik+zcUg+XyCqC7h2l3ajTiKSpXnYxjD6GIMBkXQK\nHQFBFtitV3n+O6/wyNmn8FHxvADTdIgmUtiuRa9nE4+kCHyfgTmgZ/bpDQzubqxz6qGHQJKQpDDl\n8gSRZIy5hXkEAU6eepTFxWV+73/653z1y19if+8un/iJn+LenetEVJ/XX7nK+HSZ7zz/EqNjU9iW\nSyIR4/KlawwNCdeV2dk+4OyZ07z71psUcjlq1UNu3LhNv1dH0pMsnzyDNWjhWn0iiThB4CIHIbrt\nDpNTU7z+2utcvXyFcrlEKpnk5RdfJhZJY5k+ggT/+ivv/vDUFI7NTwSf+Sd/G0mSMPoD1EiMWCxC\nJKyzt71DMpHh5p3raKEEtmOSSsVRVQ3BlUkmo/i+z16lyki+SDKto8ohyuUSb737Jgc7uzx06gwH\nBxXSqTzhaISVlSuUiyU0Pc733niZZz/2DGsra6TSJXxPBt+kXB5ja3eHbKGIYQxoNKtMTz8YEjX7\nfXzXIRmPs7GzgjnsMzk+Qd92EYMQCA8cWiRg/7CKLmmIskChXMQNZEDE6A84rNzj137tHxO4oKky\ngeMSjQkEBEyWi1hWB8dXESSRfq+JZ0fwAwvH9pEVAQEXHFBDEATwi3//71EcLREEENU0tjZ3GZ2a\nw7JMVlauYgwqANiWi+2IOJ6HJEk0DnbxEXEMk6nJGY4dO47r+YTDYRzXxnEswuEoBBKiEMK0+mgR\njSAICIfDDIc2khwiO3USIZojWT6FSYe4rLKz9jbGzgqB06ZZr+H7PqqqEvigqioEAU4g4gcitusT\njYYpFoskkll2t/eYPzrHvZs3uHHrJvFkisUjyxRKI+zsbnJ3dY2xUpFAkrm/tkp/0OXJJ9/Pu1cu\nc2p5md1Gk4l0ln/72c8QF2SGtsXM0ZNMlucYmykhBR4DVYeOiRqJ0XEHlNM5Ov0evm/j9trsrG9y\n+/46eiRG4Hk8cuEc5dIY91ZWKY2X2KtWyMXy9DoN2u3mX3XLJqk1mw9o+wcVlpaO8talS5iDOlPT\nC9y5t87a2hpnHznFoNNnc/c+ihYwv3iWcDxFt7bF4uw0B7UWXVvE67dp1+u87/HzDGwbQZS49NYL\nXHjiMQLPp3rYJFsawzQdsokM1cM9FEXGcRyisSTdzpD9w20e+7l/+R9vSvJv2h6QlxwikQgbGxuM\nqRpbG5sE+BQLJd67eYOxsVEsM2DYtxjJjZDKxti8X6HVbZFOxpidLuN5Ht1Oj2Zjl3Z7D8EX+MCH\nfxTDMDgxOoZlGTRbNSZmZolEIgh+wLGjC8i+hxqKEIlEMAwLx3Kx7D7l0TyOa9PvNrnw+ONcefci\nnudRKhTYOthie3NIWI8jCzq721Vmjs5z7eoNHn749IPI225TLhTY3t6hUW8hCjITs5M4joOvSbz+\n+uvEwjK9rouquqgRmSAIkGWZw2obLRah1uii6zqZRJFhb0C7BZIq4DoBmq5heTaKGEJTJRLxHP2u\nz9zMJJ41ZKQwSkiRsW0b17VwHIt+f0gkHMOyDHq9AblcDi0cpVAo4Psi8zOzCKJEr90hU8jj9fsM\nugNi0RCe52CabZLpFEPTod0aIIlhLNvDtYckjBaaYLN/a490YoRDp013c4VOZYdAFtHCYTzHRdFC\nWKaN53kIwoM9HIHvIysBiWQcTVfpG00KYyme/9a3CUk2ne4hm1trPPbEE6zdW2NqYhSjP2Bhbpb3\n7qxx5vyj+JaDCCQjCTIjEyTSE9iBw8//wt9H9l1u3LpNvpyjUt1DrmnUDYvpbBE3sBFMC8O0qHtV\n4vE4ricQzYyiaWnOnnmC/doBputw8aVXmJicJZZOYvQHSJZLJKeSiI2ydOQY2zvreJ7HzOQUm9sV\nZE1nc3udyclxjH6M7EieVq/P7Ows0bAKcojT73sW0+jRb+9y99YaI6UM9XoLz/MYL+YxhjqKENBo\nNPjm898hl8uxtPQwd++12dvdYnd3k3Aiwvr9bRRBRJIkFhcXEWWJfD7PjRs3cD3j+/bHHwhQIAgY\nHx+n1+uxvLzMcNgnlY7iui7V6gOCiu0IIHjMzo2D71A7qGBYQwojWQLfw7IccrkchmES1kcJfJ/R\n0QJvv30R0zQ5ceIEnXaDXC5Ds2ujZ6KsrKwwVizheSobG5uMFEeRJAEhJPP22xd55plnQZSIRaI0\nDg+RUEjE49y8foNsNs1es46YU7l14xapZIJQWObRc6dZXb3D7OQErjVA00IsLy/TbLSxbZvqQYVo\nLIIgumxv7jAcuMRjIaJRBQmBdrtPJBJDkWT6Zo9SKU9YU9nf2yCfLhH4Eu1eF8P0aHY9ZAUIfAYD\nk+HQYOHIUbrdLjIBkUiUfDaJ5/SpVfdQ1YBIJIIYQL/bo1QeJZvOEYtEmZiYQJYfdB8UUcJxbe7f\nv09xpEQslqDb7ZJJJ7FMA8uykOUQgiQyMIaIoogsqtjDHoN
WA3vgYOv3MBFJhMLsOwKaAIEvYdoG\ntuthGUMikQiaEkKSBBAhcD06zQblQonACbG9tc/pc4/iOD2OnjrF1auXabTqpDMpTNMEfOr1OmE1\nxPXr1xkrlygVCrz0yss0ej2mJ2YZXZwnXxxlu1rliY99kjt3b5KTJHoDkXJ+jJbVJ6crZCfHcQ8a\niF6fVqPN/Y17HFtaJpFMsbJ6m7V7dwmCgI9+9Mf46je/RePyFZ599mnC4Si3bl4nnowRCSfQYyrP\nPf9tIuE4Tz3zMWzb4oVvf414Mk0mFcewA+bnlrhy+V2KIxkGhkNI7yFJEtNjk7z83PeIxKI0u31U\nVaVRv838kQXOPH6Br37pz/npn/k5rl+/TjQVI+jBxMw05y+cw/FEjKGFZQywDJNkOoMbuFSrFS5c\nOM+ly/9BSZP/l/1AHB+Ozo8Hn/u9f4CuRRAEgdWbt+j1eiwsLNBoHjI+PoppeAyGbTRN4/69TWLx\nMLFwjFQyDn7AYDCg2+0ST6TQoyFkJcLe3gGnThyjXm+g6jqWPaTXb3F86WF2d3cZ9Dtksnnu3l8n\nHk8Si+j4vkMqkabdadLr9SiVyxiGQTQap9PpICLQajYZDAZIosxe9YB0Oo2uqpTKBSoHh8zNzXF4\nsIXnucwtLnLz1hr5fB5ZeuBIvuDztS99lue/9TqeHDyIlIGAHEh0uzbxuEw8rCKIYBoP+Be9ThtJ\nUGg2TeIJFd+z8YIQmwc+qmoykpP4jf/ut/D9CAvHF6jv7FNpdFkYT/DCy99GUXWSuTL1WpXDyh6Z\nRJyJ0Rk8X6Tda+P7PuVyEcOwkEMKnuMSUUMggu3ZFIplEBUiahjXd3BdF1mWEUURX5ToN5tYtovt\nueSyBfZrW4Q8FUd0CCsCgSPi8qAwCT7DXvOvjg8CBAGe72M7Fru1LjOLy8TTWVzbpjReQnYDbNfB\nEQQ8x8M3DW5cf5dzTzxBo1Ynn85QqVQIhzU6jQcpfNc0yU5OsXHpJolUlPWtdS6cf5R7d+4zPrNI\np12nfGyGL/zhv+GxRx8lUSzRqB0SUUQOq3Wm52a5ef0qIVkhGtLw8VAiEfrdProiIasykzMLrK2s\n0K/VGR0f46BW5+7GTWYmZ5gaXyTApNfvYroWCw+dobp+H1nRsR2PRCKGFhJ59+3rlMojWEaXG+/d\n4MLTHwTrQWH28LDC0omjfOMv/hxdkxEkkZHROXL5EuGIzEG1QkgRMXpdRkoj9HoDDg+7ZLJ5Dg/r\n5HIZUsk4uqbQrK8z9uR//x9Vju1v1DzXpdcdEA5HGQwMBAmmZ2dod3ocPXr0ATtQCjANh3gsw8LC\nETxXxCMgABqtJuWxUVKZNHJIQRJVbNNE13V6vT7D4QDPsTEHFp4l8uZb76CG9AdAMxgSi8XQNI2d\n/V0GgwGe51M/bDDoG+xsrxP4Nol4mL2dXW7deABY3d4QQdQo5PNoisTiwgKhUIhA4EFrr2eAqLK7\nd4BtDzH7HfzARg6pZNIFIrEEohbCMQNUUcPqBSiayvhUgUw2zsC0MIYmuhrGc0UCJBzXRA+H8DyP\nvu3hEDA6ESYWhZAMzUaPaFyg02/gEXDkoXmUSJR2u0ssopOMJxAEH1HyEJUwkhoiEtGZnhwjqocJ\nAoFsKk21UmN8ZpSRsTFy+QSRcBxbCOM5JoYxwHdEhMBHUESGjovrBcjhEPFYjpFMCXtQo5wrMlJI\noUbCiL6KrygkIyE0yUMWXPRIFFUPI6sh9HiEUFjFF0BHISRrxFJp1HCIZnXAne0NGsMBLhBJJdAz\nGcqTi1iGSyyRxHYdRFlCkjX05AhKOEYgKLg9i2wxQy6XoTRa5q2Ll0jEc+xWKuxWWnzzC3/Oj37i\n42ysr2HU91ElmZ4xZGZxlkHvgYDLsVNHSaUTFItFfM9htFAgn0sxksvhega5kSzhZJxbq7cJfJdT\nJ8+wub3HxtZ9PMcmlUpx8eI77K1tENXDNGqH6JrMwe4O3VabeEzl8LBCeWKK1EiR3fu76HoUJaQz\nM3uEyu4hiWya6YVZTNslk0rgugM8zyMTS5CMJKhWGphWQCY9wkNnHmVydp7yxASV2iGtTpvhcIjr\n/LWi6v+3/UBkCkuzo8E/+sVnSKUyTE5OUqvusrZ2jzOnz3H/3ipHjy1ycFBlbHSS3d1dMpkcyWSS\nnZ0NUok48ViEnd1dMpkMnhfQ7/cB0LUouq4zGHYfRHZJwvMcpibnaTQaBKJEYaSE4zhUqtt4nsfO\nzg7HTxwlm8ji+h6Bb7G+vo5pW8iSTqPeYrRcZv+giq5HyKQjuI6Dpmns7u88WFwzMcFwOKRWq9Fs\nN1g+Os/N69dYXFqmWusS+AKeb/Lm6xd57hsvIUoQDisoWohuZ0BYkxAAHJV4zEdRFAJHodHroWpg\nmT6xuIYcBqMXxTAa4MHHP/ljnHr4IQJJYbyY4/rde2QVkede/BrZTIxmvYocWAiuhSBEcAObABEt\nnUWUVAYdg8D1EWUNy4eJdAxXNlAiKgl5lsjIJmpoBFlIkshMgBSgDNpIkRByVKffEdA0jba5QzKU\nQJQiKKqMJ6oIzgDBk7EshyB4cIyx7QcgaQkepmPi+h6Hu00CRef4wxcIbBc97KBFC/iBTbfXY3P9\nkOXlZba371HKpxn0uyiqzLA/IEDGDUIkk0nqtR0UMUDXYmjJJI2Bx9j4GE67jhCYGEYbTBklLLK1\nvsPMzBx79R00LYwoilx66y2Ozc8RDusMBkPub2wxOTXD6Km9k+AAACAASURBVEieS9cvE9Z1MhGV\nW7duMbe4gNE3yKRziJpGPBrD9xz2d3YpFkeJpKJUKtvY/T4bO3uEFI2zp09zWDlAUjUarTaaGqbd\nbjM3O8n+foX4XwkPy7JIoZijVavQbLaZXzpGf2AgS9BpNBkZybG2tkYkEkNVFRxkXnjlVY4cXaJc\nHMV3bDLpOMNOg+kP//r3lSn8QIDCRCkb/MFv/hLRsIaiKKTTWTzPIZ4IU6tWSCaz7B3s0uv1mJ2Z\nZ29vD1mW8QWRQiZHZX+XU488TKVaRQhEAhwEIQAhwDQs8iM5GvUmjXqPpaUlrl5+m3gqSSaXpt1u\nEQ6HEUWFdCqLoujsV+6iSCGymRzW0EIQJPRIjEa7guM4bN/fQVEUUqkUK7ff48L7n+D69ZvMTk+Q\ny+W5efMmJ04cJyQrrK6tkE5kOTg4oDUYcPr0WZq1Ovlinncvfpc//v3PEI8nkUMSB3t1wmEZXYvh\nORa262MNTUKKjKbJjBTjDNt9BEUkEsTwVZ+dukSvs09MVfjF//KXCWkJZo8t8O4b1zh59hyVe2/x\n9uvfIpmMo8oZfLOJ0a6ghsJYvoOohSiN/5/UvWmwJOld3vvLPStr36tOnf2c3rdZuqdn00gWkrUA\nlkDAlYwxQmDg+prt2uz4YrYAEVi+EtcyyMZCQmiEAGGBJNAyo9k0mp7pZaan9z6nz177XpWVlfv9\ncJoIriOM5kbwQb
wRFfnGm5lv1of8P/kuz/P8TxEzsjjTMUEw4Yuf+zL/+sd/FjVTwBl4ZDNdtjd6\nyM5NCoUHOP/En2G6Lmff81OM+lMisoVv9TGSERrtIf40Rij7GHGFYNxEDHyEWJJkvIwq7wucNMnD\ntMb4voMi79Om7YmFFE+SLs0TycwheMH+QqztkUwWEQSBm7fOk0qlSGWLCJKG63j4nsS1Vy+wuLiI\nGk2wW6tyYHGVnfo2uZkShXSeYWuPSCxKo1Ejl0ri+yGuB53tKtHKArJmkNN1xuM2vu+jRFSm0ymW\nOaE0U6bZqKGqMoIE5ghSmRzxVIRxv4cgyViWhSRJJDNlLHNMr9tk1N4ml00hhmAOJgxHHcqVGWq1\nGqVynmG3Qb68iiLvLwYHkkZ7YDEzM0MQBPu7ceYEKRpFFkUcs4cqhrh3tTDmaMjcXIVWu0E6HkES\nQi6//AqKpu9LsEMP17XRdZ0AgfyZH/rHs/tgGAblmTyqLFLM51jfWCcIBACiRppOp0G322U8HtPr\nd8hkU5w7d47l1YMkU1FsK0m1vke32+XA6iFu3rzJgQOHqFbrKIpKEEJ/2Kc4k0dSPOaXFrFtm2a9\nQzSWACFCrbpHsVDBMAzSqSy7Oy2i0YBkPAkEdAcdbt64TiIRQxBCAt8l8G0OHz3G+YsXyOUKaJrG\n1evXmZmdpdlscv3aFe655yRGJEFkYHLi9IPISkB/CHIoUd+rI0kStuNhmiMMTSSd0jFHFqlEEphi\nquCHIbqhsVkb4Q4tMuUUg16NqSjz1eseh+dUZCkgaqQozszhTQOKBQM/bDPp+UxaIkndYPG+gwz6\nTRqhSK5yBEFW8AOH5bmD/OLP/jqf+syHWdvepdr9Mj/z63/C/MECTmPMaekF8r6EkNV42fofLNz3\nVoqLB7l08Qmy+TzthsnnPvZXlA/NMHfgCH1PpzRbINJvksgkEQyBmCjhOQOsiUPICGIGsVia4cAk\nb9iIXp9a6wbR2Bk8y8QNIV9I444tovEUtU6NeDxKZe4QkiDSbnRQZJFWp83S8gFKuRSGKjI2B5QL\nRSw/YGnpGM6ow9qNV5jJHcYae0SjRQJP4MmvPsl9950mMb+MhI/sTxj7IoKm4Q7HJCIGrWaX+dk5\n6q1NdE1m2OugprLMLM/QqbdoXNtEkERmy3N0Gg1m52bodZrgucR0ldjcYaKxCD4BSnpKPFhi6oSs\nnj6OqqoEkT0C2cfxbQJVQzYyzERDVHV/itjv95EEEXfQIZ4pIMcS9No9PNsnVyyRzuTxA5d8pcKg\nY+J7U5ZPnUWL6FiWTVQF15miKyL9Vu3vC8H/T/mmWFMIwwDPnxKKAYPRAEXZR+krV64gqwrDscni\n4iJv/Ja3c+z4/eiGwdmHHmJhaRnL92mNTRZXD5AtFmh020STWaaOhx7VabfbjEYjEokk0XiaOzsN\nND3G2LRZPXQMy/YYj01WDy3T7Y+49MorOLZEJBLl3AvnubO9w3BiIasKc7OLKLJBpVIhCAU8JPrD\nEY88/Hpy+VlQEuRLKxixGdpdn4UD9xPNLnJjc5ubG1sEXsh4NMV1oN6rs75dxUhm8AUB2wmJxtOM\nhy6OM6Vn9hhZIlokRTyRoteZkFQM8oUY86U4K0cqxOJxXn88i9dz8EyfmzfXmSnPM7dykFQ8RTjy\nqG5uMxnbrF1Z5+pzF0kKMvZkxNFKis6rm/zK//lRrr70BX7qZ97OG+95H/OzCSZjkdiRxxByZ1Du\n/yGq/kkstcgXz6ucfudP8+fnvs5f/9Wf8NDqIW49/zzls9/Lv/zNx/muH/8wj7zvV5DXniVm3qYo\nqchpj6w1QXdF7NaryFMTZxLS2Khhrk1JeBk6X/kQ3Wsv4Lse5s5V8ASKxTySlKY78ZnYDjO5Gdyx\ngxiNgxFBkjUMI8fq6kkikQiuIONKBkaysJ/klZDt29eJRDRmK0tY/hhNV7AnfWw/5HUPfwvlUgbf\n6pBOxNEicezRYF8Or6oEnk8qGaM/6lIqr5DKLeGKCWJ6jlFvQjKVYyYfJyq6mFafdCZOJKKRMWAy\nbLC9cZPptE5t+yb9+jZ64OP3dnB7W9iDHezRLp7TxQk07MAHwWfY3MAcjeh2mjQbuxRn8kzMIYEg\nIwY+k+Yek+YawWATs72D2d7FH3e48vXnUYM+om9hdncxBA9dnOJZDuO+yXho4vjqa47HbwpQ8H2P\nZLzE1kaLQX9KJBJlpjzL6uoy169fJZPOISLR6zSwpiaSorKwtIjtjUgkYqRTOap7LZKJAun0vhRa\nNwxcL+Dk6bP0egPi0Rg3Ll9i6/Z1NF0in8+yfvs6qhyyOF+m12kSeDb5bBpZlkmkY5x95AEajRay\nLEMQousqvV6Hre07uN6Ea1cuUa/WuHLlCoZh4DgORlxF00WWDyzgY6NqAqVyhdf9kzeALHHzxh0q\n5QVGQxvbcrDtIYJgoxoCtjfBC2HqgCrHcYKARqfBjVsNQkFG0fe5Bq1aH9O06TZ7OKMh2SToSsj8\n8ixqNAKOS6/TIAwcLM/CmvTwJhOGzSovfO0puq1dPvIfP8Uf/sEXKUrw/t9+lp0re/zEL7yR/+t7\nf41iKiSazuBkcoQJi/jyW2lf2OHIA2dANFlOVUg6FdrT47znRz7As7/7m9idL/GXf/CTeDcvsmMX\nOHvPQ9jOGOfpc2wMXMrLca6f28SyNQonjnDi9CmMUzqL98/QNwNmyhkKygRBkMjkS9y5uc64NyJf\nKCAEIXe21lEiGpFQJiIaqIk003DCXvUOU3PEkcOHKWQz1PZ2wXPY29nYBxZVoz8wSaazaHEDEQEC\nG8cdsFer4nkOnUGbZqtKrb5DLpcjnkhQa7WRZQXP8TCHHfqdXeKGQOj3cIZVbl3+GtWNm7RrW0wG\nLWQ8rr5ygcGgSzqdQJLAcwR8H4r5Aq12nWkQIOsGkqgxaA0pxJNU188jTgNEW6aQL2GN2wiBiTls\nsX7lIvGIjO8OEEKTdDrFyuETaNkinfYWRlQixGF+aZHe0KZQnEMG/uoz/4P2bgPfs8lm4mxvb5PN\nJV5zPH5TgIIgiIjylOMnl+l0a+xs79Hv9xkMBliWRT6fR9NjOMGUemMbEHn22a8TWiLOxKKUT3Dj\n6kWee/pLdJs16rubSKKP7zlUd7Y4fPgA63duUSqVmKtUqO1VsSYTTp24h0w6zaWL55mYI+zphG6n\ngSKDa5tMxl3uu+8kWxvbtFodzp+/SBgKaJEYqhJhrjLHAw+c5sTxo1x48XmiOkz6XV469zUGvQ5H\nVg/S3KuhSDKlQonx2KQyWyKRjNFuNbi9dpNEPIWERCqeYjS0mNoe8WRyn9gTuJRyKQ4uG6SSPqOh\nSTZTYuJ4bFe7SIpMiEs2pzE3N0syFuPKqxcQNIV4KoeqRamvrRFYLnPFJGnDopKJciA/S2erz5wO\nb34swZwW5+O//zx/8dsvkY/0+I6HTsDNz1Lur7HiTJgaUFJFdGmBq5sTNgY
qqw8/SDUx5ssv/CUz\nh+9ldeExYpbO0x/7TX7ydz7CeO8ZJvXLzD/2Zh47eZQvfPDnedN7/xXrV67zqV/8AA8/8GO87rH/\nwH/54F+y+tA/5WbLYdzsI+Lh2xPmDhzESObY6gyo15tkM3ls2+bmzavs7e0SeD6TyZRcocxwbLO5\nscHa+i1WV5YYDscUCgUGgwHVvQaqHsEXHNbWXmY4HNDq7GIHbYxIgkKhhDkcQeAyX8njTMaMhn1S\nmSSCJNJudRkPR7SbLeJRA2FqEo/HOHHvSUJNI1cqUr1zizs3XmUmn0JVJHrdNpl0mlw6RiYdo91t\no+oKk7FNzMhhGBliiQy31tY5cuQEyaRGgAmhSyyqsn3nNt1mjdB1mI7H2JZFu77HlctXkNQIYSCi\nxlNcv30bH4CAeDxOc6+KG0p867vezdzBe2j0W9hBwKGj9+KF/8jIS2Eo0Gr3EIWQlZVlbl5f3xdH\nuTb33nuMwLW4desm+UIWXVexzR4zhRT5QhLbdmg2quSyKWZOHKXTbBGLxblx+SKCJNNqtZCFFeZn\ni/i+jzucIsoymUyC3qCOMx0wP1vEdqHT6bC8uoRlj0gmk0DI1LKYKeWIRXXGwy7zc4sMzCn1ao1G\ndY98oURE00gno+xtbdHudjh06AACFq36BkIQEFOTXL14iYHZ4eChFdqdXWp7m3iOS6892FcLugGq\nEiEa0/FdD0FwkMIAAhXPD7AsF13N4boDFMEiFYuxuTEmllHoD23qtV1evnCR7/v+93Lh2We49/S9\n1DbukJ/VAIH6Tp9KzkZ3O3iTkPbAAVXnTk3mzKkpjzwSpTXqkxeTxNQrnPHG5NY3uO+xFJ9+ZZb6\nD/0yWudrvO7kG4lKY5SRxyli1IYWwvQ5bj3zRR79Z7/Mxp3neP6v/huFmbewq72CNjxO/87nmBkF\n/Mp3v4/Pb8JHPvmLfPcvxMirGt29F/HGHrEEuI6JIEsEokC/sYeacFnOx7HjcWQlZO3ObYqVWXTd\nQBBdGvXGvtcAIrNLB3BdF8uy8EOQxBDfs8jlU/S7Tc49c5m3vvWt1Ks1cukcg/GI4XCIqpdI6Aad\nTothKDC7NL/v9OSZRNQYlZkSmuKgIGD2WkxEg4W5LDcvnOdQuczla9cpVma5ePE8U2eKruvkCwV0\nQ8UcDUhlcrxw7iUOrSwTjyVQDRXTmRAKPqIo0mx1SaeTpLIlTNNkYrkcOHYvg/4ITdtX8K4UC2xv\n3OHoyVP0eyMkMUIykSCRynJr8xbHD5/Gw8FlTClaZDoc4vkSBw8cpdWsUmvV2dm59Zrj8ZtipDCd\nWljmBE1WMCIa0ZiIEVVIJhNY1ghF9VlZqXD40CqyJBI1DDzHY2d3k9FoQK1aJ6LvLwwlEjE0PWR2\ndhZZ1jl69DiaFkEQZMyxTak0Q21vlwsvvcDu1jZhGGI7FplMinw+y2RsYk+nXLt2lbXbN7HNAV9/\n/lmefu5J5mbLvHL5EhNriGl18cMJ1doOw+GAlZVlFpYWWVxcYmuziqoZuG7A7MI8n/jj/04qo3Pw\n4Cpbm9tsb+2wtDi/736sBOTzWQaDKYmUwnDQw3ZHeK6AYUQZT0YIskIQimhGD00NiMV0NrfHeBJI\nioqqRZBVEU2HSy9fI51Oc/3qq9xeu8J42CKfjuM5I7yJiTt06TVcYgZMnCnnbvRYrUzodSREUaBb\nl/iT3+sgvBQQT2t88s82KHoXKV3+KI8sKOxMXkEJpvQ3/xrz6q+Sja7x1FWf+NL3kZQsXv3zT5EU\n58kun6T9F09w86feSev8x/FzMj/xzxQ+8r4UC5GA0eP/nkt/8jPk88ewrjxFbDwmGT2IJKjs1JpE\nVIOx7zCZTIgmImzfWScZjZKOJ4goKoasMlNYoFBcJFeaxZm6CH5AY2eHdGz/i70wX2Jnc41Kuchj\nr3+IsdlDj6qMnDFKVMPQVPBsNje2UFWdmK4zbHdxLQfDMNjtdfAiEXwtiRgvo6TLzB04iW2rlGYP\n8+dPPc/529vk5pZ553vey+LqSYozR5h6cSaOgapnMKcC9555hEAwSGVn8UMZJxBQ9DipdBlFTTAc\ni4wnMoKaoDCzSKc7JpnOkc7lkTWdzTs7SIqKH8JwZCIoMo4LqXSSM2fu5/FPfoTO9ibecMB42KHZ\n2UaLC2yt30IRBaKGxqMPP/ia4/GbAhSiMYOZUoFup83WnQ1UXSPwwbIDavUerVYX3/dpt9t4nke3\n06NYLNLtdrEsm3w+j6rqRKNRarUaw+GQ9fV1et0Bw+GQfn+IObYBkXarx8njJzhz5gz9fp+trR08\nx2d7ZwtZFhFFEEXIZFOUSiVs2+bo0aPMzS7Q7Q2YmFOcqYUQ+KRSCY4dPYzn2qTTaXZ3d4lGoyST\nSbY29zAiCZ766rO88x3fyfbWLrKss7R0AM8Dz/MIggBd1+n22uSyMUajIYmEgSgK+L7PxJySzubo\ndkakUilEKUTTRXarUwRFQZBEBgMTRVEoFAoUijkOHFxit1onEo2iROKMxx5RRSeuhYQhSKIOIiRJ\nYTgKJSMkGpORGZNNwPb5ASVJIxBdnjhv8SvnBb6wqfLBz21x7fNfImpHKBSi5A7FCHG48VKVb/2h\nn+HWs1d5/Jd+g+1mj5nXn0HobqNX77BU1sjgkko9goUEssne1asUox7S1GOzcQPXDzHkGKJvo4o+\nmUQU1w3QRA1ZVuk2GsRiMcbjMaZpEgQO7VadmzevMRh2cb0Jg36D4aDN4cPLyHLA7u4urVZr/wUL\nAjq1GuNuFzkM8ewpqiihpwzGzpBELoYalTDNFnJogz8lElGYLWWRxABClSCIYJoB3WoV3wlIzi/x\nrh/5P3jsLW8nmkyxu1tF1Q3K87PMzM/iBB7t7pBoLMnuXp29WhNrug9yvU6L4XCIbkTRojqSIrJV\n3cb2HK5du0apVMJ1Xbrd/WTvhUKBRCK1v6MQj2NPXXL5OO3WAHMU8KY3vZVoMkE6n0OWBPKZJGav\nA+xv9WZzBSb25DXH4zcFKEwtm2anTTyVRYkm8UOZytwis7OzFEvL6LE5IvEEjU6HZqePrEcxoknm\n55ZIJBJU5io4/ph0LkppYZFaY0IqV6C0mGf1wBK9fp/lg4fRjSSpbIaR7aBEkyhGlvsefBQrhPLs\nCtFkjsnUwjAy3Li2wcZGg85wguUHFMvzrG/skspm2Kvu0huMKZUWeebp59B1nUG/TSafI0BkbnEO\nQQnIFtIomooSEcnmEuxu3eCpr/wNy/MzNBsdPM+jWR0ihRF6rTGjroQ78UhocQQhZOLamGMHw9AJ\nvZB226dW9dBUnYjskkkIlDI6huoR+HUWVw7z5LNfYWJ2SEfT1Otdertd0hGZaLbInTs6T10I+cqr\nEEl4vOX1Cv/87fDhT0h84I8CPvHHGs2uz9D3eW4k8uCCwNe/R+bH73d4KB7ytRdd/ttP/XfCa7fZ\n/E+3SZ58P41hmhv/7ue48Bdf4g
SMwZySkkEokNJKeQSCQ2\nkJxCIpHYQHIKiURiA8kpJBKJDSSnkEgkNpCcQiKR2MB/AUsvfSqnmTG0AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "tags": [] }, "output_type": "display_data" } ], "source": [ "from matplotlib.pyplot import imshow\n", "co = repo.checkout()\n", "image_column = co.columns['images']\n", "dataset = make_tensorflow_dataset(image_column)\n", "for image in dataset:\n", " imshow(image[0].numpy())\n", " break" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "FTArZhtZfg7S" }, "source": [ "### New columns\n", "\n", "For our example, we would need two columns. One for the image and another one for captions. Let's wipe our existing repository (`remove_old` argument in `repo.init` does this) and create these columns" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "id": "ISMdkXtYHg2c", "outputId": "0e18d9d3-1a4f-4f75-d388-c8c0c316f69b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hangar Repo initialized at: hangar_repo/.hangar\n" ] } ], "source": [ "repo = Repository(repo_path)\n", "repo.init(user_name=username, user_email=email, remove_old=True)\n", "co = repo.checkout(write=True)\n", "\n", "images_column = co.add_ndarray_column('images', shape=img_shape, dtype=np.uint8)\n", "captions_column = co.add_ndarray_column('captions', shape=(60,), dtype=np.float, variable_shape=True)\n", "co.commit('column init')\n", "co.close()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "z_fUUIpKCMen" }, "source": [ "### Store image and captions to Hangar repo\n", "Each image will be converted to RGB channels with dtype `uint8`. Each caption will be prepended with `START` token and ended with `END` token before converting them to floats. We have another preprocessing stage for images later.\n", "\n", "We'll start with loading the caption file:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "VlX-su-gCMep" }, "outputs": [], "source": [ "import json\n", "annotation_file = 'annotations/captions_train2014.json'\n", "with open(annotation_file, 'r') as f:\n", " annotations = json.load(f)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "UMcYzkWgCMes" }, "outputs": [], "source": [ "import spacy\n", "# if you have installed spacy and the model in the same notebook session, you might need to restart the runtime to get it into the scope\n", "nlp = spacy.load('en_core_web_md')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "wxpbxEvmCMev" }, "outputs": [], "source": [ "def sent2index(sent):\n", " \"\"\"\n", " Convert sentence to an array of indices using SpaCy\n", " \"\"\"\n", " ids = []\n", " doc = nlp(sent)\n", " for token in doc:\n", " if token.has_vector:\n", " id = nlp.vocab.vectors.key2row[token.norm]\n", " else:\n", " id = sent2index('UNK')[0]\n", " ids.append(id)\n", " return ids" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "RIvqFIHUCMey" }, "source": [ "### Save the data to Hangar" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "id": "__I8ntp3CMez", "outputId": "287685d1-2e7c-4d3f-94b7-87db73f966e3" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 414113/414113 [00:03<00:00, 122039.19it/s]\n" ] } ], "source": [ "import os\n", "from tqdm import tqdm\n", "\n", "all_captions = []\n", 
"all_img_name_vector = []\n", "limit = 100 # if you are not planning to save the whole dataset to Hangar. Zero means whole dataset\n", "\n", "co = repo.checkout(write=True)\n", "images_column = co.columns['images']\n", "captions_column = co.columns['captions']\n", "all_files = set(os.listdir(image_dir))\n", "i = 0\n", "with images_column, captions_column:\n", " for annot in tqdm(annotations['annotations']):\n", " if limit and i > limit:\n", " continue\n", " image_id = annot['image_id']\n", " assumed_image_paths = 'COCO_train2014_' + '%012d.jpg' % (image_id)\n", " if assumed_image_paths not in all_files:\n", " continue\n", " img_path = os.path.join(image_dir, assumed_image_paths)\n", " img = Image.open(img_path)\n", " if img.mode == 'L':\n", " img = img.convert('RGB')\n", " img = img.resize(img_shape[:-1])\n", " img = np.array(img)\n", " cap = sent2index('sos ' + annot['caption'] + ' eos')\n", " cap = np.array(cap, dtype=np.float)\n", " key = images_column.append(img)\n", " captions_column[key] = cap\n", " if i % 1000 == 0 and i != 0:\n", " if co.diff.status() == 'DIRTY':\n", " co.commit(f'Added batch {i}')\n", " i += 1\n", "co.commit('Added full data')\n", "co.close()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "gXvSa2iCCMe2" }, "source": [ "### Preprocess Images\n", "\n", "Our image captioning network requires a pre-processed input. We use transfer learning for this with a pretrained InceptionV3 network which is available in Keras. But we have a problem. Preprocessing is costly and we don't want to do it all the time. Since Hangar is flexible enough to create multiple columns and let you call the group of column as a `dataset`, it is quite easy to do make a new column for the processed image and we don't have to do the preprocessing online but keep a preprocessed image in the new column in the same repository with the same key. 
This means we end up with three columns in our repository (all three hold different data stored under the same sample names):\n", "- `images`\n", "- `captions`\n", "- `processed_images`\n", "\n", "Although we only need `processed_images` for the network, we still keep the raw images in the repository in case we need to look at them later or decide to try some preprocessing other than InceptionV3 (it is always advisable to keep the source of truth with you).\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "QBGCS_ceCMe2" }, "outputs": [], "source": [ "import tensorflow as tf\n", "tf.compat.v1.enable_eager_execution()\n", "image_model = tf.keras.applications.InceptionV3(include_top=False, weights='imagenet')\n", "new_input = image_model.input\n", "hidden_layer = image_model.layers[-1].output\n", "image_features_extract_model = tf.keras.Model(new_input, hidden_layer)\n", "\n", "\n", "def process_image(img):\n", " img = tf.keras.applications.inception_v3.preprocess_input(img)\n", " img = np.expand_dims(img, axis=0)\n", " img = image_features_extract_model(img)\n", " return tf.reshape(img, (-1, img.shape[3]))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "ANFPvYByCMe5" }, "outputs": [], "source": [ "from hangar import Repository\n", "import numpy as np\n", "\n", "repo_path = 'hangar_repo'\n", "\n", "repo = Repository(repo_path)\n", "co = repo.checkout(write=True)\n", "images = co.columns['images']\n", "sample_name = list(images.keys())[0]\n", "prototype = process_image(images[sample_name]).numpy()\n", "pimages = co.add_ndarray_column('processed_images', prototype=prototype)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "jWN6AxiHCMe7" }, "source": [ "#### Saving the pre-processed images to the new column" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "id": "HdFxmi5ECMe8", "outputId": "38dddea0-64f8-47cf-fc9d-6b14a6140135" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 101/101 [00:11<00:00, 8.44it/s]\n" ] } ], "source": [ "from tqdm import tqdm\n", "\n", "with pimages:\n", " for key in tqdm(images):\n", " pimages[key] = process_image(images[key]).numpy()\n", "\n", "co.commit('processed image saved')\n", "co.close()" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "zacZutpTCMe_" }, "source": [ "### Dataloaders for training\n", "We are using Tensorflow to build the network, but how do we load this data from the Hangar repository into Tensorflow?\n", "\n", "A naive option would be to loop through the samples, load the numpy arrays and pass them to Tensorflow's `sess.run`. But that would be quite inefficient. Tensorflow uses multiple threads to load the data into memory, and its dataloaders can prefetch the data beforehand so that your training loop doesn't get blocked while loading the data. Also, Tensorflow dataloaders bring batching, shuffling, etc. to the table out of the box. That's cool, but how do we load data from Hangar into Tensorflow using a TF dataset? Well, we have `make_tensorflow_dataset`, which accepts a list of columns as a parameter and returns a TF dataset object."
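, "\n", "\n", "The next couple of cells do exactly that. The pattern boils down to something like this sketch (assuming the `processed_images` and `captions` columns created above):\n", "\n", "```python\n", "from hangar.dataset import make_tensorflow_dataset\n", "\n", "co = repo.checkout()  # a read-only checkout is enough for loading data\n", "cols = [co.columns['processed_images'], co.columns['captions']]\n", "dataset = make_tensorflow_dataset(cols, shuffle=True)\n", "```"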
] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "id": "gcKsE3d4CMfA", "outputId": "a42c5c84-e62f-4178-cc3a-175dac08aa7c" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " * Checking out BRANCH: master with current HEAD: 3cbb3fbe7eb0e056ff97e75f41d26303916ef686\n" ] } ], "source": [ "from hangar.dataset import make_tensorflow_dataset\n", "co = repo.checkout() # we don't need write checkout here" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 105 }, "colab_type": "code", "id": "TybRGUGaCMfC", "outputId": "8e75b46d-f8da-4dd3-c607-1174b23a15a0" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(repo_pth=hangar_repo/.hangar, aset_name=processed_images, default_schema_hash=f230548212ab, isVar=False, varMaxShape=(64, 2048), varDtypeNum=11, mode=r)\n", "(repo_pth=hangar_repo/.hangar, aset_name=captions, default_schema_hash=4d60751421d5, isVar=True, varMaxShape=(60,), varDtypeNum=12, mode=r)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.6/dist-packages/hangar/dataloaders/tfloader.py:88: UserWarning: Dataloaders are experimental in the current release.\n", " warnings.warn(\"Dataloaders are experimental in the current release.\", UserWarning)\n" ] } ], "source": [ "BATCH_SIZE = 1\n", "EPOCHS = 2\n", "embedding_dim = 256\n", "units = 512\n", "vocab_size = len(nlp.vocab.vectors.key2row)\n", "num_steps = 50\n", "\n", "\n", "captions_dset = co.columns['captions']\n", "pimages_dset = co.columns['processed_images']\n", "\n", "dataset = make_tensorflow_dataset([pimages_dset, captions_dset], shuffle=True)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "27mQc673CMfF" }, "source": [ "### Padded Batching\n", "\n", "Batching needs a bit more explanation here since the dataset does not just consist of fixed shaped data. We have two dataset in which one is for captions. As you know captions are sequences which can be variably shaped. So instead of using `dataset.batch` we need to use `dataset.padded_batch` which takes care of padding the tensors with the longest value in each dimension for each batch. This `padded_batch` needs the shape by which the user needs the batch to be padded. Unless you need customization, you can use the shape stored in the `dataset` object by `make_tensorflow_dataset` function." ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "id": "8tpHg3w2CMfF", "outputId": "e2145382-c73b-4acf-9076-40ff64554ade" }, "outputs": [ { "data": { "text/plain": [ "(TensorShape([Dimension(64), Dimension(2048)]), TensorShape([Dimension(None)]))" ] }, "execution_count": 9, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "output_shapes = tf.compat.v1.data.get_output_shapes(dataset)\n", "output_shapes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "imMQrtn7CMfI" }, "outputs": [], "source": [ "dataset = dataset.padded_batch(BATCH_SIZE, padded_shapes=output_shapes)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "tY6Z7y8TCMfO" }, "source": [ "### Build the network\n", "\n", "Since we have the dataloaders ready, we can now build the network for image captioning and start training. 
Rest of this tutorial is a copy of an official Tensorflow tutorial which is available at https://tensorflow.org/beta/tutorials/text/image_captioning. The content of Tensorflow tutorial page is licensed under the Creative Commons Attribution 4.0 License, and code samples are licensed under the Apache 2.0 License.\n", "Access date: Aug 20 2019\n", "\n", "\n", "In this example, you extract the features from the lower convolutional layer of InceptionV3 giving us a vector of shape (8, 8, 2048) and quash that to a shape of (64, 2048). We have stored the result of this already to our Hangar repo. This vector is then passed through the CNN Encoder (which consists of a single Fully connected layer). The RNN (here GRU) attends over the image to predict the next word." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "6Kc-yZ0iCMfO" }, "outputs": [], "source": [ "class BahdanauAttention(tf.keras.Model):\n", " def __init__(self, units):\n", " super(BahdanauAttention, self).__init__()\n", " self.W1 = tf.keras.layers.Dense(units)\n", " self.W2 = tf.keras.layers.Dense(units)\n", " self.V = tf.keras.layers.Dense(1)\n", "\n", " def call(self, features, hidden):\n", " # features(CNN_encoder output) shape == (batch_size, 64, embedding_dim)\n", " # hidden shape == (batch_size, hidden_size)\n", " # hidden_with_time_axis shape == (batch_size, 1, hidden_size)\n", " hidden_with_time_axis = tf.expand_dims(hidden, 1)\n", " # score shape == (batch_size, 64, hidden_size)\n", " score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))\n", " # attention_weights shape == (batch_size, 64, 1)\n", " # you get 1 at the last axis because you are applying score to self.V\n", " attention_weights = tf.nn.softmax(self.V(score), axis=1)\n", " # context_vector shape after sum == (batch_size, hidden_size)\n", " context_vector = attention_weights * features\n", " context_vector = tf.reduce_sum(context_vector, axis=1)\n", "\n", " return context_vector, attention_weights" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "up0nVnIZO2_c" }, "outputs": [], "source": [ "class CNN_Encoder(tf.keras.Model):\n", " # Since you have already extracted the features and dumped it using pickle\n", " # This encoder passes those features through a Fully connected layer\n", " def __init__(self, embedding_dim):\n", " super(CNN_Encoder, self).__init__()\n", " # shape after fc == (batch_size, 64, embedding_dim)\n", " self.fc = tf.keras.layers.Dense(embedding_dim)\n", "\n", " def call(self, x):\n", " x = self.fc(x)\n", " x = tf.nn.relu(x)\n", " return x" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "4qAEbanRO77k" }, "outputs": [], "source": [ "class RNN_Decoder(tf.keras.Model):\n", " def __init__(self, embedding_dim, units, vocab_size):\n", " super(RNN_Decoder, self).__init__()\n", " self.units = units\n", " self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)\n", " self.gru = tf.keras.layers.GRU(self.units,\n", " return_sequences=True,\n", " return_state=True,\n", " recurrent_initializer='glorot_uniform')\n", " self.fc1 = tf.keras.layers.Dense(self.units)\n", " self.fc2 = tf.keras.layers.Dense(vocab_size)\n", " self.attention = BahdanauAttention(self.units)\n", "\n", " def call(self, x, features, hidden):\n", " # defining attention as a separate model\n", " context_vector, attention_weights = self.attention(features, hidden)\n", " # x shape after 
passing through embedding == (batch_size, 1, embedding_dim)\n", " x = self.embedding(x)\n", " # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)\n", " x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)\n", " # passing the concatenated vector to the GRU\n", " output, state = self.gru(x)\n", " # shape == (batch_size, max_length, hidden_size)\n", " x = self.fc1(output)\n", " # x shape == (batch_size * max_length, hidden_size)\n", " x = tf.reshape(x, (-1, x.shape[2]))\n", " # output shape == (batch_size * max_length, vocab)\n", " x = self.fc2(x)\n", " return x, state, attention_weights\n", "\n", " def reset_state(self, batch_size):\n", " return tf.zeros((batch_size, self.units))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "9ZlfcS5VO_yA" }, "outputs": [], "source": [ "def loss_function(real, pred):\n", " mask = tf.math.logical_not(tf.math.equal(real, 0))\n", " loss_ = loss_object(real, pred)\n", " mask = tf.cast(mask, dtype=loss_.dtype)\n", " loss_ *= mask\n", " return tf.reduce_mean(loss_)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "s5kEPFlZCMfR" }, "outputs": [], "source": [ "@tf.function\n", "def train_step(img_tensor, target):\n", " loss = 0\n", " # initializing the hidden state for each batch\n", " # because the captions are not related from image to image\n", " hidden = decoder.reset_state(batch_size=target.shape[0])\n", " # TODO: do this dynamically: '' == 2\n", " dec_input = tf.expand_dims([2] * BATCH_SIZE, 1)\n", "\n", " with tf.GradientTape() as tape:\n", " features = encoder(img_tensor)\n", " for i in range(1, target.shape[1]):\n", " # passing the features through the decoder\n", " predictions, hidden, _ = decoder(dec_input, features, hidden)\n", " loss += loss_function(target[:, i], predictions)\n", " # using teacher forcing\n", " dec_input = tf.expand_dims(target[:, i], 1)\n", " total_loss = (loss / int(target.shape[1]))\n", " trainable_variables = encoder.trainable_variables + decoder.trainable_variables\n", "\n", " gradients = tape.gradient(loss, trainable_variables)\n", " optimizer.apply_gradients(zip(gradients, trainable_variables))\n", " return loss, total_loss" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "cQeg3v4KCMfU" }, "outputs": [], "source": [ "encoder = CNN_Encoder(embedding_dim)\n", "decoder = RNN_Decoder(embedding_dim, units, vocab_size)\n", "optimizer = tf.keras.optimizers.Adam()\n", "loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "pyYHHBHVCMfW" }, "source": [ "### Training\n", "\n", "Here we consume the dataset we have made before by looping over it. The dataset returns the image tensor and target tensor (captions) which we will pass to `train_step` for training the network.\n", "\n", "The encoder output, hidden state (initialized to 0) and the decoder input (which is the start token) is passed to the decoder. The decoder returns the predictions and the decoder hidden state. The decoder hidden state is then passed back into the model and the predictions are used to calculate the loss. Use teacher forcing to decide the next input to the decoder. Teacher forcing is the technique where the target word is passed as the next input to the decoder. 
The final step is to calculate the gradients and apply it to the optimizer and backpropagate." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "l4gg61xSCMfX" }, "outputs": [], "source": [ "import time\n", "\n", "loss_plot = []\n", "\n", "for epoch in range(0, EPOCHS):\n", " start = time.time()\n", " total_loss = 0\n", " for (batch, (img_tensor, target)) in enumerate(dataset):\n", " batch_loss, t_loss = train_step(img_tensor, target)\n", " total_loss += t_loss\n", " if batch % 1 == 0:\n", " print('Epoch {} Batch {} Loss {:.4f}'.format(\n", " epoch + 1, batch, batch_loss.numpy() / int(target.shape[1])))\n", " # storing the epoch and loss value to plot later\n", " loss_plot.append(total_loss / num_steps)\n", "\n", " print('Epoch {} Loss {:.6f}'.format(epoch + 1,\n", " total_loss / num_steps))\n", " print('Time taken for 1 epoch {} sec\\n'.format(time.time() - start))\n" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "J7JPiJjtCMfb" }, "source": [ "#### Visualize the loss" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 295 }, "colab_type": "code", "id": "M0icezYgCMfd", "outputId": "5c2bf016-120c-4ca9-f7d2-cef69eb216a0" }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAApA0lEQVR4nO3deXxddZ3/8dcna9s0\ne9ItS9OWlm5AWkoXqmXHwjgUxRE6iIJLdRQZnRGXn7+Z8YfOKDqDIwqjqIigwyIoorIItOy0NHSj\ne9M9oW3SpG3ahjbb5/fHPY3XkK1tbk6S+34+HvfRe8/53ns+Pb3Ju+f7Ped7zN0REREBSAi7ABER\n6TsUCiIi0kqhICIirRQKIiLSSqEgIiKtFAoiItJKoSASEjO70Mwqwq5DJJpCQeKCme0ws0tD2O6N\nZtZsZkfMrM7MVpnZ+0/hc+4zs2/FokaRaAoFkdh73d2HAlnAz4FHzCw73JJE2qdQkLhnZp8ys3Iz\nqzWzJ8xsVLDczOz7ZlYV/C//LTObGqy70szWm9lhM6s0sy91tR13bwHuBQYD49qpY5KZvWBmB81s\nnZldFSxfBFwPfDk44vhDD/71Rf6KQkHimpldDHwb+DAwEtgJPBSsvhyYB0wAMoM2NcG6nwOfdvd0\nYCqwuBvbSgI+CRwBtrRZlwz8AfgzMAz4PPBrMzvT3e8Bfg18192HuvvfnvJfWKQLCgWJd9cD97r7\nCnc/DnwNmGNmJUAjkA5MBMzdN7j7nuB9jcBkM8tw9wPuvqKTbcw2s4PAXmAh8AF3P9S2DTAU+I67\nN7j7YuCPQXuRXqNQkHg3isjRAQDufoTI0UBB8Iv5R8BdQJWZ3WNmGUHTa4ArgZ1m9qKZzelkG0vd\nPcvd89x9trs/10Edu4MuphN2AgWn/lcTOXkKBYl3bwOjT7wwszQgF6gEcPc73f1cYDKRbqRbg+XL\n3X0Bka6ex4FHeqCOIjOL/pksPlEHoOmMpVcoFCSeJJvZoKhHEvAgcJOZlZpZKvAfwDJ332Fm55nZ\nrKC//yhwDGgxsxQzu97MMt29EagDWjrcavcsA+qJDCYnm9mFwN/yl/GNfcDY09yGSJcUChJPngTe\niXp8I+jK+RfgMWAPkbOCrgvaZwA/BQ4Q6cqpAb4XrLsB2GFmdcBniIxNnDJ3byASAlcA+4G7gY+6\n+8agyc+JjGEcNLPHT2dbIp0x3WRHRERO0JGCiIi0UiiIiEgrhYKIiLRSKIiISKuksAs4WXl5eV5S\nUhJ2GSIi/cqbb765393zu2oXs1Aws3uB9wNV7j61nfUTgV8A04Gvu/t/dudzS0pKKCsr69FaRUQG\nOjPb2XWr2HYf3QfM72R9LXAL0K0wEBGR2ItZKLj7S0R+8Xe0vsrdlxOZWExERPoADTSLiEirfhEK\nZrbIzMrMrKy6ujrsckREBqx+EQrufo+7z3D3Gfn5XQ6ei4jIKeoXoSAiIr0jlqekPghcCOSZWQXw\nb0AygLv/2MxGAGVEZqJsMbMvAJPdvS5WNYmISOdiFgru3ultBN19L1AYq+23tWnvYR59czdfvGwC\nQ1L63TV7IiK9Im66jyoO1PPTl7eztlIHIiIiHYmbUCgtygJg1e4D4RYiItKHxU0o5A5NpShnMKt2\nHwy7FBGRPituQgGgtCibVbsOhl2GiEifFWehkMXbh46xr+5Y2KWIiPRJcRcKACt1tCAi0q64CoUp\nozJITjSNK4iIdCCuQmFQciKTRmboDCQRkQ7EVShApAvprYpDNLd42KWIiPQ5cRcK04qzONrQzJaq\nw2GXIiLS58RdKJQWZQPo1FQRkXbEXSiU5A4ha0iyBptFRNoRd6FgZpxTmKVQEBFpR9yFAkQGmzfv\nO8zR401hlyIi0qfEZygUZ9HisKbiUNiliIj0KfEZCoVZAOpCEhFpIy5DITsthZLcIbqITUSkjbgM\nBYiMK6zcdRB3XcQmInJCXIdC1eHj7DmkGVNFRE6I31AoDi5i07iCiEirmIWCmd1rZlVmtraD9WZm\nd5pZuZmtMbPpsaqlPZNGppOSmKBQEBGJEssjhfuA+Z2svwIYHzwWAf8Tw1reJTUpkcmjMjTdhYhI\nlJiFgru/BNR20mQBcL9HLAWyzGxkrOppT2lRFm9VHqKpuaU3Nysi0meFOaZQAOyOel0RLHsXM1tk\nZmVmVlZdXd1jBUwrzuKdxmY27dOMqSIi0E8Gmt39Hnef4e4z8vPze+xzpxVpsFlEJFqYoVAJFEW9\nLgyW9ZqinMHkpKVoXEFEJBBmKDwBfDQ4
C2k2cMjd9/RmAWZGaZFmTBUROSEpVh9sZg8CFwJ5ZlYB\n/BuQDODuPwaeBK4EyoF64KZY1dKZ0qIslmyq4vCxRtIHJYdRgohInxGzUHD3hV2sd+Bzsdp+d5UW\nZeHBjKlzz8gLuxwRkVD1i4HmWDqnKAvQYLOICCgUyByczNj8NFbu0oypIiJxHwpA62CzZkwVkXin\nUACmFWWx/0gDFQfeCbsUEZFQKRSAUl3EJiICKBQAmDgyndQkzZgqIqJQAJITE5hakKlQEJG4p1AI\nlBZlsbbyEI2aMVVE4phCITCtOIvjTS1s3KMZU0UkfikUAqWtF7HpegURiV8KhUBB1mDyhqayUuMK\nIhLHFAoBzZgqIqJQ+CvTirPYVn2UQ/WNYZciIhIKhUKUE+MKqysOhlqHiEhYFApRzi7MxAxW6k5s\nIhKnFApR0gclc0b+UJ2BJCJxS6HQhmZMFZF4plBoo7Q4iwP1jeyqrQ+7FBGRXqdQaKNUd2ITkTgW\n01Aws/lmtsnMys3sq+2sH21mz5vZGjN7wcwKY1lPd5w5PJ3ByYkabBaRuBSzUDCzROAu4ApgMrDQ\nzCa3afafwP3ufjZwG/DtWNXTXUmJCZylGVNFJE7F8khhJlDu7tvcvQF4CFjQps1kYHHwfEk760NR\nWpzF+rfrON7UHHYpIiK9KpahUADsjnpdESyLthr4YPD8A0C6meW2/SAzW2RmZWZWVl1dHZNio00r\nyqKhuYUNmjFVROJM2APNXwIuMLOVwAVAJfCu/567+z3uPsPdZ+Tn58e8qNLiLABW7dL1CiISX5Ji\n+NmVQFHU68JgWSt3f5vgSMHMhgLXuPvBGNbULSMzBzM8I1XjCiISd2J5pLAcGG9mY8wsBbgOeCK6\ngZnlmdmJGr4G3BvDek6KZkwVkXgUs1Bw9ybgZuAZYAPwiLuvM7PbzOyqoNmFwCYz2wwMB/49VvWc\nrNKibHbU1FN7tCHsUkREek0su49w9yeBJ9ss+9eo548Cj8ayhlPVOmPq7oNcNHFYuMWIiPSSsAea\n+6yzCzNJMHQnNhGJKwqFDqSlJjFheLrGFUQkrigUOlFalMVqzZgqInFEodCJ0qIsDr3TyPb9R8Mu\nRUSkVygUOtF6EZu6kEQkTigUOjF+WDppKYkKBRGJGwqFTiQmGGcVasZUEYkfCoUuTCvOZsOeOo41\nasZUERn4FApdKC3KorHZWfd2XdiliIjEnEKhC9N0e04RiSMKhS4MyxjEqMxBCgURiQsKhW4oLc5i\npe6tICJxQKHQDaVFWVQceIf9R46HXYqISEwpFLqhtCgbgFW7DoZbiIhIjCkUuuGsgkwSE0zjCiIy\n4CkUumFwSiJnasZUEYkDCoVuKi2OzJja0qIZU0Vk4FIodNO0oiwOH29i6faasEsREYkZhUI3XXHW\nSIpzhnDrb9Zw6J3GsMsREYmJmIaCmc03s01mVm5mX21nfbGZLTGzlWa2xsyujGU9p2NoahI/uK6U\nfXXH+Prv3tKNd0RkQIpZKJhZInAXcAUwGVhoZpPbNPu/wCPuPg24Drg7VvX0hGnF2Xzxsgn8cc0e\nHltRGXY5IiI9LpZHCjOBcnff5u4NwEPAgjZtHMgInmcCb8ewnh7xmQvGMXtsDv/6+7Xs0B3ZRGSA\niWUoFAC7o15XBMuifQP4iJlVAE8Cn2/vg8xskZmVmVlZdXV1LGrttsQE4/vXlpKcmMAtD62koakl\n1HpERHpS2APNC4H73L0QuBJ4wMzeVZO73+PuM9x9Rn5+fq8X2dbIzMHcfs1ZrKk4xB3Pbg67HBGR\nHhPLUKgEiqJeFwbLon0CeATA3V8HBgF5Maypx8yfOpKFM4v5yUtbebV8f9jliIj0iFiGwnJgvJmN\nMbMUIgPJT7Rpswu4BMDMJhEJhXD7h07Cv7x/EmPz0vinR1ZRe7Qh7HJERE5bzELB3ZuAm4FngA1E\nzjJaZ2a3mdlVQbN/Bj5lZquBB4EbvR+d6zkkJYkfXDeNA0cb+cpja3Saqoj0e9bffpHNmDHDy8rK\nwi7jr/zs5W18608b+NbVU/nI7NFhlyMi8i5m9qa7z+iqXdgDzQPCx+eO4YIJ+Xzzj+vZvO9w2OWI\niJwyhUIPSEgw/vPvziF9UBK3PLiSY43NYZckInJKFAo9JD89le996Bw27j3Md57aGHY5IiKnRKHQ\ngy6aOIyb5pZw32s7WLxxX9jliIicNIVCD/vK/IlMHJHOrb9ZQ9XhY2GXIyJyUhQKPWxQciI/XDiN\nI8eb+OdHVuumPCLSr3QrFMxsnJmlBs8vNLNbzCwrppX1Y+OHp/Mv75/My1v2c++r28MuR0Sk27p7\npPAY0GxmZwD3EJm+4n9jVtUAcP2sYi6bPJzbn97I2spDYZcjItIt3Q2FluAK5Q8AP3T3W4GRsSur\n/zMzbr/mbHLSUrjloZXUNzSFXZKISJe6GwqNZrYQ+Bjwx2BZcmxKGjhy0lL4/odL2b7/KN/84/qw\nyxER6VJ3Q+EmYA7w7+6+3czGAA/ErqyB4/wz8vjMBeN48I3dPPXWnrDLERHpVFJ3Grn7euAWADPL\nBtLd/fZYFjaQ/NNlE3itfD9feWwNo3PTmDwqo+s3iYiEoLtnH71gZhlmlgOsAH5qZnfEtrSBIzkx\ngTsXTmNIShIf+vFrPLteF7aJSN/U3e6jTHevAz4I3O/us4BLY1fWwDM6N43f3zyXM4YNZdEDZfzk\nxa2aaltE+pzuhkKSmY0EPsxfBprlJA3PGMTDi+Zw5dSRfPupjXz50TW6x7OI9CndGlMAbiNys5xX\n3X25mY0FtsSurIFrcErkiudx+WncubicnbX1/Pgj55KTlhJ2aSIiuslOmH6/qpJbH13DiIxB3Hvj\nDM4Ylh52SSIyQPXoTXbMrNDMfmdmVcHjMTMrPP0y49uC0gIe/NRs6hua+MDdr/HS5n5ze2oRGaC6\nO6bwC+AJYFTw+EOwTE7TuaOzefxzcynIGsxN9y3nl6/tCLskEYlj3Q2FfHf/hbs3BY/7gPwY1hVX\nCrOH8Og/nM+FE/L5tyfW8S+Pr6WpWQPQItL7uhsKNWb2ETNLDB4fAWq6epOZzTezTWZWbmZfbWf9\n981sVfDYbGYHT7L+AWNoahL3fHQGi+aN5YGlO7npvuUceqcx7LJEJM50NxQ+TuR01L3AHuBDwI2d\nvcHMEoG7gCuAycBCM5sc3cbdv+jupe5eCvwQ+O3JFD/QJCYY/+fKSdx+zVm8vrWGD979Kjv2Hw27\nLBGJI90KBXff6e5XuXu+uw9z96uBa7p420yg3N23uXsD8BCwoJP2C4EHu1PPQHftecX86pOzqDna\nwNV3v8rSbV0elImI9IjTufPaP3WxvgDYHfW6Ilj2LmY2GhgDLO5g/SIzKzOzsurq+DhDZ/bYXB7/\n7Fxy01K44efLeGT57q7fJCJymk4nFKzHqoDrgEfdvbm9le5+j7vPcPcZ+fnxM75dkpfGbz87l9lj\
nc/nyY2v4jyc3aABaRGLqdEKhq6veKoncoe2EwmBZe65DXUftyhyczC9uPI8bZo/mnpe28YG7X2P9\n23VhlyUiA1SnoWBmh82srp3HYSLXK3RmOTDezMaYWQqRX/xPtLONiUA28Pop/h0GvKTEBL559VTu\nvn46ew69w1U/eoX/+vMmjje1e2AlInLKOg0Fd09394x2Hunu3um8ScHtO28mMmfSBuARd19nZreZ\n2VVRTa8DHvL+Nt9GCK48ayTPfvECriodxQ8Xl/P+O19hxa4DYZclIgOI5j7qp5ZsquLrv32LPXXH\n+PjcMfzz5RMYktLd+Q1FJN706NxH0vdcdOYwnvniPK6fVczPX9nO/P9+mde27g+7LBHp5xQK/Vj6\noGS+dfVZPLxoNgkGf//TZXztt29Rd0xXQovIqVEoDACzxuby9Bfm8el5Y3l4+S4uv+Mlnt+gW36K\nyMlTKAwQg5IT+dqVk/jdZ+eSNSSZT/yyjH98aCW1RxvCLk1E+hGFwgBzTlEWT9z8Hr5w6XiefGsP\nl97xIk+sflv3gxaRblEoDEApSQl84dIJ/PHz76UoezC3PLiST93/JvvqjoVdmoj0cQqFAezMEen8\n9rNz+fqVk3h5SzWX3vEiP3lxK8caddGbiLRPoTDAJSYYn5o3lme+MI9zR2fz7ac2cvF/vsBvynbT\n3KIuJRH5awqFOFGSl8Z9N83kwU/NJj9jELc+uoYrfvASz63fp/EGEWmlUIgzc8bl8vhnz+fu66fT\n2Ox88v4yPvyT13lzZ23YpYlIH6BQiENmxpVnjeTPX5zHt66eyvb99VzzP6+z6P4yyqsOh12eiIRI\ncx8JR483ce8r2/nJS9uob2jiwzOK+MKlExiROSjs0kSkh3R37iOFgrSqOXKcHy0p51dLd5KYYHx8\n7hg+fcE4Mgcnh12aiJwmhYKcsl019dzx7CYeX/U2WUOSufmiM/jI7NEMSk4MuzQROUUKBTltaysP\ncfvTG3l5y34KsgbzxcsmcHXpKJISNRQl0t9o6mw5bVMLMnngE7P41SdmkZOWwpd+s5pL73iRh5fv\noqFJ94oWGYh0pCDd0tLi/Hn9Xn60pJy1lXWMyhzEonljuW5msbqVRPoBdR9JTLg7L2yu5q7F5ZTt\nPEDe0BQ++d6xfGT2aIam6s5vIn2VQkFiyt1Ztr2Wu5aU8/KW/WQOTubG80u4aW4JWUNSwi5PRNro\nE2MKZjbfzDaZWbmZfbWDNh82s/Vmts7M/jeW9UjPMTNmj83lgU/M4vefm8vMMTn84PktzP3OYr79\n5AaqDmtGVpH+KGZHCmaWCGwGLgMqgOXAQndfH9VmPPAIcLG7HzCzYe5e1dnn6kih79q4t467l2zl\nj2veJjkxgevOK2LRBeMoyBocdmkica8vHCnMBMrdfZu7NwAPAQvatPkUcJe7HwDoKhCkb5s4IoM7\nF07j+X++kAWlo/j1sl1c8N0lfPnR1WzffzTs8kSkG2IZCgXA7qjXFcGyaBOACWb2qpktNbP57X2Q\nmS0yszIzK6uuro5RudJTxuSl8d0PncOLX76I62cV8/tVb3PJf73A5x9cyardBzUrq0gfFvbpIknA\neOBCoBB4yczOcveD0Y3c/R7gHoh0H/VyjXKKCrIG8/8WTOXmi8fzs1e28eulu/jD6rc5qyCTG+aM\n5qpzRul0VpE+JpZHCpVAUdTrwmBZtArgCXdvdPftRMYgxsewJglBfnoqX7tiEkv/zyV8c8EUjjc1\n8+VH1zDrP57n3/+0nh3qWhLpM2I50JxE5Jf8JUTCYDnw9+6+LqrNfCKDzx8zszxgJVDq7jUdfa4G\nmvu/E6ezPrB0J8+s3UtTi3PBhHxumD2aiyYOIzHBwi5RZMDp7kBzzLqP3L3JzG4GngESgXvdfZ2Z\n3QaUufsTwbrLzWw90Azc2lkgyMBw4nTW2WNz2Vd3jIfe2M3/vrGTT95fRkHWYK6fXcy1M4rIHZoa\ndqkicUcXr0mf0NjcwnPr93H/6zt5fVsNKYkJ/M3ZI7lhzmimFWVhpqMHkdOhK5ql39qy7zC/WrqT\nx1ZUcuR4E1NGZfDROaO56pwCBqdoYFrkVCgUpN87cryJx1dW8sDrO9m07zAZg5K4eloBH55RxNSC\nzLDLE+lXFAoyYLg7y3cciAxMr9tLQ1MLU0ZlcO15RSw4p4DMIboznEhXFAoyIB2sb+D3q97m4eW7\nWb+njtSkBOZPHcG1M4qYPTaXBJ25JNIuhYIMeGsrD/Hw8t08vqqSw8eaKM4Zwt+dW8iHZhQyMlPz\nLYlEUyhI3DjW2MzTa/fy8PLdvL6thgSDeRPyuXZGEZdMGk5Kkm4wKKJQkLi0s+Yovymr4NE3K9hb\nd4zctBQ+MK2Aa88rYvzw9LDLEwmNQkHiWnOL89Lmah5evpvnNuyjqcWZVpzFh84t5P1njyJzsAan\nJb4oFEQC+48c5/GVlTy8fDdbqo6QkpTAZZOGc825Bcwbn09SorqXZOBTKIi04e6srazjsRUV/H5V\nJQfqG8kbmsrVpaP44PRCJo/KCLtEkZhRKIh0oqGphRc2VfHYigoWb6yisdmZNDKDa6YXsKC0gPx0\nzbskA4tCQaSbao828IfVb/PbFRWsrjhEYoJxwYR8rpleyCWThumeDzIgKBRETsGWfYd5bEUlj6+s\nZG/dMTIGJfH+c0ZxzfRCphdrYj7pvxQKIqehucV5bet+HnuzgqfX7eVYYwtj8tL4m7NGMn/qCKaM\nylBASL+iUBDpIUeON/HkW3v43YpKlm2vocWhKGcw86eMYP7UEUwrytb0GtLnKRREYqDmyHGe27CP\np9bu5dXy/TQ2O8MzUnnflBHMnzKCmWNydIqr9EkKBZEYqzvWyOINVTy1dg8vbq7mWGMLOWkpXDZp\nOPOnjuD8M3JJTdIgtfQNCgWRXlTf0MSLm6p5et1ent9QxZHjTaSnJnHxpGFcMXUEF0wYphsESagU\nCiIhOd7UzGvlNTy1dg/Prt/HgfpGBiUncOGEYVw6eTgXnplPnu4/Lb2su6GQFOMi5gM/ABKBn7n7\nd9qsvxH4HlAZLPqRu/8sljWJxFpqUiIXTRzGRROH0dTcwhvba3lq7V6eWbeXp9ftxQzOLszikonD\nuHjiMJ3JJH1KzI4UzCwR2AxcBlQAy4GF7r4+qs2NwAx3v7m7n6sjBemv3J11b9exeGMVz2+sYk3F\nQdxheEYqF50ZCYi5Z+SRlhrT/6tJnOoLRwozgXJ33xYU9BCwAFjf6btEBigzY2pBJlMLMrnlkvFU\nHz7OC5uqWLKpij+t2cNDy3eTkpjArLE5wVHEcIpzh4RdtsSZWB4pfAiY7+6fDF7fAMyKPioIjhS+\nDVQTOar4orvvbuezFgGLAIqLi8/duXNnTGoWCUtDUwtlO2pZvLGKxRur2Lb/KABnDBvKxROHcdGZ\nw5hRkk2yTneVUxT6QHM3QyEXOOLux83s08C17n5xZ5+r
7iOJBzv2H20NiGXba2hsdtIHJTFrTC6z\nx+Ywa0wuk0dlkKiL5qSb+kL3USVQFPW6kL8MKAPg7jVRL38GfDeG9Yj0GyV5aXz8PWP4+HvGcOR4\nE69sqeaFTdUs3VbDcxv2AZCemsSMkmxmjc1l1pgcphZk6khCTlssQ2E5MN7MxhAJg+uAv49uYGYj\n3X1P8PIqYEMM6xHpl4amJjF/6kjmTx0JwN5Dx1i2vYZl22tZtq2GJZuqARiSksi5o7OZHYTE2YVZ\nuj+1nLSYhYK7N5nZzcAzRE5Jvdfd15nZbUCZuz8B3GJmVwFNQC1wY6zqERkoRmQOYkFp5L4PANWH\nj/PG9lqWbqth2fYavvfMJgAGJScwvTibWWNymTU2h9KiLE0DLl3SxWsiA0zt0Qbe2F7D0m21LNte\ny8a9dbhDSlIC55VkM298PvMm5DNxRLquj4gjoQ80x4pCQeTkHKpv5I0dkSOJl7dUs3nfEQDy01N5\n7/g8LpiQz3vOyCNXV1kPaAoFEWnX3kPHeGlLNS9truaV8v0crG/EDKaOyuS94/OYNyGf6cXZGo8Y\nYBQKItKl5hbnrcpDvLy5mpe2VLNi10GaW5y0lETmjMtl3oR85o3PpyQvLexS5TQpFETkpNUda+T1\nrTW8FITE7tp3ACjOGcJ7x+fx3vF5zB6bS9aQlJArlZOlUBCR0+Lu7Kip5+Wgq+m1rTXUNzS3djWd\nf0Yuc8flcV5JjqYF7wcUCiLSoxqaWlhdcZBXy/fzWnkNK3cfoLHZSUlMYFpxFnPPyGPuGbmcXZil\ni+j6IIWCiMRUfUMTb2yv5bWtNbxavp/1eyKnvg5NTWLmmBzOH5fL3DPydOprH9EXprkQkQFsSEoS\nF545jAvPHAbAgaMNvL4tEhCvba1h8cYqAPKGpjBnXB5zx+Vy0cRhDM8YFGbZ0gUdKYhITFQefCfo\natrPq1trqD58HIBpxVnMnzKC900ZobOaepG6j0Skz3B3Nu87wrPrI3efW1tZB8DEEelcPmUE86eM\nYNJIdTPFkkJBRPqs3bX1/Hn9Pp5Zt5flO2pxh6KcwcyfMoL5U0cwrSibBE0L3qMUCiLSL+w/cpzn\n1u/j6XV7ebV8P43NTn56KpdPHs77poxg9thcXV3dAxQKItLv1B1rZMnGKp5Zt5cXNlVT39BMxqAk\nLpk0nPdNGc6ccXlkDk4Ou8x+SaEgIv3ascZmXt6yn2fW7eW5Dfta52iaMCyd6aOzmTE6mxkl2RTn\nDNFYRDcoFERkwGhqbmH5jgMs31HLmzsPsGLXAQ4fawIgb2gq547O4tzR2Zw7OoepBRmkJukK67Z0\nnYKIDBhJiQnMGZfLnHG5ALS0OJurDvPmzgO8ueMAb+46wDPrIrcpTUlK4OyCzCAkspk+Ops8TQve\nbTpSEJEBoerwMVbsPMibOyNHE2sr62hobgFgTF4a04uzec/4XC6YMIyctPib0E/dRyIS1441NrO2\n8hBlOw9Ejih2HqD2aANmUFqUxcVnDuOiicOYMiojLsYkFAoiIlFagntHLN5YxQubqlhdcQiAYemp\nXHTmMC6amM97xuczNHVg9qr3iVAws/nAD4BE4Gfu/p0O2l0DPAqc5+6d/sZXKIhIT6g+fJwXN1ez\nZGMVL22u5vDxJpITjZljcoKQGMbYvLQBcxQReiiYWSKwGbgMqACWAwvdfX2bdunAn4AU4GaFgoj0\ntsbmFt7ceYAlG6tYsqmq9T7Wo3OHtAbErDE5DEruv2c19YWzj2YC5e6+LSjoIWABsL5Nu28CtwO3\nxrAWEZEOJScmMHtsLrPH5vK1Kyexu7aeFzZVsWRTNQ++sYv7XtvB4OREZpRkc15JDjPH5FBalNWv\nQ6IjsQyFAmB31OsKYFZ0AzObDhS5+5/MTKEgIn1CUc4QbphTwg1zSjjW2Mzr22p4YWMVy7bX8v3n\nNuMOyYnG2YVZnFeSw3kl2cwYnUPmkP5/tXVoIypmlgDcAdzYjbaLgEUAxcXFsS1MRCTKoOTESBdS\ncN+IQ/WNlO2s5Y0dtSzfXsvPX9nGj190zODM4emRkBiTw8ySHEZk9r97R8RyTGEO8A13f1/w+msA\n7v7t4HUmsBU4ErxlBFALXNXZuILGFESkL3mnoZlVuw+yfEdt6xXX9Q3NABTnDGFGSTYzg6AYk5sW\n2uyvfWGgOYnIQPMlQCWRgea/d/d1HbR/AfiSBppFpD9ram5h/Z463theGwRF5PoIgNSkBIpzhjA6\nN42S3CGMzo08H507hIKswSTF8N7WoQ80u3uTmd0MPEPklNR73X2dmd0GlLn7E7HatohIWJISEzi7\nMIuzC7P45HvH4u5srT5C2Y4DbK0+ws6aenbW1PNKeTXHGlv+8r4EozB7MMWtgZHG6JwhlOQNoTB7\nSK8NauviNRGRELg7VYePs2P/UXbW1rOz5ig7aurZVVPPjpqjrRP+AZjByIxB3DR3DJ+aN/aUthf6\nkYKIiHTMzBieMYjhGYOYNTb3r9a5OwfrG9lRc7T1yGJnzVGGZcR+Yj+FgohIH2NmZKelkJ2WwrTi\n7F7dtu5xJyIirRQKIiLSSqEgIiKtFAoiItJKoSAiIq0UCiIi0kqhICIirRQKIiLSqt9Nc2Fm1cDO\nU3x7HrC/B8vpaX29Puj7Naq+06P6Tk9frm+0u+d31ajfhcLpMLOy7sz9EZa+Xh/0/RpV3+lRfaen\nr9fXHeo+EhGRVgoFERFpFW+hcE/YBXShr9cHfb9G1Xd6VN/p6ev1dSmuxhRERKRz8XakICIinVAo\niIhIqwEZCmY238w2mVm5mX21nfWpZvZwsH6ZmZX0Ym1FZrbEzNab2Toz+8d22lxoZofMbFXw+Nfe\nqi/Y/g4zeyvY9rvufWoRdwb7b42ZTe/F2s6M2i+rzKzOzL7Qpk2v7z8zu9fMqsxsbdSyHDN71sy2\nBH+2e7cUM/tY0GaLmX2sF+v7npltDP4Nf2dmWR28t9PvQwzr+4aZVUb9O17ZwXs7/XmPYX0PR9W2\nw8xWdfDemO+/HuXuA+oBJAJbgbFACrAamNymzWeBHwfPrwMe7sX6RgLTg+fpwOZ26rsQ+GOI+3AH\nkNfJ+iuBpwADZgPLQvy33kvkopxQ9x8wD5gOrI1a9l3gq8HzrwK3t/O+HGBb8Gd28Dy7l+q7HEgK\nnt/eXn3d+T7EsL5vAF/qxneg05/3WNXXZv1/Af8a1v7rycdAPFKYCZS7+zZ3bwAeAha0abMA+GXw\n/FHgEjOz3ijO3fe4+4rg+WFgA1DQG9vuQQuA+z1iKZBlZiNDqOMSYKu7n+oV7j3G3V8Catssjv6e\n/RK4up23vg941t1r3f0A8Cwwvzfqc/c/u/uJu8MvBQp7ervd1cH+647u/Lyfts7qC353fBh4sKe3\nG4aBGAoFwO6o1xW8+5dua5vgh+IQkEsvC7qtpgHL2lk9x8xWm9lTZjaldyvDgT+b2Ztmtqid9d3Z\nx73hOjr+QQx
z/50w3N33BM/3AsPbadNX9uXHiRz9taer70Ms3Rx0b93bQfdbX9h/7wX2ufuWDtaH\nuf9O2kAMhX7BzIYCjwFfcPe6NqtXEOkSOQf4IfB4L5f3HnefDlwBfM7M5vXy9rtkZinAVcBv2lkd\n9v57F4/0I/TJ87/N7OtAE/DrDpqE9X34H2AcUArsIdJF0xctpPOjhD7/8xRtIIZCJVAU9bowWNZu\nGzNLAjKBml6pLrLNZCKB8Gt3/23b9e5e5+5HgudPAslmltdb9bl7ZfBnFfA7Iofo0bqzj2PtCmCF\nu+9ruyLs/Rdl34luteDPqnbahLovzexG4P3A9UFwvUs3vg8x4e773L3Z3VuAn3aw3bD3XxLwQeDh\njtqEtf9O1UAMheXAeDMbE/xv8jrgiTZtngBOnOXxIWBxRz8QPS3of/w5sMHd7+igzYgTYxxmNpPI\nv1OvhJaZpZlZ+onnRAYj17Zp9gTw0eAspNnAoahukt7S4f/Owtx/bUR/zz4G/L6dNs8Al5tZdtA9\ncnmwLObMbD7wZeAqd6/voE13vg+xqi96nOoDHWy3Oz/vsXQpsNHdK9pbGeb+O2Vhj3TH4kHk7JjN\nRM5K+Hqw7DYiX36AQUS6HcqBN4CxvVjbe4h0I6wBVgWPK4HPAJ8J2twMrCNyJsVS4PxerG9ssN3V\nQQ0n9l90fQbcFezft4AZvfzvm0bkl3xm1LJQ9x+RgNoDNBLp1/4EkXGq54EtwHNATtB2BvCzqPd+\nPPgulgM39WJ95UT64098D0+ckTcKeLKz70Mv1fdA8P1aQ+QX/ci29QWv3/Xz3hv1BcvvO/G9i2rb\n6/uvJx+a5kJERFoNxO4jERE5RQoFERFppVAQEZFWCgUREWmlUBARkVYKBZGAmTXbX8/A2mMzbppZ\nSfQMmyJ9VVLYBYj0Ie+4e2nYRYiESUcKIl0I5sP/bjAn/htmdkawvMTMFgcTtj1vZsXB8uHB/QlW\nB4/zg49KNLOfWuQ+Gn82s8FB+1sscn+NNWb2UEh/TRFAoSASbXCb7qNro9YdcvezgB8B/x0s+yHw\nS3c/m8hkcncGy+8EXvTIhHzTiVzJCjAeuMvdpwAHgWuC5V8FpgWf85nY/NVEukdXNIsEzOyIuw9t\nZ/kO4GJ33xZMZrjX3XPNbD+RqRcag+V73D3PzKqBQnc/HvUZJUTumzA+eP0VINndv2VmTwNHiMzm\n+rgHk/mJhEFHCiLd4x08PxnHo54385cxvb8hMpfUdGB5MPOmSCgUCiLdc23Un68Hz18jMisnwPXA\ny8Hz54F/ADCzRDPL7OhDzSwBKHL3JcBXiEzj/q6jFZHeov+RiPzF4DY3X3/a3U+clpptZmuI/G9/\nYbDs88AvzOxWoBq4KVj+j8A9ZvYJIkcE/0Bkhs32JAK/CoLDgDvd/WAP/X1ETprGFES6EIwpzHD3\n/WHXIhJr6j4SEZFWOlIQEZFWOlIQEZFWCgUREWmlUBARkVYKBRERaaVQEBGRVv8f850UGBZWDxQA\nAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "execution_count": 23, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "import matplotlib.pyplot as plt\n", "# Below loss curve is not the actual loss image we have got\n", "# while training and kept it here only as a reference\n", "plt.plot(loss_plot)\n", "plt.xlabel('Epochs')\n", "plt.ylabel('Loss')\n", "plt.title('Loss Plot')\n", "plt.show()" ] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [], "name": "dataloaders.ipynb", "provenance": [], "toc_visible": true, "version": "0.3.2" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: docs/Tutorial-QuickStart.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Quick Start Tutorial" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A simple step-by-step guide that will quickly get you started with Hangar basics, including initializing a repository, adding and committing data to a repository." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Installation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can install Hangar via `pip`:\n", "\n", "```\n", "$ pip install hangar\n", "```\n", "\n", "or via `conda`:\n", "\n", "```\n", "$ conda install -c conda-forge hangar\n", "```\n", "\n", "Please refer to the [Installation](installation.rst) page for more information." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Quick Start for the Impatient" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The only import statement you'll ever need:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from hangar import Repository" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create and initialize a new Hangar `Repository` at the given path:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hangar Repo initialized at: /Volumes/Archivio/tensorwerk/hangar/quick-start/.hangar\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "//anaconda/envs/hangar-tutorial/lib/python3.8/site-packages/hangar/context.py:92: UserWarning: No repository exists at /Volumes/Archivio/tensorwerk/hangar/quick-start/.hangar, please use `repo.init()` method\n", " warnings.warn(msg, UserWarning)\n" ] }, { "data": { "text/plain": [ "'/Volumes/Archivio/tensorwerk/hangar/quick-start/.hangar'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "!mkdir /Volumes/Archivio/tensorwerk/hangar/quick-start\n", "\n", "repo = Repository(path=\"/Volumes/Archivio/tensorwerk/hangar/quick-start\")\n", "\n", "repo.init(\n", " user_name=\"Alessia Marcolini\", user_email=\"alessia@tensorwerk.com\", remove_old=True\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Checkout the `Repository` in write mode:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar WriterCheckout \n", " Writer : True \n", " Base Branch : master \n", " Num Columns : 0\n" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout = repo.checkout(write=True)\n", "master_checkout" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Inspect the `columns` we have (we just started, none so far):" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Columns \n", " Writeable : True \n", " Number of Columns : 0 \n", " Column Names / Partial Remote References: \n", " - " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Prepare some random data to play with:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.17961852, 0.31945355],\n", " [0.10929027, 0.2681622 ],\n", " [0.29397449, 0.02659856]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "\n", "dummy = np.random.rand(3,2)\n", "dummy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create a new column named `dummy_column`:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : dummy_column \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : float64 \n", " Shape : (3, 2) \n", " Number of Samples : 0 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dummy_col = master_checkout.add_ndarray_column(name=\"dummy_column\", prototype=dummy)\n", "dummy_col" ] 
}, { "cell_type": "markdown", "metadata": {}, "source": [ "Add data to `dummy_column`, treating it as a normal Python dictionary:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "dummy_col[0] = dummy" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "dummy_col[1] = np.random.rand(3,2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Commit your changes providing a message:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=c104ef7e2cfe87318e78addd6033028488050cea'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.commit(\"Add dummy_column with 2 samples\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Add more data and commit again:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : dummy_column \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : float64 \n", " Shape : (3, 2) \n", " Number of Samples : 3 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dummy_col[2] = np.random.rand(3,2)\n", "dummy_col" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=099557d48edebb7607fa3ec648eafa2a1af5e652'" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.commit(\"Add one more sample to dummy_column\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "See the master branch history:" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=099557d48edebb7607fa3ec648eafa2a1af5e652 (\u001B[1;31mmaster\u001B[m) : Add one more sample to dummy_column\n", "* a=c104ef7e2cfe87318e78addd6033028488050cea : Add dummy_column with 2 samples\n" ] } ], "source": [ "master_checkout.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Close the write-enabled checkout:" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "master_checkout.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Inspect the status of the `Repository`:" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary of Contents Contained in Data Repository \n", " \n", "================== \n", "| Repository Info \n", "|----------------- \n", "| Base Directory: /Volumes/Archivio/tensorwerk/hangar/quick-start \n", "| Disk Usage: 237.53 kB \n", " \n", "=================== \n", "| Commit Details \n", "------------------- \n", "| Commit: a=099557d48edebb7607fa3ec648eafa2a1af5e652 \n", "| Created: Mon May 4 13:00:43 2020 \n", "| By: Alessia Marcolini \n", "| Email: alessia@tensorwerk.com \n", "| Message: Add one more sample to dummy_column \n", " \n", "================== \n", "| DataSets \n", "|----------------- \n", "| Number of Named Columns: 1 \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"dummy_column\", layout=\"flat\") \n", "| Num Data Pieces: 3 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: ndarray \n", "| - schema_hasher_tcode: 1 \n", "| - 
data_hasher_tcode: 0 \n", "| - schema_type: fixed_shape \n", "| - shape: (3, 2) \n", "| - dtype: float64 \n", "| - backend: 01 \n", "| - backend_options: {'complib': 'blosc:lz4hc', 'complevel': 5, 'shuffle': 'byte'} \n", "\n" ] } ], "source": [ "repo.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "### Quick Start - with explanations" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1. Create and initialize a \"Repository\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Central to Hangar is the concept of [Repository](api.rst#hangar.repository.Repository).\n", "\n", "A `Repository` consists of an **historically ordered mapping** of **Commits** over time by various **Committers** across any number of **Branches**. Though there are many conceptual similarities in what a Git repo and a Hangar repository achieve, Hangar is designed with the express purpose of dealing with **numeric data**.\n", "\n", "To start using Hangar programmatically, simply begin with this import statement:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from hangar import Repository" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create the folder where you want to store the `Repository`:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "!mkdir /Volumes/Archivio/tensorwerk/hangar/quick-start" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Initialize the `Repository` object by saying where your repository should live.\n", "\n", ".. note:: Note that if you feed a path to the `Repository` which does not contain a pre-initialized Hangar repo, Python shows you a warning saying that you will need to initialize the repo before starting working on it." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "//anaconda/envs/hangar-tutorial/lib/python3.8/site-packages/hangar/context.py:92: UserWarning: No repository exists at /Volumes/Archivio/tensorwerk/hangar/quick-start/.hangar, please use `repo.init()` method\n", " warnings.warn(msg, UserWarning)\n" ] } ], "source": [ "repo = Repository(path=\"/Volumes/Archivio/tensorwerk/hangar/quick-start\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Initialize the `Repository` providing your name and your email.\n", "\n", ".. warning:: Please be aware that the `remove_old` parameter set to `True` **removes and reinitializes** a Hangar repository at the given path." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hangar Repo initialized at: /Volumes/Archivio/tensorwerk/hangar/quick-start/.hangar\n" ] }, { "data": { "text/plain": [ "'/Volumes/Archivio/tensorwerk/hangar/quick-start/.hangar'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repo.init(\n", " user_name=\"Alessia Marcolini\", user_email=\"alessia@tensorwerk.com\", remove_old=True\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 2. 
Open the Staging Area for Writing\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To start interacting with Hangar, first you need to check out the `Repository` you want to work on.\n", "\n", "A repo can be checked out in two modes:\n", "\n", "* [write-enabled](api.rst#hangar.checkout.WriterCheckout)\n", "* [read-only](api.rst#hangar.checkout.ReaderCheckout)\n", "\n", "We need to check out the repo in **write mode** in order to initialize the columns and write into them." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar WriterCheckout \n", " Writer : True \n", " Base Branch : master \n", " Num Columns : 0\n" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout = repo.checkout(write=True)\n", "master_checkout" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A checkout allows access to `columns`. The `columns` attribute of a checkout provide the interface to working with all of the data on disk!" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Columns \n", " Writeable : True \n", " Number of Columns : 0 \n", " Column Names / Partial Remote References: \n", " - " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3. Create some random data to play with" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's create a random array to be used as a dummy example:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.54631485, 0.26578857],\n", " [0.74990074, 0.41764666],\n", " [0.75884524, 0.05547267]])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "\n", "dummy = np.random.rand(3,2)\n", "dummy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 4. Initialize a column" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "With checkout write-enabled, we can now initialize a new column of the repository using the method [add_ndarray_column()](api.rst#hangar.checkout.WriterCheckout.add_ndarray_column).\n", "\n", "All samples within a column have the same data type, and number of dimensions. The size of each dimension can be either fixed (the default behavior) or variable per sample.\n", "\n", "You will need to provide a column name and a prototype, so Hangar can infer the shape of the elements contained in the array. `dummy_col` will become a column accessor object." 
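, "\n", "\n", "A prototype is not the only way to declare the schema. As in the other tutorials, a column can also be created with an explicit `shape` and `dtype` (and `variable_shape=True` when samples may differ in size); a sketch of the equivalent call:\n", "\n", "```python\n", "dummy_col = master_checkout.add_ndarray_column(\n", "    name='dummy_column', shape=(3, 2), dtype=np.float64\n", ")\n", "```"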
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : dummy_column \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : float64 \n", " Shape : (3, 2) \n", " Number of Samples : 0 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dummy_col = master_checkout.add_ndarray_column(name=\"dummy_column\", prototype=dummy)\n", "dummy_col" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Verify we successfully added the new column:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Columns \n", " Writeable : True \n", " Number of Columns : 1 \n", " Column Names / Partial Remote References: \n", " - dummy_column / False" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 5. Add data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To add data to a named column, we can use **dict-style mode** as follows.\n", "Sample keys can be either str or int type." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "dummy_col[0] = dummy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As we can see, `Number of Samples` is equal to 1 now!" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : dummy_column \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : float64 \n", " Shape : (3, 2) \n", " Number of Samples : 1 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dummy_col" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "dummy_col[1] = np.random.rand(3,2)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : dummy_column \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : float64 \n", " Shape : (3, 2) \n", " Number of Samples : 2 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dummy_col" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.17590758, 0.26950355],\n", " [0.88036219, 0.7839301 ],\n", " [0.87321484, 0.04316646]])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dummy_col[1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can also iterate over your column, as you would do with a regular Python dictionary:" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Key: 0\n", "Value: [[0.54631485 0.26578857]\n", " [0.74990074 0.41764666]\n", " [0.75884524 0.05547267]]\n", "\n", "Key: 1\n", "Value: [[0.17590758 0.26950355]\n", " [0.88036219 0.7839301 ]\n", " [0.87321484 0.04316646]]\n", "\n" ] } ], 
"source": [ "for key, value in dummy_col.items():\n", " print('Key:', key)\n", " print('Value:', value)\n", " print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**How many samples are in the column?**" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(dummy_col)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Does the column contain that key?**" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "0 in dummy_col" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "5 in dummy_col" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 6. Commit changes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Once you have made a set of changes you want to **commit**, just simply call the [commit()](api.rst#hangar.checkout.WriterCheckout.commit) method (and pass in a message)!" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=4f42fce2b66476271f149e3cd2eb4c6ba66daeee'" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.commit(\"Add dummy_column with 2 samples\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's add another sample in the column:" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : dummy_column \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : float64 \n", " Shape : (3, 2) \n", " Number of Samples : 3 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dummy_col[2] = np.random.rand(3,2)\n", "dummy_col" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`Number of Samples` is equal to 3 now and we want to keep track of the change with another commit:" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=753e28e27d4b23a0dca0633f90b4513538a98c40'" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.commit(\"Add one more sample to dummy_column\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To view the **history** of your commits:" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=753e28e27d4b23a0dca0633f90b4513538a98c40 (\u001B[1;31mmaster\u001B[m) : Add one more sample to dummy_column\n", "* a=4f42fce2b66476271f149e3cd2eb4c6ba66daeee : Add dummy_column with 2 samples\n" ] } ], "source": [ "master_checkout.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Do not forget to close the write-enabled checkout!**" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "master_checkout.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check the **state of the 
repository** and get useful information about disk usage, the columns you have and the last commit:" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary of Contents Contained in Data Repository \n", " \n", "================== \n", "| Repository Info \n", "|----------------- \n", "| Base Directory: /Volumes/Archivio/tensorwerk/hangar/quick-start \n", "| Disk Usage: 237.53 kB \n", " \n", "=================== \n", "| Commit Details \n", "------------------- \n", "| Commit: a=753e28e27d4b23a0dca0633f90b4513538a98c40 \n", "| Created: Tue Apr 21 21:50:15 2020 \n", "| By: Alessia Marcolini \n", "| Email: alessia@tensorwerk.com \n", "| Message: Add one more sample to dummy_column \n", " \n", "================== \n", "| DataSets \n", "|----------------- \n", "| Number of Named Columns: 1 \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"dummy_column\", layout=\"flat\") \n", "| Num Data Pieces: 3 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: ndarray \n", "| - schema_hasher_tcode: 1 \n", "| - data_hasher_tcode: 0 \n", "| - schema_type: fixed_shape \n", "| - shape: (3, 2) \n", "| - dtype: float64 \n", "| - backend: 01 \n", "| - backend_options: {'complib': 'blosc:lz4hc', 'complevel': 5, 'shuffle': 'byte'} \n", "\n" ] } ], "source": [ "repo.summary()" ] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python (hangar-tutorial)", "language": "python", "name": "hangar-tutorial" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: docs/Tutorial-RealQuickStart.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## \"Real World\" Quick Start Tutorial" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This tutorial will guide you on working with the basics of Hangar, while playing with some \"real world\" data:\n", "\n", "* adding data to a repository\n", "* commiting changes\n", "* reading data from a commit\n", "* inspecting contents of a commit" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can install Hangar via `pip`:\n", "\n", "```\n", "$ pip install hangar\n", "```\n", "\n", "or via `conda`:\n", "\n", "```\n", "$ conda install -c conda-forge hangar\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Other requirements for this tutorial are:\n", "\n", "* pillow - the python imaging library\n", "* tqdm - a simple tool to display progress bars (this is installed automatically as it is a requirement for `Hangar`)\n", "\n", "```\n", "$ pip install pillow\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ 
"### 1. Create and Initialize a \"Repository\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "When working with Hangar programatically (the CLI is covered in later tutorials), we always start with the following import:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from hangar import Repository" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Create the folder where you want to store the Hangar `Repository`:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "!mkdir /Volumes/Archivio/tensorwerk/hangar/imagenette" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "and create the `Repository` object. Note that when you specify a new folder for a Hangar repository, Python shows you a warning saying that you will need to initialize the repo before starting working on it." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "//anaconda/envs/hangar-nested/lib/python3.7/site-packages/hangar-0.5.0.dev1-py3.7-macosx-10.9-x86_64.egg/hangar/context.py:94: UserWarning: No repository exists at /Volumes/Archivio/tensorwerk/hangar/imagenette/.hangar, please use `repo.init()` method\n", " warnings.warn(msg, UserWarning)\n" ] } ], "source": [ "repo = Repository(path=\"/Volumes/Archivio/tensorwerk/hangar/imagenette\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Initialize the `Repository` providing your name and your email.\n", "\n", ".. warning:: Please be aware that the `remove_old` parameter set to `True` **removes and reinitializes** a Hangar repository at the given path." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hangar Repo initialized at: /Volumes/Archivio/tensorwerk/hangar/imagenette/.hangar\n" ] }, { "data": { "text/plain": [ "'/Volumes/Archivio/tensorwerk/hangar/imagenette/.hangar'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repo.init(\n", " user_name=\"Alessia Marcolini\", user_email=\"alessia@tensorwerk.com\", remove_old=True\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2. Open the Staging Area for Writing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A `Repository` can be checked out in two modes: write-enabled and read-only. We need to checkout the repo in write mode in order to initialize the columns and write into them." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "master_checkout = repo.checkout(write=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A checkout allows access to `columns`. The `columns` attribute of a checkout provides the interface to working with all of the data on disk!" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Columns \n", " Writeable : True \n", " Number of Columns : 0 \n", " Column Names / Partial Remote References: \n", " - " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3. Download and Prepare Some Conventionally Stored Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To start playing with Hangar, let's get some data to work on. 
We'll be using the [Imagenette dataset](https://github.com/fastai/imagenette).\n", "\n", "The following commands will download ~96 MB of data to the local directory and decompress the tarball containing ~ 9,200 `.jpeg` images in the folder `data` in the current working directory." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2020-04-04 13:25:37-- https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz\n", "Resolving s3.amazonaws.com... 52.216.238.197\n", "Connecting to s3.amazonaws.com|52.216.238.197|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 98948031 (94M) [application/x-tar]\n", "Saving to: ‘data/imagenette2-160.tgz’\n", "\n", "imagenette2-160.tgz 100%[===================>] 94.36M 4.52MB/s in 22s \n", "\n", "2020-04-04 13:26:00 (4.31 MB/s) - ‘data/imagenette2-160.tgz’ saved [98948031/98948031]\n", "\n" ] } ], "source": [ "!wget https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz -P data" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "!tar -xzf data/imagenette2-160.tgz -C data" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2020-04-04 13:26:24-- http://image-net.org/archive/words.txt\n", "Resolving image-net.org... 171.64.68.16\n", "Connecting to image-net.org|171.64.68.16|:80... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 2655750 (2.5M) [text/plain]\n", "Saving to: ‘data/imagenette2-160/words.txt’\n", "\n", "words.txt 100%[===================>] 2.53M 884KB/s in 2.9s \n", "\n", "2020-04-04 13:26:27 (884 KB/s) - ‘data/imagenette2-160/words.txt’ saved [2655750/2655750]\n", "\n" ] } ], "source": [ "!wget http://image-net.org/archive/words.txt -P data/imagenette2-160" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### The dataset directory structure on disk is as follows:\n", "\n", "Each subdirectory in the `train` / `val` folders (named starting with `\"n0\"`) contains a few hundred images which feature objects/elements of a common classification (tench, English springer, cassette player, chain saw, church, French horn, garbage truck, gas pump, golf ball, parachute, etc.). The image file names follow a convention specific to the ImageNet project, but can be thought of as essentially random (so long as they are unique).\n", "\n", "```\n", "imagenette2-160\n", "├── train\n", "│   ├── n01440764\n", "│   ├── n02102040\n", "│   ├── n02979186\n", "│   ├── n03000684\n", "│   ├── n03028079\n", "│   ├── n03394916\n", "│   ├── n03417042\n", "│   ├── n03425413\n", "│   ├── n03445777\n", "│   └── n03888257\n", "└── val\n", " ├── n01440764\n", " ├── n02102040\n", " ├── n02979186\n", " ├── n03000684\n", " ├── n03028079\n", " ├── n03394916\n", " ├── n03417042\n", " ├── n03425413\n", " ├── n03445777\n", " └── n03888257\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Classification/Label Data\n", "\n", "The labels associated with each image are contained in a seperate `.txt` file, we download the `words.txt` to the directory the images are extracted into.\n", "\n", "Reviewing the contents of this file, we will find a mapping of classification codes (subdirectory names starting with `\"n0\"`) to human readable descriptions of the contents. 
A small selection of the file is provided below as an illustration.\n", "\n", "```\n", "n01635343\tRhyacotriton, genus Rhyacotriton\n", "n01635480\tolympic salamander, Rhyacotriton olympicus\n", "n01635659\tPlethodontidae, family Plethodontidae\n", "n01635964\tPlethodon, genus Plethodon\n", "n01636127\tlungless salamander, plethodont\n", "n01636352\teastern red-backed salamander, Plethodon cinereus\n", "n01636510\twestern red-backed salamander, Plethodon vehiculum\n", "n01636675\tDesmograthus, genus Desmograthus\n", "n01636829\tdusky salamander\n", "n01636984\tAneides, genus Aneides\n", "n01637112\tclimbing salamander\n", "n01637338\tarboreal salamander, Aneides lugubris\n", "n01637478\tBatrachoseps, genus Batrachoseps\n", "n01637615\tslender salamander, worm salamander\n", "n01637796\tHydromantes, genus Hydromantes\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Mapping Classification Codes to Meaningful Descriptors\n", "\n", "We begin by reading each line of this file and creating a dictionary to store the corrispondence between ImageNet synset name and a human readable label." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "\n", "dataset_dir = Path(\"./data/imagenette2-160\")\n", "\n", "synset_label = {}\n", "with open(dataset_dir / \"words.txt\", \"r\") as f:\n", " for line in f.readlines():\n", " synset, label = line.split(\"\\t\")\n", " synset_label[synset] = label.rstrip()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Read training data (images and labels) from disk and store them in NumPy arrays." ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "import os\n", "from tqdm import tqdm\n", "\n", "import numpy as np\n", "from PIL import Image" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 10/10 [00:31<00:00, 3.12s/it]\n" ] } ], "source": [ "train_images = []\n", "train_labels = []\n", "\n", "for synset in tqdm(os.listdir(dataset_dir / \"train\")):\n", " label = synset_label[synset]\n", "\n", " for image_filename in os.listdir(dataset_dir / \"train\" / synset):\n", " image = Image.open(dataset_dir / \"train\" / synset / image_filename)\n", " image = image.resize((163, 160))\n", " data = np.asarray(image)\n", "\n", " if len(data.shape) == 2: # discard B&W images\n", " continue\n", "\n", " train_images.append(data)\n", " train_labels.append(label)\n", "\n", "train_images = np.array(train_images)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(9296, 160, 163, 3)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_images.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ".. note:: Here we are reading the images from disk and storing them in a big Python list, and then converting it to a NumPy array. Note that it could be impractical for larger datasets. You might want to consider the idea of reading files in batch." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Read validation data (images and labels) from disk and store them in NumPy arrays, same as before." 
] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 10/10 [00:12<00:00, 1.22s/it]\n" ] } ], "source": [ "val_images = []\n", "val_labels = []\n", "\n", "for synset in tqdm(os.listdir(dataset_dir / \"val\")):\n", " label = synset_label[synset]\n", "\n", " for image_filename in os.listdir(dataset_dir / \"val\" / synset):\n", " image = Image.open(dataset_dir / \"val\" / synset / image_filename)\n", " image = image.resize((163, 160))\n", " data = np.asarray(image)\n", "\n", " if len(data.shape) == 2: # discard B&W images\n", " continue\n", "\n", " val_images.append(data)\n", " val_labels.append(label)\n", "\n", "val_images = np.array(val_images)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3856, 160, 163, 3)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val_images.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4. Column initialization" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "With checkout write-enabled, we can now initialize a new column of the repository using the method `add_ndarray_column()`.\n", "\n", "All samples within a column have the same data type, and number of dimensions. The size of each dimension can be either fixed (the default behavior) or variable per sample.\n", "\n", "You will need to provide a column `name` and a `prototype`, so Hangar can infer the shape of the elements contained in the array.\n", "`train_im_col` will become a column accessor object." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "train_im_col = master_checkout.add_ndarray_column(\n", " name=\"training_images\", prototype=train_images[0]\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Verify we successfully added the new column:" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar Columns \n", " Writeable : True \n", " Number of Columns : 1 \n", " Column Names / Partial Remote References: \n", " - training_images / False" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Get useful information about the new column simply by inspecting `train_im_col` ..." ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : training_images \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint8 \n", " Shape : (160, 163, 3) \n", " Number of Samples : 0 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_im_col" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "... or by leveraging the dict-style columns access through the `checkout` object. They provide the same information." 
] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : training_images \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint8 \n", " Shape : (160, 163, 3) \n", " Number of Samples : 0 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.columns[\"training_images\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Since Hangar 0.5, it's possible to have a column with string datatype, and we will be using it to store the labels of our dataset." ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "train_lab_col = master_checkout.add_str_column(name=\"training_labels\")" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : training_labels \n", " Writeable : True \n", " Column Type : str \n", " Column Layout : flat \n", " Schema Type : variable_shape \n", " DType : \n", " Shape : None \n", " Number of Samples : 0 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_lab_col" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5. Adding data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To add data to a named column, we can use dict-style mode (refer to the `__setitem__`, `__getitem__`, and `__delitem__` methods) or the `update()` method. Sample keys can be either `str` or `int` type." ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "train_im_col[0] = train_images[0]\n", "train_lab_col[0] = train_labels[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As we can see, `Number of Samples` is equal to 1 now." 
] }, { "cell_type": "code", "execution_count": 23, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : training_labels \n", " Writeable : True \n", " Column Type : str \n", " Column Layout : flat \n", " Schema Type : variable_shape \n", " DType : \n", " Shape : None \n", " Number of Samples : 1 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.columns[\"training_labels\"]" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "data = {1: train_images[1], 2: train_images[2]}" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "train_im_col.update(data)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : training_images \n", " Writeable : True \n", " Column Type : ndarray \n", " Column Layout : flat \n", " Schema Type : fixed_shape \n", " DType : uint8 \n", " Shape : (160, 163, 3) \n", " Number of Samples : 3 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_im_col" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's add the remaining training images:" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 9296/9296 [00:36<00:00, 257.92it/s]\n" ] } ], "source": [ "with train_im_col:\n", " for i, img in tqdm(enumerate(train_images), total=train_images.shape[0]):\n", " if i not in [0, 1, 2]:\n", " train_im_col[i] = img" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "code_folding": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 9296/9296 [00:01<00:00, 5513.23it/s] \n" ] } ], "source": [ "with train_lab_col:\n", " for i, label in tqdm(enumerate(train_labels), total=len(train_labels)):\n", " if i != 0:\n", " train_lab_col[i] = label" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Hangar FlatSampleWriter \n", " Column Name : training_labels \n", " Writeable : True \n", " Column Type : str \n", " Column Layout : flat \n", " Schema Type : variable_shape \n", " DType : \n", " Shape : None \n", " Number of Samples : 9296 \n", " Partial Remote Data Refs : False\n" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_lab_col" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Both the `training_images` and the `training_labels` have 9296 samples. Great!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ".. note:: To get an overview of the different ways you could add data to a Hangar repository (also from a performance point of view), please refer to the Performance section of the Hangar Tutorial Part 1." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6. Committing changes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Once you have made a set of changes you want to commit, simply call the `commit()` method and specify a message.\n", "\n", "The returned value (`a=ecc943c89b9b09e41574c9849f11937828fece28`) is the commit hash of this commit." 
] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=ecc943c89b9b09e41574c9849f11937828fece28'" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.commit(\"Add Imagenette training images and labels\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's add the validation data to the repository ..." ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "val_im_col = master_checkout.add_ndarray_column(\n", " name=\"validation_images\", prototype=val_images[0]\n", ")\n", "val_lab_col = master_checkout.add_str_column(name=\"validation_labels\")" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 3856/3856 [00:08<00:00, 474.25it/s]\n" ] } ], "source": [ "with val_im_col, val_lab_col:\n", " for img, label in tqdm(zip(val_images, val_labels), total=len(val_labels)):\n", " val_im_col[i] = img\n", " val_lab_col[i] = label" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "... and commit!" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'a=e31ef9a06c8d1a4cefeb52c336b2c33d1dca3fba'" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "master_checkout.commit(\"Add Imagenette validation images and labels\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To view the **history** of your commits:" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "* a=e31ef9a06c8d1a4cefeb52c336b2c33d1dca3fba (\u001B[1;31mmaster\u001B[m) : Add Imagenette validation images and labels\n", "* a=ecc943c89b9b09e41574c9849f11937828fece28 : Add Imagenette training images and labels\n" ] } ], "source": [ "master_checkout.log()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Do not forget to close the write-enabled checkout!" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "master_checkout.close()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's inspect the repository state! This will show disk usage information, the details of the last commit and all the information about the dataset columns." 
] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Summary of Contents Contained in Data Repository \n", " \n", "================== \n", "| Repository Info \n", "|----------------- \n", "| Base Directory: /Volumes/Archivio/tensorwerk/hangar/imagenette \n", "| Disk Usage: 862.09 MB \n", " \n", "=================== \n", "| Commit Details \n", "------------------- \n", "| Commit: a=e31ef9a06c8d1a4cefeb52c336b2c33d1dca3fba \n", "| Created: Sat Apr 4 11:29:12 2020 \n", "| By: Alessia Marcolini \n", "| Email: alessia@tensorwerk.com \n", "| Message: Add Imagenette validation images and labels \n", " \n", "================== \n", "| DataSets \n", "|----------------- \n", "| Number of Named Columns: 4 \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"training_images\", layout=\"flat\") \n", "| Num Data Pieces: 9296 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: ndarray \n", "| - schema_type: fixed_shape \n", "| - shape: (160, 163, 3) \n", "| - dtype: uint8 \n", "| - backend: 01 \n", "| - backend_options: {'complib': 'blosc:lz4hc', 'complevel': 5, 'shuffle': 'byte'} \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"training_labels\", layout=\"flat\") \n", "| Num Data Pieces: 9296 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: str \n", "| - schema_type: variable_shape \n", "| - dtype: \n", "| - backend: 30 \n", "| - backend_options: {} \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"validation_images\", layout=\"flat\") \n", "| Num Data Pieces: 1 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: ndarray \n", "| - schema_type: fixed_shape \n", "| - shape: (160, 163, 3) \n", "| - dtype: uint8 \n", "| - backend: 01 \n", "| - backend_options: {'complib': 'blosc:lz4hc', 'complevel': 5, 'shuffle': 'byte'} \n", "|\n", "| * Column Name: ColumnSchemaKey(column=\"validation_labels\", layout=\"flat\") \n", "| Num Data Pieces: 1 \n", "| Details: \n", "| - column_layout: flat \n", "| - column_type: str \n", "| - schema_type: variable_shape \n", "| - dtype: \n", "| - backend: 30 \n", "| - backend_options: {} \n", " \n", "================== \n", "| Metadata: \n", "|----------------- \n", "| Number of Keys: 0 \n", "\n" ] } ], "source": [ "repo.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Great! You've made it until the end of the \"Real World\" Quick Start Tutorial!! 👏🏼\n", "\n", "Please check out the other tutorials for more advanced stuff such as branching & merging, conflicts resolution and data loaders for TensorFlow and PyTorch!" 
] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: docs/api.rst ================================================ .. _ref-api: ========== Python API ========== This is the python API for the Hangar project. Repository ========== .. automodule:: hangar.repository :members: Remotes ======= .. autoclass:: Remotes() :members: :exclude-members: __init__ Write Enabled Checkout ====================== Checkout -------- .. autoclass:: hangar.checkout.WriterCheckout() :members: :inherited-members: :special-members: __getitem__, __setitem__, __len__, __contains__, __iter__ :exclude-members: __init__ Columns ------- .. autoclass:: hangar.columns.column.Columns() :members: :special-members: __getitem__, __setitem__, __delitem__, __contains__, __len__, __iter__ :exclude-members: __init__ Flat Column Layout Container ---------------------------- .. autoclass:: hangar.columns.layout_flat.FlatSampleWriter() :members: :inherited-members: :special-members: __getitem__, __setitem__, __delitem__, __contains__, __len__, __iter__ :exclude-members: __init__ Nested Column Layout Container ------------------------------ .. autoclass:: hangar.columns.layout_nested.NestedSampleWriter() :members: :inherited-members: :special-members: __getitem__, __setitem__, __delitem__, __contains__, __len__, __iter__ :exclude-members: __init__ .. autoclass:: hangar.columns.layout_nested.FlatSubsampleWriter() :members: :inherited-members: :special-members: __getitem__, __setitem__, __delitem__, __contains__, __len__, __iter__ :exclude-members: __init__ Differ ------ .. autoclass:: hangar.diff.WriterUserDiff() :members: :exclude-members: __init__ Bulk Importer ------------- .. automodule:: hangar.bulk_importer :members: Read Only Checkout ================== Checkout -------- .. autoclass:: hangar.checkout.ReaderCheckout() :members: :inherited-members: :special-members: __getitem__, __len__, __contains__, __iter__ :exclude-members: __init__ Flat Column Layout Container ---------------------------- .. autoclass:: hangar.columns.layout_flat.FlatSampleReader() :members: :inherited-members: :special-members: __getitem__, __setitem__, __contains__, __len__, __iter__ :exclude-members: __init__ Nested Column Layout Container ------------------------------ .. autoclass:: hangar.columns.layout_nested.NestedSampleReader() :members: :inherited-members: :special-members: __getitem__, __contains__, __len__, __iter__ :exclude-members: __init__ .. autoclass:: hangar.columns.layout_nested.FlatSubsampleReader() :members: :inherited-members: :special-members: __getitem__,, __contains__, __len__, __iter__ :exclude-members: __init__ Differ ------ .. 
autoclass:: hangar.diff.ReaderUserDiff() :members: :exclude-members: __init__ ML Framework Dataloaders ======================== Tensorflow ---------- .. autofunction:: hangar.dataset.make_tensorflow_dataset Pytorch ------- .. autofunction:: hangar.dataset.make_torch_dataset Numpy ----- .. autofunction:: hangar.dataset.make_numpy_dataset ================================================ FILE: docs/authors.rst ================================================ .. include:: ../AUTHORS.rst ================================================ FILE: docs/backends/hdf5_00.rst ================================================ Local HDF5 Backend ================== .. automodule:: hangar.backends.hdf5_00 ================================================ FILE: docs/backends/hdf5_01.rst ================================================ Fixed Shape Optimized Local HDF5 ================================ .. automodule:: hangar.backends.hdf5_01 ================================================ FILE: docs/backends/lmdb_30.rst ================================================ Variable Shape LMDB String Data Store ===================================== .. automodule:: hangar.backends.lmdb_30 ================================================ FILE: docs/backends/numpy_10.rst ================================================ Local NP Memmap Backend ======================= .. automodule:: hangar.backends.numpy_10 ================================================ FILE: docs/backends/remote_50.rst ================================================ Remote Server Unknown Backend ============================= .. automodule:: hangar.backends.remote_50 ================================================ FILE: docs/backends.rst ================================================ .. _ref-backends: .. note:: The following documentation contains highly technical descriptions of the data writing and loading backends of the Hangar core. It is intended for developer use only, with the functionality described herein being completely hidden from regular users. Any questions or comments can be directed to the `Hangar Github Issues Page `_ ================= Backend selection ================= .. automodule:: hangar.backends.__init__ Backend Specifications ====================== .. toctree:: :maxdepth: 2 :titlesonly: ./backends/hdf5_00 ./backends/hdf5_01 ./backends/numpy_10 ./backends/lmdb_30 ./backends/remote_50 ================================================ FILE: docs/benchmarking.rst ================================================ .. include:: ../asv_bench/README.rst ================================================ FILE: docs/changelog.rst ================================================ .. include:: ../CHANGELOG.rst ================================================ FILE: docs/cli.rst ================================================ Hangar CLI Documentation ======================== The CLI described below is automatically available after the Hangar Python package has been installed (either through a package manager or via source builds). In general, the commands require the terminals ``cwd`` to be at the same level the repository was initially created in. Simply start by typing ``$ hangar --help`` in your terminal to get started! .. click:: hangar.cli:main :prog: hangar :show-nested: ================================================ FILE: docs/codeofconduct.rst ================================================ .. _ref-code-of-conduct: .. 
include:: ../CODE_OF_CONDUCT.rst ================================================ FILE: docs/concepts.rst ================================================ .. _ref-concepts: #################### Hangar Core Concepts #################### .. warning:: The usage info displayed in the ``latest`` build of the project documentation does not reflect recent changes to the API and internal structure of the project. It should not be relied on at the current moment; it will be updated over the coming weeks, and will be in line before the next release. This document provides a high level overview of the problems Hangar is designed to solve and introduces the core concepts for beginning to use Hangar. *************** What Is Hangar? *************** At its core Hangar is designed to solve many of the same problems faced by traditional code version control systems (ie. ``Git``), just adapted for numerical data: * Time travel through the historical evolution of a dataset * Zero-cost Branching to enable exploratory analysis and collaboration * Cheap Merging to build datasets over time (with multiple collaborators) * Completely abstracted organization and management of data files on disk * Ability to only retrieve a small portion of the data (as needed) while still maintaining a complete historical record * Ability to push and pull changes directly to collaborators or a central server (ie. a truly distributed version control system) The ability of version control systems to perform these tasks for codebases is largely taken for granted by almost every developer today; however, we are in fact standing on the shoulders of giants, with decades of engineering which has resulted in these phenomenally useful tools. Now that a new era of "Data-Defined software" is taking hold, we find there is a strong need for analogous version control systems which are designed to handle numerical data at large scale... Welcome to Hangar! *********** Inspiration *********** The design of Hangar was heavily influenced by the `Git `_ source-code version control system. As a Hangar user, many of the fundamental building blocks and commands can be thought of as interchangeable: * checkout * commit * branch * merge * diff * push * pull/fetch * log Emulating the high level git syntax has allowed us to create a user experience which should be familiar in many ways to Hangar users; a goal of the project is to enable many of the same VCS workflows developers use for code while working with their data! There are, however, many fundamental differences in how humans/programs interpret and use text in source files vs. numerical data which raise many questions Hangar needs to uniquely solve: * How do we connect some piece of "Data" with a meaning in the real world? * How do we diff and merge large collections of data samples? * How can we resolve conflicts? * How do we make data access (reading and writing) convenient for both user-driven exploratory analyses and high performance production systems operating without supervision? * How can we enable people to work on huge datasets in a local (laptop grade) development environment? We will show how Hangar solves these questions in a high-level guide below. For a deep dive into the Hangar internals, we invite you to check out the :ref:`ref-hangar-under-the-hood` page. **************************** How Hangar Thinks About Data **************************** Abstraction 0: What is a Repository?
==================================== A "Repository" consists of an historically ordered mapping of "Commits" over time by various "Committers" across any number of "Branches". Though there are many conceptual similarities in what a Git repo and a Hangar Repository achieve, Hangar is designed with the express purpose of dealing with numeric data. As such, when you read/write to/from a Repository, the main way of interaction with information will be through (an arbitrary number of) Columns in each Commit. A simple key/value store is also included to store metadata, but as it is a minor point it will largely be ignored for the rest of this post. History exists at the Repository level, Information exists at the Commit level. Abstraction 1: What is a Dataset? ================================= Let's get philosophical and talk about what a "Dataset" is. The word "Dataset" invokes some meaning to humans; a dataset may have a canonical name (like "MNIST" or "CoCo"), it will have a source where it comes from, (ideally) it has a purpose for some real-world task, it will have people who build, aggregate, and nurture it, and most importantly a Dataset always contains pieces of some type of information which describes "something". It's an abstract definition, but it is only us, the humans behind the machine, who associate "Data" with some meaning in the real world; it is in the same vein that we associate a group of Data in a "Dataset" with some real world meaning. Our first abstraction is therefore the "Dataset": a collection of (potentially groups of) data pieces observing a common form among instances which act to describe something meaningful. *To describe some phenomenon, a dataset may require multiple pieces of information, each of a particular format, for each instance/sample recorded in the dataset.* **For Example**, a hospital will typically have a *Dataset* containing all of the CT scans performed over some period of time. A single CT scan is an instance, a single sample; however, once many are grouped together they form a *Dataset*. To expand on this simple view we realize that each CT scan consists of hundreds of pieces of information: * Some large ``numeric array`` (the image data). * Some smaller ``numeric tuples`` (describing image spacing, dimension scale, capture time, machine parameters, etc). * Many pieces of ``string`` data (the patient name, doctor name, scan type, results found, etc). When thinking about the group of CT scans in aggregate, we realize that though a single scan contains many disparate pieces of information stuck together, when thinking about the aggregation of every scan in the group, most (if not all) of the same information fields are duplicated within each sample. *A single scan is a bunch of disparate information stuck together, many of those put together make a Dataset, but looking down from the top, we identify a pattern of common fields across all items. We call these groupings of similarly typed information:* **Columns**. Abstraction 2: What Makes up a Column? ====================================== A ``Dataset`` is made of one or more ``Columns`` (and optionally some ``Metadata``), with each item placed in some ``Column`` belonging to and making up an individual ``Sample``. It is important to remember that all data needed to fully describe a single ``sample`` in a ``Dataset`` may consist of information spread across any number of ``Columns``.
To define a ``Column`` in Hangar, we only need to provide: * a name * a type * a shape The individual pieces of information (``Data``) which fully describe some phenomenon via an aggregate mapping access across any number of "Columns" are both individually and collectively referred to as ``Samples`` in the Hangar vernacular. According to the specification above, all samples contained in a ``Column`` must be numeric arrays with each having: 1) Same data type (standard ``numpy`` data types are supported). 2) A shape with each dimension size <= the shape (``max shape``) set in the ``column`` specification (more on this later). Additionally, samples in a ``column`` can be either named or unnamed (depending on how you interpret what the information contained in the ``column`` actually represents). Effective use of Hangar relies on having an understanding of what exactly a ``"Sample"`` is in a particular ``Column``. The most effective way to find out is to ask: "What is the smallest piece of data which has a useful meaning to 'me' (or 'my' downstream processes)?" In the MNIST ``column``, this would be a single digit image (a 28x28 array); for a medical ``column`` it might be an entire (512x320x320) MRI volume scan for a particular patient; while for the NASDAQ Stock Ticker it might be an hour's worth of price data points (or less, or more!). The point is that **when you think about what a ``sample`` is, it should typically be the smallest atomic unit of useful information.** Abstraction 3: What is Data? ============================ From this point forward, **when we talk about "Data" we are actually talking about n-dimensional arrays of numeric information. To Hangar, "Data" is just a collection of numbers being passed into and out of it.** Data does not have a file type, it does not have a file-extension, it does not mean anything to Hangar itself - it is just numbers. This theory of "Data" is nearly as simple as it gets, and this simplicity is what enables us to be unconstrained as we build abstractions and utilities to operate on it. Summary ======= .. code-block:: text A Dataset is thought of as containing Samples, but is actually defined by Columns, which store parts of fully defined Samples in structures common across the full aggregation of Dataset Samples. This can essentially be represented as a key -> tensor mapping, which can (optionally) be Sparse depending on usage patterns Dataset | ----------------------------------------- | | | | Column 1 Column 2 Column 3 Column 4 | | | | ------------------------------------------------------ image | filename | label | annotation | ------------------------------------------------------ S1 | S1 | | S1 | S2 | S2 | S2 | S2 | S3 | S3 | S3 | | S4 | S4 | | | More technically, a Dataset is just a view over the columns that gives you sample tuples based on the cross product of keys and columns. Hangar doesn't store or track the dataset, just the underlying columns. S1 = (image[S1], filename[S1], annotation[S1]) S2 = (image[S2], filename[S2], label[S2], annotation[S2]) S3 = (image[S3], filename[S3], label[S3]) S4 = (image[S4], filename[S4]) .. note:: The technical crowd among the readers should note: * Hangar preserves all sample data bit-exactly. * Dense arrays are fully supported; Sparse array support is currently under development and will be released soon. * Integrity checks are built in by default (explained in more detail in :ref:`ref-hangar-under-the-hood`) using cryptographically secure algorithms.
* Hangar is very much a young project; until penetration tests and security reviews are performed, we will refrain from stating that Hangar is fully "cryptographically secure". Security experts are welcome to contact us privately at `hangar.info@tensorwerk.com `__ to disclose any security issues. ****************************************** Implications of the Hangar Data Philosophy ****************************************** The Domain-Specific File Format Problem ======================================= Though it may seem counterintuitive at first, there is an incredible amount of freedom (and power) that is gained when "you" (the user) start to decouple some information container from the data which it actually holds. At the end of the day, the algorithms and systems you use to produce insight from data are just mathematical operations; math does not operate on a specific file type, math operates on numbers. Human & Computational Cost -------------------------- It seems strange that organizations & projects commonly rely on storing data on disk in some domain-specific - or custom built - binary format (ie. a ``.jpg`` image, ``.nii`` neuroimaging informatics study, ``.csv`` tabular data, etc.), and just deal with the hassle of maintaining all the infrastructure around reading, writing, transforming, and preprocessing these files into usable numerical data every time they want to interact with their Columns. Even disregarding the computational cost/overhead of preprocessing & transforming the data on every read/write, these schemes require significant amounts of human capital (developer time) to be spent on building, testing, and upkeep/maintenance; all while adding significant complexity for users. Oh, and they also have a strangely high inclination to degenerate into horrible complexity which essentially becomes "magic" after the original creators move on. The Hangar system is quite different in this regard. First, **we trust that you know what your data is and how it is best represented**. When writing to a Hangar repository, you process the data into n-dimensional arrays once. Then when you retrieve it you are provided with the same array, in the same shape and datatype (unless you ask for a particular subarray-slice), already initialized in memory and ready to compute on instantly. High Performance From Simplicity -------------------------------- Because Hangar is designed to deal (almost exclusively) with numerical arrays, we are able to "stand on the shoulders of giants" once again by utilizing many of the well validated, highly optimized, and community validated numerical array data management utilities developed by the High Performance Computing community over the past few decades. In a sense, the backend of Hangar serves two functions: 1) Bookkeeping: recording information about columns, samples, commits, etc. 2) Data Storage: highly optimized interfaces which store and retrieve data from disk through its backend utility. The details are explained much more thoroughly in :ref:`ref-hangar-under-the-hood`. Because Hangar only considers data to be numbers, the choice of backend to store data is (in a sense) completely arbitrary so long as ``Data In == Data Out``.
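That guarantee is easy to demonstrate. The following is a minimal sketch (the repository path, user details, and column name are illustrative assumptions, not taken from any existing example) showing that an array written to a column is returned bit-exact on read, regardless of which backend Hangar selected behind the scenes:

.. code-block:: python

    import numpy as np
    from hangar import Repository

    repo = Repository(path="/tmp/roundtrip-demo")                    # illustrative path
    repo.init(user_name="Demo User", user_email="demo@example.com")

    co = repo.checkout(write=True)
    col = co.add_ndarray_column(name="scan", prototype=np.zeros((4, 4)))
    original = np.random.rand(4, 4)
    col[0] = original                                                # Data In
    co.commit("add one sample")
    co.close()

    reader = repo.checkout()                                         # read-only checkout
    assert np.array_equal(reader.columns["scan"][0], original)       # Data Out, bit-exact
    reader.close()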
**This fact has massive implications for the system**; instead of being tied to a single backend (each of which will have significant performance tradeoffs for arrays of particular datatypes, shapes, and access patterns), we simultaneously store different data pieces in the backend which is most suited to each. A great deal of care has been taken to optimize parameters in the backend interface which affect performance and compression of data samples. The choice of backend to store a piece of data is selected automatically from heuristics based on the column specification, system details, and context of the storage service internal to Hangar. **As a user, this is completely transparent to you** in all steps of interacting with the repository. It does not require (or even accept) user specified configuration. At the time of writing, Hangar has the following backends implemented (with plans to potentially support more as needs arise): 1) `HDF5 `_ 2) `Memmapped Arrays `_ 3) `TileDb `_ (in development) Open Source Software Style Collaboration in Dataset Curation ============================================================= Specialized Domain Knowledge is a Scarce Resource ------------------------------------------------- A common side effect of `The Domain-Specific File Format Problem`_ is that anyone who wants to work with an organization's/project's data needs to not only have some domain expertise (so they can do useful things with the data), but they also need to have a non-trivial understanding of the project's dataset, file format, and access conventions / transformation pipelines. *In a world where highly specialized talent is already scarce, this phenomenon shrinks the pool of available collaborators dramatically.* Given this situation, it's understandable why, when most organizations spend massive amounts of money and time to build a team, collect & annotate data, and build an infrastructure around that information, they hold it for their private use with little regard for how the world could use it together. Businesses rely on proprietary information to stay ahead of their competitors, and because this information is so difficult (and expensive) to generate, it's completely reasonable that they should be the ones to benefit from all that work. **A Thought Experiment** Imagine that ``Git`` and ``GitHub`` didn't take over the world. Imagine that the ``Diff`` and ``Patch`` Unix tools never existed. Instead, imagine we were to live in a world where every software project had very different version control systems (largely homemade by non-VCS experts, & not validated by a community over many years of use). Even worse, most of these tools don't allow users to easily branch, make changes, and automatically merge them back. It shouldn't be difficult to imagine how dramatically such a world would contrast to ours today. Open source software as we know it would hardly exist, and any efforts would probably be massively fragmented across the web (if there would even be a 'web' that we would recognize in this strange world). Without a way to collaborate in the open, open source software would largely not exist, and we would all be worse off for it. Doesn't this hypothetical sound quite a bit like the state of open source data collaboration in today's world?
*The impetus for developing a tool like Hangar is the belief that if it is simple for anyone with domain knowledge to collaboratively curate columns containing information they care about, then they will.* Open source software development benefits everyone; we believe open source column curation can do the same. How To Overcome The "Size" Problem ---------------------------------- Even if the greatest tool imaginable existed to version, branch, and merge columns, it would face one massive problem which, if left unsolved, would kill the project: *The size of data can very easily exceed what can fit on (most) contributors' laptops or personal workstations*. This section explains how Hangar can handle working with columns which are prohibitively large to download or store on a single machine. As mentioned in `High Performance From Simplicity`_, under the hood Hangar deals with "Data" and "Bookkeeping" completely separately. We've previously covered what exactly we mean by Data in `How Hangar Thinks About Data`_, so we'll briefly cover the second major component of Hangar here. In short, "Bookkeeping" describes everything about the repository. By everything, we do mean that the Bookkeeping records describe everything: all commits, parents, branches, columns, samples, data descriptors, schemas, commit messages, etc. Though complete, these records are fairly small (tens of MB in size for decently sized repositories with decent history), and are highly compressed for fast transfer between a Hangar client/server. **A brief technical interlude** There is one very important (and rather complex) property which gives Hangar Bookkeeping massive power: **Existence of some data piece is always known to Hangar and stored immutably once committed. However, the access pattern, backend, and locating information for this data piece may (and over time, will) be unique in every hangar repository instance**. Though the details of how this works are well beyond the scope of this document, the following example may provide some insight into the implications of this property: If you ``clone`` some hangar repository, Bookkeeping says that "some number of data pieces exist" and they should be retrieved from the server. However, the bookkeeping records transferred in a ``fetch`` / ``push`` / ``clone`` operation do not include information about where that piece of data existed on the client (or server) computer. Two synced repositories can use completely different backends to store the data, in completely different locations, and it does not matter - Hangar only guarantees that when collaborators ask for a data sample in some checkout, they will be provided with identical arrays, not that they will come from the same place or be stored in the same way. Only when data is actually retrieved is the "locating information" set for that repository instance. Because Hangar makes no assumptions about how/where it should retrieve some piece of data, or even an assumption that it exists on the local machine, and because records are small and completely describe history, once a machine has the Bookkeeping, it can decide what data it actually wants to materialize on its local disk! These ``partial fetch`` / ``partial clone`` operations can materialize any desired data, whether it be for a few records at the head branch, for all data in a commit, or for the entire historical data.
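A rough sketch of that workflow is shown below (the remote name, server address, and exact keyword arguments are illustrative assumptions; see the Remotes section of the API docs referenced in the note below for the authoritative signatures):

.. code-block:: python

    from hangar import Repository

    repo = Repository(path="/tmp/partial-demo")     # assumed pre-initialized local repo

    # Bookkeeping only: branch/commit records are transferred, no array data yet
    repo.remote.add("origin", "localhost:50051")
    fetched_branch = repo.remote.fetch("origin", "master")

    # Materialize only the data we actually want on this machine
    repo.remote.fetch_data("origin", branch=fetched_branch)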
A future release will even include the ability to stream data directly to a Hangar checkout and materialize the data in memory without having to save it to disk at all!

More importantly: **Since Bookkeeping describes all history, merging can be performed between branches which may contain partial (or even no) actual data.** In other words, **you don't need data on disk to merge changes into it.** It's an odd concept which will be explained more in depth in the future.

.. note::

   To try this out for yourself, please refer to the API Docs (:ref:`ref-api`) on working with Remotes, especially the ``fetch()`` and ``fetch_data()`` methods. Otherwise, look through our tutorials & examples for more practical info!

What Does it Mean to "Merge" Data?
----------------------------------

We'll start this section, once again, with a comparison to source code version control systems. When dealing with source code text, merging is performed in order to take a set of changes made to a document and logically insert the changes into some other version of the document. The goal is to generate a new version of the document with all changes made to it in a fashion which conforms to the "change author's" intentions. Simply put: the new version is valid and is what the authors expect.

This concept of what it means to merge text does not generally map well to changes made in a column. We'll explore why through this section, but look back to the philosophy of Data outlined in `How Hangar Thinks About Data`_ for inspiration as we begin. Remember, in the Hangar design a Sample is the smallest array which contains useful information. As any smaller selection of the sample array is meaningless, Hangar does not support subarray-slicing or per-index updates *when writing* data. (Subarray-slice queries are permitted for read operations, though regular use is discouraged and may indicate that your samples are larger than they should be.)

Diffing Hangar Checkouts
^^^^^^^^^^^^^^^^^^^^^^^^

To understand merge logic, we first need to understand diffing, and the types of operations which can occur.

:Addition: An operation which creates a column, sample, or some metadata which did not previously exist in the relevant branch history.

:Removal: An operation which removes some column, a sample, or some metadata which existed in the parent of the commit under consideration. (Note: removing a column also removes all samples contained in it).

:Mutation: An operation which sets a sample's data, the value of some metadata key, or a column schema to a different value than what it had previously been created with. (Note: a column schema mutation is observed when a column is removed, and a new column with the same name is created with a different dtype/shape, all in the same commit).

Merging Changes
^^^^^^^^^^^^^^^

Merging diffs consisting solely of additions and removals between branches is trivial, and performs exactly as one would expect from a text diff. Where things diverge from text is when we consider how we will merge diffs containing mutations. Say we have some sample in commit A; a branch is created, the sample is updated, and commit C is created. At the same time, someone else checks out a branch whose HEAD is at commit A, and commits a change to the sample as well (commit B). If these changes are identical, they are compatible, but what if they are not? In the following example, we diff and merge each element of the sample array like we would text: :: Merge ?? commit A commit B Does combining mean anything?
[[0, 1, 2], [[0, 1, 2], [[1, 1, 1], [0, 1, 2], -----> [2, 2, 2], ------------> [2, 2, 2], [0, 1, 2]] [3, 3, 3]] / [3, 3, 3]] \ / \ commit C / \ / \ [[1, 1, 1], / -------> [0, 1, 2], [0, 1, 2]] We see that a result can be generated, and can agree if this was a piece of text, the result would be correct. Don't be fooled, this is an abomination and utterly wrong/meaningless. Remember we said earlier ``"the result of a merge should conform to the intentions of each author"``. This merge result conforms to neither author's intention. The value of an array element is not isolated, every value affects how the entire sample is understood. The values at Commit B or commit C may be fine on their own, but if two samples are mutated independently with non-identical updates, it is a conflict that needs to be handled by the authors. This is the actual behavior of Hangar. :: commit A commit B [[0, 1, 2], [[0, 1, 2], [0, 1, 2], -----> [2, 2, 2], ----- MERGE CONFLICT [0, 1, 2]] [3, 3, 3]] / \ / \ commit C / \ / \ [[1, 1, 1], / -------> [0, 1, 2], [0, 1, 2]] When a conflict is detected, the merge author must either pick a sample from one of the commits or make changes in one of the branches such that the conflicting sample values are resolved. How Are Conflicts Detected? ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Any merge conflicts can be identified and addressed ahead of running a ``merge`` command by using the built in ``diff`` tools. When diffing commits, Hangar will provide a list of conflicts which it identifies. In general these fall into 4 categories: 1) **Additions** in both branches which created new keys (samples / columns / metadata) with non-compatible values. For samples & metadata, the hash of the data is compared, for columns, the schema specification is checked for compatibility in a method custom to the internal workings of Hangar. 2) **Removal** in ``Master Commit / Branch`` **& Mutation** in ``Dev Commit / Branch``. Applies for samples, columns, and metadata identically. 3) **Mutation** in ``Dev Commit / Branch`` **& Removal** in ``Master Commit / Branch``. Applies for samples, columns, and metadata identically. 4) **Mutations** on keys both branches to non-compatible values. For samples & metadata, the hash of the data is compared, for columns, the schema specification is checked for compatibility in a method custom to the internal workings of Hangar. ************ What's Next? ************ * Get started using Hangar today: :ref:`ref_installation`. * Read the tutorials: :ref:`ref-tutorial`. * Dive into the details: :ref:`ref-hangar-under-the-hood`. ================================================ FILE: docs/conf.py ================================================ # -*- coding: utf-8 -*- from __future__ import unicode_literals import os extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.coverage', 'sphinx.ext.doctest', 'sphinx.ext.extlinks', 'sphinx.ext.ifconfig', 'sphinx.ext.napoleon', 'sphinx.ext.todo', 'sphinx.ext.intersphinx', 'sphinx_click.ext', 'nbsphinx', 'sphinx_copybutton', 'sphinx.ext.mathjax', 'recommonmark', 'IPython.sphinxext.ipython_console_highlighting', ] if os.getenv('SPELLCHECK'): extensions += 'sphinxcontrib.spelling', spelling_show_suggestions = True spelling_lang = 'en_US' # to exclude traditional Python prompts from your copied code copybutton_prompt_text = ">>> " # All lines of the code blocks will be copied after the prompts are stripped. 
copybutton_only_copy_prompt_lines = False nbsphinx_execute = 'never' autodoc_mock_imports = ['torch', 'tensorflow'] autosummary_generate = True source_suffix = { '.rst': 'restructuredtext', '.txt': 'markdown', '.md': 'markdown', } master_doc = 'index' project = 'Hangar' year = '2019-2020' author = 'Richard Izzo' copyright = '{0}, {1}'.format(year, author) version = release = '0.5.2' pygments_style = 'default' pygments_lexer = 'PythonConsoleLexer' highlight_options = { 'python3': True } templates_path = ['.'] exclude_patterns = ['_build', '**.ipynb_checkpoints'] extlinks = { 'issue': ('https://github.com/tensorwerk/hangar-py/issues/%s', '#'), 'pr': ('https://github.com/tensorwerk/hangar-py/pull/%s', 'PR #'), } intersphinx_mapping = { 'python': ('https://docs.python.org/3', None), 'torch': ('https://pytorch.org/docs/master', None), 'numpy': ('http://docs.scipy.org/doc/numpy', None), } # Regular expressions that match URIs that should not be checked # when doing a linkcheck build linkcheck_ignore = [ r'http://localhost:\d+/?', 'http://localhost/', 'https://github.com/tensorwerk/hangar-py', r'https://github.com/tensorwerk/hangar-py/.*', r'http://tensorwerk.com/hangar-benchmarks/', r'https://tensorwerk.com/hangar-benchmarks', ] linkcheck_retries = 3 # on_rtd is whether we are on readthedocs.org on_rtd = os.environ.get('READTHEDOCS', None) == 'True' # if not on_rtd: # only set the theme if we're building docs locally # html_theme = 'sphinx_rtd_theme' html_theme = 'sphinx_material' html_sidebars = { "**": ["logo-text.html", "globaltoc.html", "localtoc.html", "searchbox.html"] } html_short_title = '%s-%s' % (project, version) napoleon_use_ivar = True napoleon_use_rtype = True napoleon_use_param = True napoleon_include_init_with_doc = True add_module_names = False doctest_test_doctest_blocks = None autoclass_content = 'class' # Material theme options (see theme.conf for more information) html_theme_options = { # Set the name of the project to appear in the navigation. 'nav_title': 'Hangar', # Set the color and the accent color 'color_primary': 'deep-purple', 'color_accent': 'blue', # Set the repo location to get a badge with stats 'repo_url': 'https://github.com/tensorwerk/hangar-py/', 'repo_name': 'Hangar', 'repo_type': 'github', # Visible levels of the global TOC; -1 means unlimited 'globaltoc_depth': -1, # If False, expand all TOC entries 'globaltoc_collapse': True, # If True, show hidden TOC entries 'globaltoc_includehidden': True, } ================================================ FILE: docs/contributing.rst ================================================ .. include:: ../CONTRIBUTING.rst ================================================ FILE: docs/contributingindex.rst ================================================ .. _ref-contributing: ###################### Contributing to Hangar ###################### .. toctree:: :maxdepth: 2 contributing codeofconduct benchmarking ================================================ FILE: docs/design.rst ================================================ .. _ref-hangar-under-the-hood: ===================== Hangar Under The Hood ===================== At its core, Hangar is a content addressable data store whose design requirements were inspired by the Git version control system. Things In Life Change, Your Data Shouldn't ========================================== When designing a high performance data version control system, achieving performance goals while ensuring consistency is incredibly difficult. 
Memory is fast, disk is slow; not much we can do about it. But since Hangar should deal with any numeric data in an array of any size (with an enforced limit of 31 dimensions in a sample...) we have to find ways to work *with* the disk, not against it.

Upon coming to terms with this fact, we are actually presented with a problem once we realize that we live in the real world, and the real world is ugly. Computers crash, processes get killed, and people do *interesting* things. Because of this, it is a foundational design principle for us to **guarantee that once Hangar says data has been successfully added to the repository, it is actually persisted.**

This essentially means that any process which interacts with data records on disk must be stateless. If (for example) we were to keep a record of all data added to the staging area in an in-memory list, and the process gets killed, we may have just lost references to all of the array data, and may not even be sure that the arrays were flushed to disk properly. These situations are a NO-GO from the start, and will always remain so. So, we come to the first design choice: **read and write actions are atomic**. Once data is added to a Hangar repository, the numeric array, along with the necessary bookkeeping records, is *always* written transactionally, ensuring that even when something unexpected happens, the data and records are committed to disk together.

.. note::

   The atomicity of interactions is completely hidden from a normal user; they shouldn't have to care about this or even know this exists. However, this is also why using the context-manager style column interaction scheme can result in a ~2x speedup on writes/reads. We can just pass on most of the work to the Python ``contextlib`` package instead of having to begin and commit/abort (depending on interaction mode) transactions with every call to an `add` or `get` method.

Data Is Large, We Don't Waste Space
===================================

From the very beginning we knew that while it would be easy to just store all data in every commit as independent arrays on disk, such a naive implementation would absolutely eat up disk space for any repository with a non-trivial history. Hangar commits should be fast and use minimal disk space; duplicating data just doesn't make sense for such a system. And so we decided on implementing a content addressable data store backend.

When a user requests to add data to a Hangar repository, one of the first operations which occurs is to generate a hash of the array contents. If the hash does not match a piece of data already placed in the Hangar repository, the data is sent to the appropriate storage backend methods. On success, the backend sends back some arbitrary specification which can be used to retrieve that same piece of data from that particular backend. The record backend then stores a key/value pair of (`hash`, `backend_specification`).

.. note::

   The record backend stores hash information in a separate location from the commit references (which associate a `(columnname, sample name/id)` to a `sample_hash`). This lets us separate the historical repository information from a particular computer's location of a data piece. All we need in the public history is to know that some data with a particular hash is associated with a commit. No one but the system which actually needs to access the data needs to know where it can be found.
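The content-addressable flow described above can be condensed into a few lines of illustrative code. This is only a sketch: the digest parameters, the toy backend, and the "spec" format are stand-ins, not Hangar's real internal interfaces.

.. code:: python

    import hashlib
    import numpy as np

    class DictBackend:
        """Toy storage backend: the 'locating spec' is just a key into a dict."""
        def __init__(self):
            self._store = {}

        def write(self, arr: np.ndarray) -> str:
            spec = f'dict_backend:{len(self._store)}'
            self._store[spec] = arr.copy()
            return spec

    backend = DictBackend()
    hash_db = {}                  # content digest -> backend locating spec

    def add_sample(arr: np.ndarray) -> str:
        """Content-addressable 'put': only write bytes whose digest is new."""
        digest = hashlib.blake2b(arr.tobytes(), digest_size=20).hexdigest()
        if digest not in hash_db:
            hash_db[digest] = backend.write(arr)   # first sighting: store + remember spec
        return digest                              # commit records only ever keep this digest

    # adding identical samples many times stores the bytes exactly once
    digests = [add_sample(np.array([7])), add_sample(np.array([7])), add_sample(np.array([3]))]
    assert digests[0] == digests[1] and len(hash_db) == 2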
On the other hand, if a data sample is added to a repository which already has a record of some hash, we don't even involve the storage backend. All we need to do is record that a new sample in a column was added with that hash. It makes no sense to write the same data twice. This method can actually result in massive space savings for some common use cases. For the MNIST column, the training label data is typically a 1D-array of size 50,000. Because there are only 10 labels, we only need to store 10 ints on disk, and just keep references to the rest.

The Basics of Collaboration: Branching and Merging
==================================================

Up to this point, we haven't actually discussed much about how data and records are treated on disk. We'll leave an entire walkthrough of the backend record structure for another tutorial, but let's introduce the basics here, and see how we enable the types of branching and merging operations you might be used to with source code (at largely the same speed!). Here are a few core principles to keep in mind:

Numbers == Numbers
------------------

Hangar has no concept of what a piece of data is outside of a string of bytes / numerical array, and most importantly, *Hangar does not care*; Hangar is a tool, and we leave it up to you to know what your data actually means! At the end of the day when the data is placed into *some* collection on disk, the storage backend we use won't care either. In fact, this is the entire reason why Hangar can do what it can; we don't attempt to treat data as anything other than a series of bytes on disk!

The fact that *Hangar does not care about what your data represents* is a fundamental underpinning of how the system works under the hood. It is the *designed and intended behavior* of Hangar to dump arrays to disk in what would seem like completely arbitrary buffers/locations to an outside observer. And for the most part, they would be essentially correct in their observation that data samples on disk are in strange locations.

While there is almost no organization or hierarchy for the actual data samples when they are stored on disk, that is not to say that they are stored without care! We may not care about global trends, but we do care a great deal about the byte order/layout, sequentiality, chunking/compression, and validation operations which are applied across the bytes which make up a data sample. In other words, we optimize for utility and performance on the backend, not so that a human can understand the file format without a computer! After the array has been saved to disk, all we care about is that the bookkeeper can record some unique information about where some piece of content is, and how we can read it. **None of that information is stored alongside the data itself - Remember: numbers are just numbers - they don't have any concept of what they are**.

Records != Numbers
------------------

*The form numerical data takes once dumped on disk is completely irrelevant to the specifications of records in the repository history.*

Now, let's unpack this a bit. We know from `Numbers == Numbers`_ that data is saved to disk in some arbitrary locations with some arbitrary backend. We also know from `Data Is Large, We Don't Waste Space`_ that the permanent repository information only contains a record which links a sample name to a hash. And we assert that there is also a mapping of hash to storage backend specification kept somewhere (it doesn't matter what that mapping looks like for the moment).
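Put concretely, those three pieces of information can be pictured as three separate mappings. The keys and values below are made up purely for illustration (the backend labels mirror Hangar's backend module names, but the spec strings are not real):

.. code:: python

    # 1. history records (immutable, shared on push / fetch / clone):
    #    (column name, sample name) -> content digest
    commit_records = {
        ('train_labels', 0): 'digest_aaa',
        ('train_labels', 1): 'digest_aaa',    # identical content -> identical digest
        ('train_images', 0): 'digest_bbb',
    }

    # 2. per-machine hash db: content digest -> backend locating spec
    hash_db = {
        'digest_aaa': 'lmdb_30: db=2 key=0013',
        'digest_bbb': 'hdf5_00: file=7fc2 dset=04 idx=921',
    }

    # 3. per-machine storage: the raw bytes, wherever the chosen backend put them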
With those 3 pieces of information, it's obvious that once data is placed in the repository, we don't actually need to interact with it to understand the accounting of what was added when! In order to make a commit, we just pack up all the records which existed in the staging area, create a hash of the records (including the hash of any parent commits), and then store the commit hash mapping alongside details such as the commit user/email and commit message, and a compressed version of the full commit records as they existed at that point in time. .. note:: That last point "storing a compressed version of the full commit records", is semi inefficient, and will be changed in the future so that unchanged records are note duplicated across commits. An example is given below of the keys -> values mapping which stores each of the staged records, and which are packed up / compressed on commit (and subsequently unpacked on checkout!). :: Num asets 'a.' -> '2' --------------------------------------------------------------------------- Name of aset -> num samples || 'a.train_images' -> '10' Name of data -> hash || 'a.train_images.0' -> BAR_HASH_1' Name of data -> hash || 'a.train_images.1' -> BAR_HASH_2' Name of data -> hash || 'a.train_images.2' -> BAR_HASH_3' Name of data -> hash || 'a.train_images.3' -> BAR_HASH_4' Name of data -> hash || 'a.train_images.4' -> BAR_HASH_5' Name of data -> hash || 'a.train_images.5' -> BAR_HASH_6' Name of data -> hash || 'a.train_images.6' -> BAR_HASH_7' Name of data -> hash || 'a.train_images.7' -> BAR_HASH_8' Name of data -> hash || 'a.train_images.8' -> BAR_HASH_9' Name of data -> hash || 'a.train_images.9' -> BAR_HASH_0' --------------------------------------------------------------------------- Name of aset -> num samples || 'a.train_labels' -> '10' Name of data -> hash || 'a.train_labels.0' -> BAR_HASH_11' Name of data -> hash || 'a.train_labels.1' -> BAR_HASH_12' Name of data -> hash || 'a.train_labels.2' -> BAR_HASH_13' Name of data -> hash || 'a.train_labels.3' -> BAR_HASH_14' Name of data -> hash || 'a.train_labels.4' -> BAR_HASH_15' Name of data -> hash || 'a.train_labels.5' -> BAR_HASH_16' Name of data -> hash || 'a.train_labels.6' -> BAR_HASH_17' Name of data -> hash || 'a.train_labels.7' -> BAR_HASH_18' Name of data -> hash || 'a.train_labels.8' -> BAR_HASH_19' Name of data -> hash || 'a.train_labels.9' -> BAR_HASH_10' --------------------------------------------------------------------------- 's.train_images' -> '{"schema_hash": "RM4DefFsjRs=", "schema_dtype": 2, "schema_is_var": false, "schema_max_shape": [784], "schema_is_named": true}' 's.train_labels' -> '{"schema_hash": "ncbHqE6Xldg=", "schema_dtype": 7, "schema_is_var": false, "schema_max_shape": [1], "schema_is_named": true}' History is Relative ------------------- Though it may be a bit obvious to state, it is of critical importance to realize that it is only because we store the full contents of the repository staging area as it existed in the instant just prior to a commit, that the integrity of full repository history can be verified from a single commit's contents and expected hash value. More so, any single commit has only a topical relationship to a commit at any other point in time. 
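A toy illustration of why a single commit's contents and expected hash are enough to verify history: if the commit digest is computed over the packed records plus the parent commit digests, then recomputing it verifies the commit's contents and, transitively, everything it builds upon. (This is only a sketch of the idea; Hangar's actual record serialization and digest scheme differ.)

.. code:: python

    import hashlib

    def commit_digest(packed_records: bytes, parent_digests: tuple) -> str:
        """Digest over the full staged records plus any parent commit digests."""
        h = hashlib.blake2b(digest_size=20)
        for parent in parent_digests:
            h.update(parent.encode())
        h.update(packed_records)
        return h.hexdigest()

    root = commit_digest(b'...records at initial commit...', ())
    child = commit_digest(b'...records after more work...', (root,))
    # Re-running commit_digest() later over the stored records must reproduce
    # 'child' exactly; any tampering with records or ancestry changes the digest.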
It is only our imposition of a commit's ancestry tree which actualizes any subsequent insights or interactivity.

While the general process of topological ordering (create branch, checkout branch, commit a few times, and merge) follows the `git` model fairly well at a conceptual level, there are some important differences we want to highlight due to their implementation:

1) Multiple commits can be simultaneously checked out in "read-only" mode on a single machine. Checking out a commit for reading does not touch the staging area status.

2) Only one process can interact with a write-enabled checkout at a time.

3) A detached head CANNOT exist for write-enabled checkouts. A staging area must begin with an identical state to the most recent commit of a/any branch.

4) A staging area which has had changes made in it cannot switch its base branch without either a commit, hard-reset, or (soon to be developed) stash operation.

When a repository is initialized, a record is created which indicates the staging area's `HEAD` branch. In addition, a branch is created with the name `master`, whose first commit will be the only commit in the entire repository which has no parent. The record key/value pairs resemble the following:

::

    'branch.master' -> ''               # No parent commit.
    'head'          -> 'branch.master'  # Staging area head branch

    # Commit Hash | Parent Commit
    -------------------------------------

.. warning::

   Much like git, odd things can happen before the `'initial commit'` is made. We recommend creating the initial commit as quickly as possible to prevent undefined behavior during repository setup. In the future, we may decide to create the "initial commit" automatically upon repository initialization.

Once the initial commit is made, a permanent commit record is made which specifies the records (not shown below) and the parent commit. The branch head pointer is then updated to point to that commit as its base.

::

    'branch.master' -> '479b4cfff6219e3d'
    'head'          -> 'branch.master'

    # Commit Hash | Parent Commit
    -------------------------------------
    '479b4cfff6219e3d' -> ''

Branches can be created as cheaply as a single line of text can be written, and they simply require a "root" commit hash (or a branch name, in which case the branch's current HEAD commit will be used as the root HEAD). Likewise, a branch can be merged with just a single write operation (once the merge logic has completed - a process which is explained separately from this section; just trust that it happens for now).

A more complex example which creates 4 different branches and merges them in a complicated order can be seen below. Please note that the `` << `` symbol is used to indicate a merge commit where `X << Y` reads: ``'merging dev branch Y into master branch X'``.
:: 'branch.large_branch' -> '8eabd22a51c5818c' 'branch.master' -> '2cd30b98d34f28f0' 'branch.test_branch' -> '1241a36e89201f88' 'branch.trydelete' -> '51bec9f355627596' 'head' -> 'branch.master' # Commit Hash | Parent Commit ------------------------------------- '1241a36e89201f88' -> '8a6004f205fd7169' '2cd30b98d34f28f0' -> '9ec29571d67fa95f << 51bec9f355627596' '51bec9f355627596' -> 'd683cbeded0c8a89' '69a09d87ea946f43' -> 'd683cbeded0c8a89' '8a6004f205fd7169' -> 'a320ae935fc3b91b' '8eabd22a51c5818c' -> 'c1d596ed78f95f8f' '9ec29571d67fa95f' -> '69a09d87ea946f43 << 8eabd22a51c5818c' 'a320ae935fc3b91b' -> 'e3e79dd897c3b120' 'c1d596ed78f95f8f' -> '' 'd683cbeded0c8a89' -> 'fe0bcc6a427d5950 << 1241a36e89201f88' 'e3e79dd897c3b120' -> 'c1d596ed78f95f8f' 'fe0bcc6a427d5950' -> 'e3e79dd897c3b120' Because the raw commit hash logs can be quite dense to parse, a graphical logging utility is included as part of the repository. Running the ``Repository.log()`` method will pretty print a graph representation of the commit history: .. code:: python >>> from hangar import Repository >>> repo = Repository(path='/foo/bar/path/') ... # make some commits >>> repo.log() .. image:: ./img/repo_graph_log.png ================================================ FILE: docs/externals.rst ================================================ .. _ref-external: =============== Hangar External =============== High level interaction interface between hangar and everything external. High Level Methods ================== .. automodule:: hangar.external._external :members: Plugin System ============= .. automodule:: hangar.external.base_plugin :members: ================================================ FILE: docs/faq.rst ================================================ .. _ref-faq: ========================== Frequently Asked Questions ========================== The following documentation are taken from questions and comments on the `Hangar User Group Slack Channel `_ and over various Github issues. How can I get an Invite to the Hangar User Group? ================================================== Just click on `This Signup Link `_ to get started. Data Integrity ============== Being a young project did you encounter some situations where the disaster was not a compilation error but dataset corruption? This is the most fearing aspect of using young projects but every project will start from a phase before becoming mature and production ready. An absolute requirement of a system right this is to protect user data at all costs (I’ll refer to this as preserving data "integrity" from here). During our initial design of the system, we made the decision that preserving integrity comes above all other system parameters: including performance, disk size, complexity of the Hangar core, and even features should we not be able to make them absolutely safe for the user. And to be honest, the very first versions of Hangar were quite slow and difficult to use as a result of this. The initial versions of Hangar (which we put together in ~2 weeks) had essentially most of the features we have today. We’ve improved the API, made things clearer, and added some visualization/reporting utilities, but not much has changed. Essentially the entire development effort has been addressing issues stemming from a fundamental need to protect user data at all costs. That work has been very successful, and performance is extremely promising (and improving all the time). 
To get into the details here: There have been only 3 instances in the entire time I’ve developed Hangar where we lost data irrecoverably: 1. We used to move data around between folders with some regularity (as a convenient way to mark some files as containing data which have been “committed”, and can no longer be opened in anything but read-only mode). There was a bug (which never made it past a local dev version) at one point where I accidentally called ``shutil.rmtree(path)`` with a directory one level too high… that wasn’t great. Just to be clear, we don’t do this anymore (since disk IO costs are way too high), but remnants of it’s intention are still very much alive and well. Once data has been added to the repository, and is “committed”, the file containing that data will never be opened in anything but read-only mode again. This reduces the chance of disk corruption massively from the start. ---- 2. When I was implementing the numpy memmap array storage backend, I was totally surprised during an early test when I: .. code:: text - opened a write-enabled checkout - added some data - without committing, retrieved the same data again via the user facing API - overwrote some slice of the return array with new data and did some processing - asked Hangar for that same array key again, and instead of returning the contents got a fatal RuntimeError raised by Hangar with the code/message indicating "'DATA CORRUPTION ERROR: Checksum {cksum} != recorded for {hashVal}" What had happened was that when opening a ``numpy.memmap`` array on disk in ``w+`` mode, the default behavior when returning a subarray is to return a subclass of ``np.ndarray`` of type ``np.memmap``. Though the numpy docs state: "The memmap object can be used anywhere an ndarray is accepted. Given a ``memmap fp``, ``isinstance(fp, numpy.ndarray)`` returns ``True``". I did not anticipate that updates to the subarray slice would also update the memmap on disk. A simple mistake to make; this has since been remedied by manually instantiating a new ``np.ndarray`` instance from the ``np.memmap`` subarray slice buffer. However, the nice part is that this was a real world proof that our system design worked (and not just in tests). When you add data to a Hangar checkout (or receive it on a fetch/clone operation) we calculate a hash digest of the data via ``blake2b`` (a cryptographically secure algorithm in the python standard library). While this allows us to cryptographically verify full integrity checks and history immutability, cryptographic hashes are slow by design. When we want to read local data (which we’ve already ensured was correct when it was placed on disk) it would be prohibitively slow to do a full cryptographic verification on every read. However, since its NOT acceptable to provide no integrity verification (even for local writes) we compromise with a much faster (though non cryptographic) hash digest/checksum. This operation occurs on EVERY read of data from disk. The theory here is that even though Hangar makes every effort to guarantee safe operations itself, in the real world we have to deal with systems which break. We’ve planned for cases where some OS induced disk corruption occurs, or where some malicious actor modifies the file contents manually; we can’t stop that from happening, but Hangar can make sure that you will know about it when it happens! ---- 3. 
Before we got smart with the HDF5 backend low level details, it was an issue for us to have a write-enabled checkout attempt to write an array to disk and immediately read it back in. I’ll gloss over the details for the sake of simplicity here, but basically I was presented with an CRC32 Checksum Verification Failed error in some edge cases. The interesting bit was that if I closed the checkout, and reopened it, it data was secure and intact on disk, but for immediate reads after writes, we weren’t propagating changes to the HDF5 chunk metadata cache to ``rw`` operations appropriately. This was fixed very early on by taking advantage of a new feature in HDF5 1.10.4 referred to as Single Writer Multiple Reader (SWMR). The long and short is that by being careful to handle the order in which a new HDF5 file is created on disk and opened in w and r mode with SWMR enabled, the HDF5 core guarantees the integrity of the metadata chunk cache at all times. Even if a fatal system crash occurs in the middle of a write, the data will be preserved. This solved this issue completely for us There are many many many more details which I could cover here, but the long and short of it is that in order to ensure data integrity, Hangar is designed to not let the user do anything they aren’t allowed to at any time - Read checkouts have no ability to modify contents on disk via any method. It’s not possible for them to actually delete or overwrite anything in any way. - Write checkouts can only ever write data. The only way to remove the actual contents of written data from disk is if changes have been made in the staging area (but not committed) and the ``reset_staging_area()`` method is called. And even this has no ability to remove any data which had previously existed in some commit in the repo’s history In addition, a Hangar checkout object is not what it appears to be (at first glance, use, or even during common introspection operations). If you try to operate on it after closing the checkout, or holding it while another checkout is started, you won’t be able to (there’s a whole lot of invisible “magic” going on with ``weakrefs``, ``objectproxies``, and instance attributes). I would encourage you to do the following: .. code:: pycon >>> co = repo.checkout(write=True) >>> co.metadata['hello'] = 'world' >>> # try to hold a reference to the metadata object: >>> mRef = co.metadata >>> mRef['hello'] 'world' >>> co.commit('first commit') >>> co.close() >>> # what happens when you try to access the `co` or `mRef` object? >>> mRef['hello'] ReferenceError: weakly-referenced object no longer exists >>> print(co) # or any other operation PermissionError: Unable to operate on past checkout objects which have been closed. No operation occurred. Please use a new checkout. The last bit I’ll leave you with is a note on context managers and performance (how we handle record data safety and effectively .. seealso:: - :ref:`ref-tutorial` (Part 1, In section: "performance") - :ref:`ref-hangar-under-the-hood` How Can a Hangar Repository be Backed Up? ========================================= Two strategies exist: 1. Use a remote server and Hangar’s built in ability to just push data to a remote! (tutorial coming soon, see :ref:`ref-api` for more details. 2. A Hangar repository is self contained in it’s .hangar directory. To back up the data, just copy/paste or rsync it to another machine! 
(edited) On Determining ``Column`` Schema Sizes ======================================= Say I have a data group that specifies a data array with one dimension, three elements (say height, width, num channels) and later on I want to add bit depth. Can I do that, or do I need to make a new data group? Should it have been three scalar data groups from the start? So right now it’s not possible to change the schema (shape, dtype) of a column. I’ve thought about such a feature for a while now, and while it will require a new user facing API option, its (almost) trivial to make it work in the core. It just hasn’t seemed like a priority yet... And no, I wouldn’t specify each of those as scalar data groups, they are a related piece of information, and generally would want to be accessed together Access patterns should generally dictate how much info is placed in a column Is there a performance/space penalty for having lots of small data groups? -------------------------------------------------------------------------- As far as a performance / space penalty, this is where it gets good :) - Using fewer columns means that there are fewer records (the internal locating info, kind-of like a git tree) to store, since each record points to a sample containing more information. - Using more columns means that the likelihood of samples having the same value increases, meaning fewer pieces of data are actually stored on disk (remember it’s a content addressable file store) However, since the size of a record (40 bytes or so before compression, and we generally see compression ratios around 15-30% of the original size once the records are committed) is generally negligible compared to the size of data on disk, optimizing for number of records is just way overkill. For this case, it really doesn’t matter. **Optimize for ease of use** ================================================ FILE: docs/index.rst ================================================ .. include:: ../README.rst .. toctree:: :maxdepth: 3 readme quickstart installation concepts api tutorial design cli externals faq backends contributingindex authors changelog Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/installation.rst ================================================ .. _ref_installation: ============ Installation ============ For general usage it is recommended that you use a pre-built version of Hangar, either from a Python Distribution, or a pre-built wheel from PyPi. Pre-Built Installation ====================== Python Distributions -------------------- If you do not already use a Python Distribution, we recommend the `Anaconda `_ (or `Miniconda `_) distribution, which supports all major operating systems (Windows, MacOSX, & the typical Linux variations). Detailed usage instructions are available `on the anaconda website `_. 
To install Hangar via the Anaconda Distribution (from the `conda-forge conda channel `_):: conda install -c conda-forge hangar Wheels (PyPi) ------------- If you have an existing python installation on your computer, pre-built Hangar Wheels can be installed via pip from the Python Package Index (PyPi):: pip install hangar Source Installation =================== To install Hangar from source, clone the repository from `Github `_:: git clone https://github.com/tensorwerk/hangar-py.git cd hangar-py python setup.py install Or use pip on the local package if you want to install all dependencies automatically in a development environment:: pip install -e . Source installation in Google colab ----------------------------------- Google colab comes with an older version of ``h5py`` pre-installed which is not compatible with hangar. If you need to install hangar from the source in google colab, make sure to uninstall the existing ``h5py`` :: !pip uninstall h5py Then follow the Source Installation steps given above. ================================================ FILE: docs/noindexapi/apiinit.rst ================================================ .. automethod:: hangar.checkout.WriterCheckout.add_ndarray_column :noindex: .. automethod:: hangar.checkout.WriterCheckout.add_str_column :noindex: .. automethod:: hangar.checkout.WriterCheckout.add_bytes_column :noindex: ================================================ FILE: docs/noindexapi/apiremotefetchdata.rst ================================================ .. automethod:: hangar.repository.Remotes.fetch_data :noindex: ================================================ FILE: docs/quickstart.rst ================================================ ===== Usage ===== To use Hangar in a project:: from hangar import Repository Please refer to the :ref:`ref-tutorial` for examples, or :ref:`ref-concepts` to review the core concepts of the Hangar system. ================================================ FILE: docs/readme.rst ================================================ .. include:: ../README.rst ================================================ FILE: docs/requirements.txt ================================================ sphinx>=2.0 sphinx-material sphinx-click nbsphinx sphinx-copybutton recommonmark IPython Cython ================================================ FILE: docs/requirements_rtd.txt ================================================ https://files.pythonhosted.org/packages/84/ad/ee890cbea43dd97cbb05aa30b9b08ff908efa8407f514e9d447dd365ef15/tensorflow_cpu-2.1.0-cp37-cp37m-manylinux2010_x86_64.whl https://download.pytorch.org/whl/cpu/torch-1.3.1%2Bcpu-cp37-cp37m-linux_x86_64.whl Cython ================================================ FILE: docs/spelling_wordlist.txt ================================================ builtin builtins classmethod staticmethod classmethods staticmethods args kwargs callstack Changelog Indices ================================================ FILE: docs/tutorial.rst ================================================ .. _ref-tutorial: ############### Hangar Tutorial ############### .. toctree:: :maxdepth: 2 :titlesonly: Tutorial-QuickStart Tutorial-001 Tutorial-002 Tutorial-003 Tutorial-Dataloader Tutorial-RealQuickStart ================================================ FILE: hangar.yml ================================================ # Metadata file for Zenoodo source code upload # This is part of the Escape 2020 project and was originally # requested by Filippo Quarenghi (Orobix). 
# # Metadata version - do not change metadata-version: 0.2 # Mandatory entries title: hangar authors: - Rick Izzo - Luca Antiga contact: - name: Rick Izzo - email: rick@tensorwerk.com - name: Luca Antiga - email: luca.antiga@orobix.com license: Apache License 2.0 url: https://github.com/tensorwerk/hangar-py description: Hangar is version control for tensor data. Commit, branch, merge, revert, and collaborate in the data-defined software era. #Optional entries doi: null keywords: Data versioning type: source grant: Tensorwerk Inc. language: python hardware: - machine: [local, server, hpc] - CPU: null - RAM: 2GB - drive: - type: [SSD, HDD] - volume: 500MB - GPU: null dependencies: - python>=3.6 - HDF5 - cython>=0.27 - setuptools>=40.0 - wheel>=0.30 - blosc>=1.8 - click - protobuf - h5py>=2.9 - hdf5plugin>=2.0 - lmdb>=0.94 - tqdm - wrapt - xxhash - numpy - grpcio os: - 'win-64' - 'linux' - 'osx-64' compiler: - gcc>=4.7 - manylinux2014 multi-thread: true container: - null ================================================ FILE: mypy.ini ================================================ # ------------------------- Global Options ------------------------------------ [mypy] warn_unused_configs = True # ------------------------- Per Module Configuration -------------------------- [mypy-lmdb] ignore_missing_imports = True [mypy-numpy] ignore_missing_imports = True [mypy-grpc] ignore_missing_imports = True ================================================ FILE: scripts/run_proto_codegen.py ================================================ import os from shutil import move from grpc_tools import protoc # ------------------------- output locations ---------------------------------- toolsPath = os.path.dirname(__file__) srcPath = os.path.normpath(os.path.join(toolsPath, os.path.pardir, 'src')) hangarProtoDir = os.path.join(srcPath, 'hangar', 'remote') hangarProtoPath = os.path.join(hangarProtoDir, 'hangar_service.proto') if not os.path.isfile(hangarProtoPath): raise FileNotFoundError(f'Cannot access hangar_service.proto at: {hangarProtoPath}') # ------------------------ hangar service ------------------------------------- os.environ.putenv('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION', 'cpp') # generates hangar service protobuf for python protoc.main(( '', f'-I{hangarProtoDir}', f'--python_out={hangarProtoDir}', f'--grpc_python_out={hangarProtoDir}', f'--mypy_out={hangarProtoDir}', hangarProtoPath, )) """ Because python3 requires explicit relative imports (which is not yet supported in the Google protoc compiler), we have to replace the 'import foo_grpc' line with the 'from . import foo' line in the generated grpc code. """ hangar_service_grpc_path_orig = os.path.join(hangarProtoDir, 'hangar_service_pb2_grpc.py') hangar_service_grpc_path_old = os.path.join(hangarProtoDir, 'hangar_service_pb2_grpc.py.old') move(hangar_service_grpc_path_orig, hangar_service_grpc_path_old) with open(hangar_service_grpc_path_orig, 'w') as new_file: with open(hangar_service_grpc_path_old, 'r+') as old_file: for old_line in old_file: if old_line == 'import hangar_service_pb2 as hangar__service__pb2\n': newline = old_line.replace('import', 'from . 
import') else: newline = old_line new_file.writelines(newline) os.remove(hangar_service_grpc_path_old) ================================================ FILE: setup.cfg ================================================ [bdist_wheel] universal = 0 [flake8] max-line-length = 150 exclude = */migrations/* [tool:pytest] norecursedirs = .git .tox .env dist build migrations python_files = test_*.py *_test.py tests.py addopts = -ra --strict --ignore=docs/conf.py --ignore=setup.py --ignore=.eggs --tb=auto [isort] force_single_line = True line_length = 120 known_first_party = hangar default_section = THIRDPARTY forced_separate = test_hangar not_skip = __init__.py skip = migrations ================================================ FILE: setup.py ================================================ #!/usr/bin/env python # -*- encoding: utf-8 -*- import os import platform import sys from os.path import join from distutils.sysconfig import get_config_var from distutils.version import LooseVersion from setuptools import setup, Extension, find_packages # Use `setup.py [] --debug` for a debug build of hangar HANGAR_DEBUG_BUILD = False # Set deployment target for mac # # Need to ensure that extensions are built for macos 10.9 when compiling on a # 10.9 system or above, overriding distutils behavior which is to target # the version used to build the current python binary. # # TO OVERRIDE: # set MACOSX_DEPLOYMENT_TARGET before calling setup.py # # From https://github.com/pandas-dev/pandas/pull/24274 # 3-Clause BSD License: https://github.com/pandas-dev/pandas/blob/master/LICENSE if sys.platform == 'darwin': if 'MACOSX_DEPLOYMENT_TARGET' not in os.environ: current_system = LooseVersion(platform.mac_ver()[0]) python_target = LooseVersion(get_config_var('MACOSX_DEPLOYMENT_TARGET')) if python_target < '10.9' and current_system >= '10.9': os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.9' class LazyCommandClass(dict): """ Lazy command class that defers operations requiring Cython and numpy until they've actually been downloaded and installed by setup_requires. """ def __contains__(self, key): return key in ['build_ext', 'bdist_wheel', 'sdist'] or super().__contains__(key) def __setitem__(self, key, value): if key == 'build_ext': raise AssertionError("build_ext overridden!") super().__setitem__(key, value) def __getitem__(self, key): if key == 'build_ext': return self.make_build_ext_cmd() elif key == 'bdist_wheel': return self.make_bdist_wheel_cmd() elif key == 'sdist': return self.make_sdist_cmd() else: return super().__getitem__(key) def make_build_ext_cmd(self): """Returns a command class implementing 'build_ext'. """ from Cython.Distutils.build_ext import new_build_ext as cython_build_ext from Cython.Compiler.Main import default_options default_options['language_level'] = 3 default_options['compiler_directives']['embedsignature'] = True default_options['compiler_directives']['emit_code_comments'] = True if HANGAR_DEBUG_BUILD is True: default_options['annotate'] = True default_options['emit_linenums'] = True default_options['gdb_debug'] = True class build_ext(cython_build_ext): def build_extensions(self): cython_build_ext.build_extensions(self) return build_ext def make_bdist_wheel_cmd(self): """Returns a command class implementing 'bdist_wheel'. """ from wheel.bdist_wheel import bdist_wheel class bdist_wheel_cmd(bdist_wheel): def run(self): # This may modify package_data: bdist_wheel.run(self) return bdist_wheel_cmd def make_sdist_cmd(self): """A command class implementing 'sdist'. 
""" from distutils.command.sdist import sdist as _sdist class sdist(_sdist): def run(self): # Make sure the compiled Cython files in the distribution are up-to-date # so we generate .c files correctly (.so will be removed) _sdist.run(self) return sdist # Pass command line flags to setup.py script # handle --lflags=[FLAGS] --cflags=[FLAGS] args = sys.argv[:] for arg in args: if arg.find('--debug') == 0: HANGAR_DEBUG_BUILD = True sys.argv.remove(arg) # Source files for build CYTHON_SOURCES = [ join('src', 'hangar', 'optimized_utils.pyx'), join('src', 'hangar', 'backends', 'specs.pyx'), join('src', 'hangar', 'backends', 'specparse.pyx'), join('src', 'hangar', 'records', 'recordstructs.pyx'), join('src', 'hangar', 'records', 'column_parsers.pyx'), join('src', 'hangar', 'records', 'hashmachine.pyx'), ] CYTHON_HEADERS = [ join('src', 'hangar', 'external_cpython.pxd'), join('src', 'hangar', 'optimized_utils.pxd'), join('src', 'hangar', 'backends', 'specs.pxd'), join('src', 'hangar', 'records', 'recordstructs.pxd'), ] __extensions = [] for source in CYTHON_SOURCES: module_name = os.path.splitext(source)[0] if module_name + '.pxd' in CYTHON_HEADERS: deps = module_name + '.pxd' else: deps = None if module_name.startswith(f'src{os.sep}'): module_name = module_name.lstrip(f'src{os.sep}') module_name = module_name.replace(os.sep, '.') ext = Extension(module_name, include_dirs=[], define_macros=[], sources=[source], depends=[deps] if deps else [], library_dirs=[], libraries=[], extra_link_args=[], extra_compile_args=[], language="c") __extensions.append(ext) with open('README.rst') as f: README_RST = f.read() SHORT_DESCRIPTION = ( 'Hangar is version control for tensor data. Commit, branch, merge, ' 'revert, and collaborate in the data-defined software era.' ) SETUP_REQUIRES = [ 'cython>=0.27', 'setuptools>=40.0', 'wheel>=0.30', ] INSTALL_REQUIRES = [ 'blosc>=1.8', 'cloudpickle>=1.4', 'click', 'grpcio', 'protobuf', 'h5py>=2.9', 'hdf5plugin>=2.0', 'lmdb>=0.94', 'numpy', 'tqdm', 'wrapt', 'xxhash', ] setup( name='hangar', version='0.5.2', license='Apache 2.0', # Package Meta Info (for PyPi) description=SHORT_DESCRIPTION, long_description=README_RST, long_description_content_type='text/x-rst', author='Richard Izzo', author_email='rick@tensorwerk.com', maintainer='Richard Izzo', maintainer_email='rick@tensorwerk.com', url='https://github.com/tensorwerk/hangar-py', project_urls={ 'Documentation': 'https://hangar-py.readthedocs.io/', 'Changelog': 'https://hangar-py.readthedocs.io/en/latest/changelog.html', 'Issue Tracker': 'https://github.com/tensorwerk/hangar-py/issues', }, platforms=['any'], # Module Source Files ext_modules=__extensions, packages=find_packages('src'), package_dir={'': 'src'}, package_data={'': ['*.ini', '*.proto']}, include_package_data=True, zip_safe=False, entry_points={ 'console_scripts': ['hangar = hangar.cli:main'] }, # Requirements python_requires='>= 3.6.0', install_requires=INSTALL_REQUIRES, setup_requires=SETUP_REQUIRES, # hooks into `sdist`, `bdist_wheel`, `bdist_ext` commands. 
cmdclass=LazyCommandClass(), # PyPi classifiers # http://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', 'License :: OSI Approved', 'Operating System :: MacOS', 'Operating System :: Microsoft :: Windows', 'Operating System :: POSIX :: Linux', 'Operating System :: Unix', 'Programming Language :: Cython', 'Programming Language :: Python :: 3 :: Only', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Topic :: Database', 'Topic :: Scientific/Engineering', 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Version Control', 'Topic :: Utilities', ], ) ================================================ FILE: src/hangar/__init__.py ================================================ __version__ = '0.5.2' __all__ = ('Repository',) from .repository import Repository ================================================ FILE: src/hangar/__main__.py ================================================ """ Entrypoint module, in case you use `python -m hangar`. Why does this file exist, and why __main__? For more info, read: - https://www.python.org/dev/peps/pep-0338/ - https://docs.python.org/2/using/cmdline.html#cmdoption-m - https://docs.python.org/3/using/cmdline.html#cmdoption-m """ from hangar.cli import main if __name__ == "__main__": main() ================================================ FILE: src/hangar/_version.py ================================================ # -*- coding: utf-8 -*- """ Portions of this code have been taken and modified from the "packaging" project. URL: https://github.com/pypa/packaging Files: packaging/_structures.py packaging/version.py Commit: 6a09d4015b54f80762ff3ef1597a8b6740563c19 Accessed: 11 DEC 2019 packaging License ------------------------------------------------------------------------------- License: Dual licensed under the terms of the Apache License, Version 2.0, and the BSD License. 
URL: https://github.com/pypa/packaging/blob/6a09d4015b/LICENSE https://github.com/pypa/packaging/blob/6a09d4015b/LICENSE.APACHE https://github.com/pypa/packaging/blob/6a09d4015b/LICENSE.BSD """ import re import typing from collections import namedtuple from itertools import dropwhile from typing import Callable, Optional, SupportsInt, Tuple, Union from operator import lt, le, eq, ge, gt, ne __all__ = ["parse", "Version", "InvalidVersion", "VERSION_PATTERN"] _Version = namedtuple( "_Version", ["epoch", "release", "dev", "pre", "post", "local"] ) class InfinityType(object): __slots__ = () def __repr__(self) -> str: return "Infinity" def __hash__(self) -> int: return hash(repr(self)) def __lt__(self, other: object) -> bool: return False def __le__(self, other: object) -> bool: return False def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) def __ne__(self, other: object) -> bool: return not isinstance(other, self.__class__) def __gt__(self, other: object) -> bool: return True def __ge__(self, other: object) -> bool: return True def __neg__(self) -> 'NegativeInfinityType': return NegativeInfinity Infinity = InfinityType() class NegativeInfinityType(object): __slots__ = () def __repr__(self) -> str: return "-Infinity" def __hash__(self) -> int: return hash(repr(self)) def __lt__(self, other: object) -> bool: return True def __le__(self, other: object) -> bool: return True def __eq__(self, other: object) -> bool: return isinstance(other, self.__class__) def __ne__(self, other: object) -> bool: return not isinstance(other, self.__class__) def __gt__(self, other: object) -> bool: return False def __ge__(self, other: object) -> bool: return False def __neg__(self) -> InfinityType: return Infinity NegativeInfinity = NegativeInfinityType() # -------------------- Type Definitions --------------------------------------- if typing.TYPE_CHECKING: InfiniteTypes = Union[InfinityType, NegativeInfinityType] PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] SubLocalType = Union[InfiniteTypes, int, str] LocalType = Union[ NegativeInfinityType, Tuple[ Union[ SubLocalType, Tuple[SubLocalType, str], Tuple[NegativeInfinityType, SubLocalType], ], ..., ], ] CmpKey = Tuple[ int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType ] VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] # ---------------------------- Version Parsing -------------------------------- def parse(version: str) -> Union['Version']: """ Parse the given version string and return a :class:`Version` object if the given version is a valid PEP 440 version, else raises InvalidVersionError """ return Version(version) class InvalidVersion(ValueError): """ An invalid version was found, users should refer to PEP 440. 
""" __slots__ = () class _BaseVersion(object): __slots__ = ('_key',) def __init__(self): self._key: 'CmpKey' = None def __hash__(self) -> int: return hash(self._key) def __lt__(self, other: '_BaseVersion') -> bool: return self._compare(other, lt) def __le__(self, other: '_BaseVersion') -> bool: return self._compare(other, le) def __eq__(self, other: object) -> bool: return self._compare(other, eq) def __ge__(self, other: '_BaseVersion') -> bool: return self._compare(other, ge) def __gt__(self, other: '_BaseVersion') -> bool: return self._compare(other, gt) def __ne__(self, other: object) -> bool: return self._compare(other, ne) def _compare(self, other: object, method: 'VersionComparisonMethod' ) -> Union[bool, type(NotImplemented)]: if isinstance(other, _BaseVersion): return method(self._key, other._key) return NotImplemented # Deliberately not anchored to the start and end of the string, to make it # easier for 3rd party code to reuse VERSION_PATTERN = r""" v? (?: (?:(?P[0-9]+)!)? # epoch (?P[0-9]+(?:\.[0-9]+)*) # release segment (?P
                                          # pre-release
            [-_\.]?
            (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
            [-_\.]?
            (?P<pre_n>[0-9]+)?
        )?
        (?P<post>                                         # post release
            (?:-(?P<post_n1>[0-9]+))
            |
            (?:
                [-_\.]?
                (?P<post_l>post|rev|r)
                [-_\.]?
                (?P<post_n2>[0-9]+)?
            )
        )?
        (?P<dev>                                          # dev release
            [-_\.]?
            (?P<dev_l>dev)
            [-_\.]?
            (?P<dev_n>[0-9]+)?
        )?
    )
    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
"""

_REGEX = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
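
# Example usage of the named groups defined in VERSION_PATTERN (illustrative):
#
#   >>> m = _REGEX.search("1.2.3rc1+local.5")
#   >>> m.group("release"), m.group("pre_l"), m.group("pre_n"), m.group("local")
#   ('1.2.3', 'rc', '1', 'local.5')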


class Version(_BaseVersion):  # lgtm [py/missing-equals]

    __slots__ = ('_version',)

    def __init__(self, version: str) -> None:
        super().__init__()

        # Validate the version and parse it into pieces
        match = _REGEX.search(version)
        if not match:
            raise InvalidVersion(f"Invalid version: '{version}'")

        # Store the parsed out pieces of the version
        self._version = _Version(
            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
            release=tuple(int(i) for i in match.group("release").split(".")),
            pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
            post=_parse_letter_version(
                match.group("post_l"), match.group("post_n1") or match.group("post_n2")
            ),
            dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
            local=_parse_local_version(match.group("local")),
        )

        # Generate a key which will be used for sorting
        self._key = _cmpkey(
            self._version.epoch,
            self._version.release,
            self._version.pre,
            self._version.post,
            self._version.dev,
            self._version.local,
        )

    def __repr__(self) -> str:
        return f""

    def __str__(self) -> str:
        parts = []

        # Epoch
        if self.epoch != 0:
            parts.append(f"{self.epoch}!")

        # Release segment
        parts.append(".".join(str(x) for x in self.release))

        # Pre-release
        if self.pre is not None:
            parts.append("".join(str(x) for x in self.pre))

        # Post-release
        if self.post is not None:
            parts.append(f".post{self.post}")

        # Development release
        if self.dev is not None:
            parts.append(f".dev{self.dev}")

        # Local version segment
        if self.local is not None:
            parts.append(f"+{self.local}")

        return "".join(parts)

    @property
    def epoch(self) -> int:
        _epoch: int = self._version.epoch
        return _epoch

    @property
    def release(self) -> Tuple[int, ...]:
        _release: Tuple[int, ...] = self._version.release
        return _release

    @property
    def pre(self) -> Optional[Tuple[str, int]]:
        _pre: Optional[Tuple[str, int]] = self._version.pre
        return _pre

    @property
    def post(self) -> Optional[int]:
        return self._version.post[1] if self._version.post else None

    @property
    def dev(self) -> Optional[int]:
        return self._version.dev[1] if self._version.dev else None

    @property
    def local(self) -> Optional[str]:
        if self._version.local:
            return ".".join(str(x) for x in self._version.local)
        else:
            return None

    @property
    def public(self) -> str:
        return str(self).split("+", 1)[0]

    @property
    def base_version(self) -> str:
        parts = []

        # Epoch
        if self.epoch != 0:
            parts.append(f"{self.epoch}!")

        # Release segment
        parts.append(".".join(str(x) for x in self.release))

        return "".join(parts)

    @property
    def is_prerelease(self) -> bool:
        return self.dev is not None or self.pre is not None

    @property
    def is_postrelease(self) -> bool:
        return self.post is not None

    @property
    def is_devrelease(self) -> bool:
        return self.dev is not None

    @property
    def major(self) -> int:
        return self.release[0] if len(self.release) >= 1 else 0

    @property
    def minor(self) -> int:
        return self.release[1] if len(self.release) >= 2 else 0

    @property
    def micro(self) -> int:
        return self.release[2] if len(self.release) >= 3 else 0


def _parse_letter_version(
    letter: str,
    number: Union[str, bytes, SupportsInt],
) -> Optional[Tuple[str, int]]:

    if letter:
        # We consider there to be an implicit 0 in a pre-release if there is
        # not a numeral associated with it.
        if number is None:
            number = 0

        # We normalize any letters to their lower case form
        letter = letter.lower()

        # We consider some words to be alternate spellings of other words and
        # in those cases we want to normalize the spellings to our preferred
        # spelling.
        if letter == "alpha":
            letter = "a"
        elif letter == "beta":
            letter = "b"
        elif letter in ["c", "pre", "preview"]:
            letter = "rc"
        elif letter in ["rev", "r"]:
            letter = "post"

        return letter, int(number)
    if not letter and number:
        # We assume if we are given a number, but we are not given a letter
        # then this is using the implicit post release syntax (e.g. 1.0-1)
        letter = "post"

        return letter, int(number)

    return None
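
# Illustrative normalizations performed above (a sketch, not executed as part
# of the module):
#   _parse_letter_version('alpha', None)  -> ('a', 0)
#   _parse_letter_version('RC', '2')      -> ('rc', 2)
#   _parse_letter_version(None, '1')      -> ('post', 1)   # implicit post release
#   _parse_letter_version(None, None)     -> None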


_local_version_separators = re.compile(r"[\._-]")


def _parse_local_version(local: str) -> Optional['LocalType']:
    """
    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
    """
    if local is not None:
        return tuple(
            part.lower() if not part.isdigit() else int(part)
            for part in _local_version_separators.split(local)
        )
    return None


def _cmpkey(
        epoch: int,
        release: Tuple[int, ...],
        pre: Optional[Tuple[str, int]],
        post: Optional[Tuple[str, int]],
        dev: Optional[Tuple[str, int]],
        local: Optional[Tuple['SubLocalType']],
) -> 'CmpKey':

    # When we compare a release version, we want to compare it with all of the
    # trailing zeros removed. So we reverse the list, drop all of the now
    # leading zeros until we come to something non zero, re-reverse the rest
    # back into the correct order, and use that tuple as our sorting key.
    _release = tuple(
        reversed(list(dropwhile(lambda x: x == 0, reversed(release))))
    )
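    # e.g. a release of (1, 0, 0) collapses to (1,), so "1.0" and "1.0.0"
    # produce equal comparison keys (all other segments being equal).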

    # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
    # We'll do this by abusing the pre segment, but we _only_ want to do this
    # if there is not a pre or a post segment. If we have one of those then
    # the normal sorting rules will handle this case correctly.
    if pre is None and post is None and dev is not None:
        _pre: PrePostDevType = NegativeInfinity
    # Versions without a pre-release (except as noted above) should sort after
    # those with one.
    elif pre is None:
        _pre = Infinity
    else:
        _pre = pre

    # Versions without a post segment should sort before those with one.
    if post is None:
        _post: PrePostDevType = NegativeInfinity

    else:
        _post = post

    # Versions without a development segment should sort after those with one.
    if dev is None:
        _dev: PrePostDevType = Infinity

    else:
        _dev = dev

    if local is None:
        # Versions without a local segment should sort before those with one.
        _local: LocalType = NegativeInfinity
    else:
        # Versions with a local segment need that segment parsed to implement
        # the sorting rules in PEP440.
        # - Alpha numeric segments sort before numeric segments
        # - Alpha numeric segments sort lexicographically
        # - Numeric segments sort numerically
        # - Shorter versions sort before longer versions when the prefixes
        #   match exactly
        _local = tuple(
            (i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
        )

    return epoch, _release, _pre, _post, _dev, _local


================================================
FILE: src/hangar/backends/__init__.py
================================================
"""Definition and dynamic routing to Hangar backend implementations.

This module defines the available backends for a Hangar installation & provides
dynamic routing of method calls to the appropriate backend from a stored record
specification.

Identification
--------------

A two character ASCII code identifies which backend/version some record belongs
to. Valid characters are the union of ``ascii_lowercase``, ``ascii_uppercase``,
and ``digits``:

.. centered:: ``abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789``

Though stored as bytes in the backend, we use human-readable characters (and not
unprintable bytes) to aid in human tasks like developer database dumps and
debugging. The characters making up the two digit code have the following
semantic meanings:

   *  First Character (element 0) indicates the ``backend type`` used.

   *  Second character (element 1) indicates the ``version`` of the backend type
      which should be used to parse the specification & access data (more on
      this later)

The number of codes possible (a 2-choice permutation with repetition) is 3844,
which we anticipate to be more than sufficient long into the future. As a
convention, the range of values in which the first digit of the code falls
can be used to identify the storage medium location:

   *  Lowercase ``ascii_letters`` & digits ``[0, 1, 2, 3, 4]`` -> reserved for
      backends handling data on the local disk.

   *  Uppercase ``ascii_letters`` & digits ``[5, 6, 7, 8, 9]`` -> reserved for
      backends referring to data residing on a remote server.

This is not a hard and fast rule though, and can be changed in the future if the
need arises.
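
As an illustration (a small sketch mirroring the ``BACKEND_IS_LOCAL_MAP``
constructed at the bottom of this module; the helper name ``is_local`` is
illustrative only), the locality of a record can be determined from the first
character of its format code alone::

   import string

   _local_prefixes = string.digits[0:5] + string.ascii_lowercase

   def is_local(format_code: str) -> bool:
       # '00', '01', '10', '30', '31' -> local disk;  '50' -> remote reference
       return format_code[0] in _local_prefixes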

Process & Guarantees
--------------------

In order to maintain backwards compatibility across versions of Hangar into the
future the following ruleset is specified and MUST BE HONORED:

*  When a new backend is proposed, the contributor(s) provide the class with a
   meaningful name (``HDF5``, ``NUMPY``, ``TILEDB``, etc) identifying the
   backend to Hangar developers. The review team will provide:

   -  ``backend type`` code
   -  ``version`` code

   which all records related to that implementation identify themselves with. In
   addition, externally facing classes / methods go by a canonical name which is
   the concatenation of the ``meaningful name`` and the assigned ``format code``;
   ie. for ``backend name: 'NUMPY'`` with assigned ``type code: '1'`` and
   ``version code: '0'``, external method / class names must start with
   ``NUMPY_10_`` (eg. ``NUMPY_10_foo``).

*  Once a new backend is accepted, the code assigned to it is PERMANENT &
   UNCHANGING. The same code cannot be used in the future for other backends.

*  Each backend independently determines the information it needs to log/store
   to uniquely identify and retrieve a sample stored by it. There is no standard
   format, each is free to define whatever fields they find most convenient.
   Unique encode/decode methods are defined in order to serialize this
   information to bytes and then reconstruct the information later. These bytes
   are what are passed in when a retrieval request is made, and returned when a
   storage request for some piece of data is performed.

*  Once accepted, the record format specification (ie. the byte representation
   described above) cannot be modified in any way. This must remain permanent!

*  Backend (internal) methods can be updated, optimized, and/or changed at any
   time so long as:

   *  No changes to the record format specification are introduced

   *  Data stored via any previous iteration of the backend's accessor methods
      can be retrieved bitwise exactly by the "updated" version.

Before proposing a new backend or making changes to this file, please consider
reaching out to the Hangar core development team so we can guide you through the
process.
"""
import string
from typing import Dict

from .specs import (
    HDF5_00_DataHashSpec,
    HDF5_01_DataHashSpec,
    NUMPY_10_DataHashSpec,
    LMDB_30_DataHashSpec,
    LMDB_31_DataHashSpec,
    REMOTE_50_DataHashSpec,
)
from .specparse import backend_decoder

from .hdf5_00 import HDF5_00_FileHandles, HDF5_00_Options
from .hdf5_01 import HDF5_01_FileHandles, HDF5_01_Options
from .lmdb_30 import LMDB_30_FileHandles, LMDB_30_Options
from .lmdb_31 import LMDB_31_FileHandles, LMDB_31_Options
from .numpy_10 import NUMPY_10_FileHandles, NUMPY_10_Options
from .remote_50 import REMOTE_50_Handler, REMOTE_50_Options


BACKEND_ACCESSOR_MAP = {
    # LOCALS -> [00:50] + ['aa':'zz']
    '00': HDF5_00_FileHandles,
    '01': HDF5_01_FileHandles,
    '10': NUMPY_10_FileHandles,
    '30': LMDB_30_FileHandles,
    '31': LMDB_31_FileHandles,
    # REMOTES -> [50:99] + ['AA':'ZZ']
    '50': REMOTE_50_Handler,
}

BACKEND_OPTIONS_MAP = {
    '00': HDF5_00_Options,
    '01': HDF5_01_Options,
    '10': NUMPY_10_Options,
    '30': LMDB_30_Options,
    '31': LMDB_31_Options,
    '50': REMOTE_50_Options,
}

_local_prefixes = string.digits[0:5] + string.ascii_lowercase

BACKEND_IS_LOCAL_MAP: Dict[str, bool] = {
    k: bool(k[0] in _local_prefixes) for k in BACKEND_ACCESSOR_MAP.keys()
}

__all__ = [
    'backend_decoder', 'HDF5_00_DataHashSpec', 'HDF5_01_DataHashSpec',
    'NUMPY_10_DataHashSpec', 'LMDB_30_DataHashSpec', 'REMOTE_50_DataHashSpec',
    'LMDB_31_DataHashSpec', 'BACKEND_OPTIONS_MAP', 'BACKEND_ACCESSOR_MAP',
    'BACKEND_IS_LOCAL_MAP',
]


================================================
FILE: src/hangar/backends/chunk.py
================================================
"""
Portions of this code have been taken and modified from the "PyTables" project.

URL:      https://github.com/PyTables/PyTables
File:     tables/leaf.py
Commit:   1e7b14e87507c2392265321fe18b2f1f5920ea7f
Accessed: 23 JAN 2020

PyTables License
-------------------------------------------------------------------------------
License: BSD
URL:     https://github.com/PyTables/PyTables/blob/1e7b14e875/LICENSE.txt
"""
import numpy as np
import math


SizeType = np.int64


def _csformula(expected_mb):
    """Return the fitted chunksize for expected_mb."""

    # For a basesize of 8 KB, this will return:
    # 8 KB for datasets <= 1 MB
    # 1 MB for datasets >= 10 TB
    basesize = 8 * 1024  # 8 KB is a good minimum
    return basesize * int(2 ** math.log10(expected_mb))


def _limit_es(expected_mb):
    """Protection against creating too small or too large chunks."""

    if expected_mb < 1:  # < 1 MB
        expected_mb = 1
    elif expected_mb > 10 ** 7:  # > 10 TB
        expected_mb = 10 ** 7
    return expected_mb


def _calc_chunksize(expected_mb):
    """Compute the optimum HDF5 chunksize for I/O purposes.

    Rationale: HDF5 takes the data in bunches of chunksize length to write them
    on disk. A B-tree in memory is used to map structures on disk. The more
    chunks that are allocated for a dataset the larger the B-tree. Large
    B-trees take memory and cause file storage overhead as well as more disk
    I/O and higher contention for the metadata cache.  You have to balance
    between memory and I/O overhead (small B-trees) and time to access data
    (big B-trees). The tuning of the chunksize parameter affects the
    performance and the memory consumed. This is based on my own experiments
    and, as always, your mileage may vary.
    """

    expected_mb = _limit_es(expected_mb)
    zone = int(math.log10(expected_mb))
    expected_mb = 10 ** zone
    chunksize = _csformula(expected_mb)
    # XXX: Multiplying by this constant factor seems optimal for sequential access
    return chunksize * 24
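
# A worked example of the formula above (a sketch; not executed on import):
# for ``expected_mb == 250`` -> ``_limit_es`` keeps 250, ``zone == 2``, so
# ``expected_mb`` rounds down to ``10 ** 2 == 100``; ``_csformula(100)`` returns
# ``8192 * int(2 ** log10(100)) == 32768`` bytes, and the final chunksize is
# ``32768 * 24 == 786432`` bytes (768 KB).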


def _rowsize(shape, maindim, itemsize):
    """"The size of the rows in bytes in dimensions orthogonal to *maindim*."

    shape:
        Shape of the sample to fit in the row

    maindim:
        The dimension along which iterators work. Its value is 0 (i.e. the first
        dimension) when the dataset is not extendable, and self.extdim (where
        available) for extendable ones.

    itemsize:
        nbytes of each element

    The meaning of *atomic* is that individual elements of a cell can not be
    extracted directly by indexing (i.e.  __getitem__()) the dataset; e.g. if a
    dataset has shape (2, 2) and its atoms have shape (3,), to get the third
    element of the cell at (1, 0) one should use dataset[1,0][2] instead of
    dataset[1,0,2].
    """
    rowsize = itemsize
    for i, dim in enumerate(shape):
        if i != maindim:
            rowsize *= dim
    return rowsize


def calc_chunkshape(shape, expectedrows, itemsize, maindim):
    """Calculate the shape for the HDF5 chunk.

    shape:
        Shape of the sample to fit in the row

    expectedrows:
        how many samples will fit into the file container

    itemsize:
        nbytes of each element

    maindim:
        The dimension along which iterators work. Its value is 0 (i.e. the first
        dimension) when the dataset is not extendable, and self.extdim (where
        available) for extendable ones.

        may want to set to shape.index(max(shape))
    """

    # In case of a scalar shape, return the unit chunksize
    if shape == ():
        return (SizeType(1),)

    MB = 1024 * 1024
    # if shape is sufficiently small, no need to further chunk
    # At time of writing, set to be less than 1MB since that is
    # the limit to hdf5 chunk cache.
    if ((np.prod(shape) * itemsize) < MB) and (shape != ()):
        return shape

    # Compute the chunksize
    rsize = _rowsize(shape, maindim, itemsize)
    expected_mb = (expectedrows * rsize) // MB
    chunksize = _calc_chunksize(expected_mb)

    # Compute the chunknitems
    chunknitems = chunksize // itemsize
    # Safeguard against itemsizes being extremely large
    if chunknitems == 0:
        chunknitems = 1
    chunkshape = list(shape)
    # Check whether trimming the main dimension is enough
    chunkshape[maindim] = 1
    newchunknitems = np.prod(chunkshape, dtype=SizeType)
    if newchunknitems <= chunknitems:
        chunkshape[maindim] = chunknitems // newchunknitems
    else:
        # No, so start trimming other dimensions as well
        for j in range(len(chunkshape)):
            # Check whether trimming this dimension is enough
            chunkshape[j] = 1
            newchunknitems = np.prod(chunkshape, dtype=SizeType)
            if newchunknitems <= chunknitems:
                chunkshape[j] = chunknitems // newchunknitems
                break
        else:
            # Oops, we ran out of the loop without a break
            # Set the last dimension to chunknitems
            chunkshape[-1] = chunknitems

    # safeguard against outputting chunks which are larger than shape
    if chunkshape[maindim] > shape[maindim]:
        chunkshape[maindim] = shape[maindim]

    return tuple(SizeType(s) for s in chunkshape)
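
# Illustrative behaviour of the routine above (a sketch, not executed as part
# of the module):
#   calc_chunkshape((10, 10), expectedrows=500, itemsize=4, maindim=0)
#   -> (10, 10)   # 10 * 10 * 4 bytes < 1 MB, so the sample is not sub-chunked
#   calc_chunkshape((), expectedrows=500, itemsize=8, maindim=0)
#   -> (1,)       # scalar samples always get the unit chunkshape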


================================================
FILE: src/hangar/backends/hdf5_00.py
================================================
"""Local HDF5 Backend Implementation, Identifier: ``HDF5_00``

Backend Identifiers
===================

*  Backend: ``0``
*  Version: ``0``
*  Format Code: ``00``
*  Canonical Name: ``HDF5_00``

Storage Method
==============

*  Data is written to specific subarray indexes inside an HDF5 "dataset" in a
   single HDF5 File.

*  In each HDF5 File there are ``COLLECTION_COUNT`` "datasets" (named ``["0" :
   "{COLLECTION_COUNT}"]``). These are referred to as ``"dataset number"``

*  Each dataset is a zero-initialized array of:

   *  ``dtype: {schema_dtype}``; ie ``np.float32`` or ``np.uint8``

   *  ``shape: (COLLECTION_SIZE, *{schema_shape.size})``; ie ``(500, 10)`` or
      ``(500, 300)``. The first index in the dataset is referred to as a
      ``collection index``. See technical note below for detailed explanation
      on why the flatten operation is performed.

*  Compression Filters, Chunking Configuration/Options are applied globally for
   all ``datasets`` in a file at dataset creation time.

*  On read and write of all samples the xxhash64_hexdigest is calculated for
   the raw array bytes. This is to ensure that all data in == data out of the
   hdf5 files. That way even if a file is manually edited (bypassing fletcher32
   filter check) we have a quick way to tell that things are not as they should
   be.

Compression Options
===================

Accepts dictionary containing keys

*  ``backend`` == ``"00"``
*  ``complib``
*  ``complevel``
*  ``shuffle``

Blosc-HDF5

*  ``complib`` valid values:

   *  ``'blosc:blosclz'``,
   *  ``'blosc:lz4'``,
   *  ``'blosc:lz4hc'``,
   *  ``'blosc:zlib'``,
   *  ``'blosc:zstd'``

*  ``complevel`` valid values: [0, 9] where 0 is "no compression" and 9 is
   "most compression"

*  ``shuffle`` valid values:

   *  ``None``
   *  ``'none'``
   *  ``'byte'``
   *  ``'bit'``


LZF Filter

*  ``'complib' == 'lzf'``
*  ``'shuffle'`` one of ``[False, None, 'none', True, 'byte']``
*  ``'complevel'`` one of ``[False, None, 'none']``

GZip Filter

*  ``'complib' == 'gzip'``
*  ``'shuffle'`` one of ``[False, None, 'none', True, 'byte']``
*  ``complevel`` valid values: [0, 9] where 0 is "no compression" and 9 is
   "most compression"


Record Format
=============

Fields Recorded for Each Array
------------------------------

*  Format Code
*  File UID
*  xxhash64_hexdigest (ie. checksum)
*  Dataset Number (``0:COLLECTION_COUNT`` dataset selection)
*  Dataset Index (``0:COLLECTION_SIZE`` dataset subarray selection)
*  Subarray Shape


Examples
--------

1)  Adding the first piece of data to a file:

    *  Array shape (Subarray Shape): (10, 10)
    *  File UID: "rlUK3C"
    *  xxhash64_hexdigest: 8067007c0f05c359
    *  Dataset Number: 16
    *  Collection Index: 105

    ``Record Data => "00:rlUK3C:8067007c0f05c359:16:105:10 10"``

2)  Adding a piece of data to the middle of a file:

    *  Array shape (Subarray Shape): (20, 2, 3)
    *  File UID: "rlUK3C"
    *  xxhash64_hexdigest: b89f873d3d153a9c
    *  Dataset Number: "3"
    *  Collection Index: 199

    ``Record Data => "00:rlUK3C:b89f873d3d153a9c:8:199:20 2 3"``
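
The helper ``hdf5_00_encode`` defined later in this module serializes these
fields into the record bytes; for the first example above the call would be
roughly::

    hdf5_00_encode(uid='rlUK3C', cksum='8067007c0f05c359',
                   dset=16, dset_idx=105, shape=(10, 10))
    # -> b'00:rlUK3C:8067007c0f05c359:16:105:10 10'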


Technical Notes
===============

*  Files are read only after initial creation/writes. Only a write-enabled
   checkout can open a HDF5 file in ``"w"`` or ``"a"`` mode, and writer
   checkouts create new files on every checkout, and make no attempt to fill in
   unset locations in previous files. This is not an issue as no disk space is
   used until data is written to the initially created "zero-initialized"
   collection datasets

*  On write: Single Writer Multiple Reader (``SWMR``) mode is set to ensure that
   improper closing (ie. not calling the ``.close()`` method) does not corrupt any data
   which had been previously flushed to the file.

*  On read: SWMR is set to allow multiple readers (in different threads /
   processes) to read from the same file. File handle serialization is handled
   via custom python ``pickle`` serialization/reduction logic which is
   implemented by the high level ``pickle`` reduction ``__setstate__()``,
   ``__getstate__()`` methods.

*  An optimization is performed in order to increase the read / write
   performance of variable shaped datasets. Due to the way that we initialize
   an entire HDF5 file with all datasets pre-created (to the size of the max
   subarray shape), we need to ensure that storing smaller sized arrays (in a
   variable sized Hangar Column) would be effective. Because we use chunked
   storage, certain dimensions which are incomplete could have potentially
   required writes to chunks which are primarily empty (worst case "C" index
   ordering), significantly degrading read / write performance.

   To overcome this, we create HDF5 datasets which have ``COLLECTION_SIZE``
   first dimension size, and only ONE second dimension of size
   ``schema_shape.size()`` (ie. product of all dimensions). For example an
   array schema with shape (10, 10, 3) would be stored in a HDF5 dataset of
   shape (COLLECTION_SIZE, 300). Chunk sizes are chosen to align on the first
   dimension with a second dimension of size which fits the total data into L2
   CPU Cache (< 256 KB). On write, we use the ``np.ravel`` function to
   construct a "view" (not copy) of the array as a 1D array, and then on read
   we reshape the array to the recorded size (a copyless "view-only"
   operation). This is part of the reason that we only accept C ordered arrays
   as input to Hangar.
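
   A minimal sketch of that round trip (values illustrative only)::

      import numpy as np

      arr = np.arange(300, dtype=np.float32).reshape(10, 10, 3)  # C-ordered sample
      flat = np.ravel(arr)                # 1D "view", no copy for C-ordered input
      # ... flat is written into dataset[collection_idx, :arr.size] ...
      restored = flat.reshape(10, 10, 3)  # copyless "view" back to the stored shape
      assert (restored == arr).all()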
"""
import logging
import os
from collections import ChainMap
from contextlib import suppress
from functools import partial
from pathlib import Path
from typing import MutableMapping, Tuple, Optional, Union, Callable

import h5py
import numpy as np

try:
    # hdf5plugin warns if a filter is already loaded.
    _logger = logging.getLogger('hdf5plugin')
    _initialLevel = _logger.getEffectiveLevel()
    _logger.setLevel(logging.ERROR)
    import hdf5plugin
    if 'blosc' not in hdf5plugin.FILTERS:
        raise ImportError(f'BLOSC unavailable via hdf5plugin: {hdf5plugin.FILTERS}')
finally:
    _logger.setLevel(_initialLevel)
from xxhash import xxh64_hexdigest

from .specs import HDF5_00_DataHashSpec
from .. import __version__
from ..optimized_utils import SizedDict
from ..constants import DIR_DATA_REMOTE, DIR_DATA_STAGE, DIR_DATA_STORE, DIR_DATA
from ..utils import random_string, set_blosc_nthreads
from ..optimized_utils import find_next_prime
from ..op_state import reader_checkout_only, writer_checkout_only
from ..typesystem import Descriptor, OneOf, DictItems, SizedIntegerTuple, checkedmeta

set_blosc_nthreads()

# ----------------------------- Configuration ---------------------------------

_FmtCode = '00'

# contents of a single hdf5 file
COLLECTION_SIZE = 250
COLLECTION_COUNT = 100

# chunking options for compression schemes
CHUNK_MAX_NBYTES = 255_000  # < 256 KB to fit in L2 CPU Cache
CHUNK_MAX_RDCC_NBYTES = 100_000_000
CHUNK_RDCC_W0 = 0.75

# -------------------------------- Parser Implementation ----------------------


def hdf5_00_encode(uid: str, cksum: str, dset: int, dset_idx: int, shape: Tuple[int]) -> bytes:
    """converts the hdf5 data has spec to an appropriate db value

    Parameters
    ----------
    uid : str
        the file name prefix which the data is written to.
    cksum : str
        xxhash_64.hex_digest checksum of the data bytes in numpy array form.
    dset : int
        collection (ie. hdf5 dataset) name to find this data piece.
    dset_idx : int
        collection first axis index in which this data piece resides.
    shape : Tuple[int]
        shape of the data sample written to the collection idx. ie:
        what subslices of the hdf5 dataset should be read to retrieve
        the sample as recorded.

    Returns
    -------
    bytes
        hash data db value recording all input specifications.
    """
    shape_str = " ".join([str(i) for i in shape])
    return f'00:{uid}:{cksum}:{dset}:{dset_idx}:{shape_str}'.encode()


# ------------------------- Accessor Object -----------------------------------


@DictItems(
    expected_keys_required={'complib': True, 'complevel': True, 'shuffle': True},
    expected_values={
        'complib': ['blosc:blosclz', 'blosc:lz4','blosc:lz4hc', 'blosc:zlib', 'blosc:zstd'],
        'complevel': [i for i in range(10)],
        'shuffle': [None, 'none', 'byte', 'bit']})
class BloscCompressionOptions(Descriptor):
    pass


@DictItems(
    expected_keys_required={'complib': True, 'complevel': True, 'shuffle': True},
    expected_values={
        'complib': ['gzip'], 'complevel': [i for i in range(10)], 'shuffle': [True, False]})
class GzipCompressionOptions(Descriptor):
    pass


@DictItems(
    expected_keys_required={'complib': True, 'complevel': False, 'shuffle': True},
    expected_values={
        'complib': ['lzf'], 'complevel': ['none', None], 'shuffle': [True, False]})
class LzfCompressionOptions(Descriptor):
    pass


@OneOf(list(map(lambda x: np.dtype(x).name, [
        np.bool, np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16,
        np.int32, np.int64, np.float16, np.float32, np.float64, np.longdouble])))
class AllowedDtypes(Descriptor):
    """
    Note: ``np.longdouble`` is used since ``np.float128`` is not guaranteed to
    be available on all systems. This is a particular issue with some Windows
    numpy builds.
    """
    pass


class HDF5_00_Options(metaclass=checkedmeta):
    _shape = SizedIntegerTuple(size=32)
    _dtype = AllowedDtypes()
    _lzf = LzfCompressionOptions()
    _gzip = GzipCompressionOptions()
    _blosc = BloscCompressionOptions()
    _avail_filters = ('_lzf', '_gzip', '_blosc')

    def __init__(self, backend_options, dtype, shape, *args, **kwargs):
        self._shape = shape
        self._dtype = dtype
        self._selected_filter = None
        if backend_options is None:
            backend_options = self.default_options

        for filter_attr in self._avail_filters:
            with suppress(KeyError, ValueError):
                setattr(self, filter_attr, backend_options)
                self._selected_filter = filter_attr
                break
        else:  # N.B. for-else loop (ie. "no-break")
            raise ValueError(f'Invalid backend_options {backend_options}')
        self._verify_data_nbytes_larger_than_clib_min()

    def _verify_data_nbytes_larger_than_clib_min(self):
        """blosc clib should not be used if data buffer size < 16 bytes.

        Raises
        ------
        ValueError:
            if the data size is not valid for the clib
        """
        if self._selected_filter in ['_blosc', None]:
            num_items = np.prod(self._shape)
            itemsize = np.dtype(self._dtype).itemsize
            nbytes = itemsize * num_items
            if nbytes <= 16:
                raise ValueError(f'blosc clib requires data buffer size > 16 bytes')

    @property
    def default_options(self):
        if 'blosc' in hdf5plugin.FILTERS:
            try:
                self._verify_data_nbytes_larger_than_clib_min()
                return {'complib': 'blosc:lz4hc', 'complevel': 5, 'shuffle': 'byte'}
            except ValueError:
                pass
        return {'complib': 'lzf', 'complevel': None, 'shuffle': True}

    @property
    def backend_options(self):
        return getattr(self, self._selected_filter)

    @property
    def init_requires(self):
        return ('repo_path', 'schema_shape', 'schema_dtype')


HDF5_00_MapTypes = MutableMapping[str, Union[h5py.File, Callable[[], h5py.File]]]


class HDF5_00_FileHandles(object):
    """Manage HDF5 file handles.

    When in SWMR-write mode, no more than a single file handle can be in the
    "writeable" state. This is an issue where multiple columns may need to
    write to the same column schema.
    """

    def __init__(self, repo_path: Path, schema_shape: tuple, schema_dtype: np.dtype):
        self.path: Path = repo_path
        self.schema_shape: tuple = schema_shape
        self.schema_dtype: np.dtype = schema_dtype
        self._dflt_backend_opts: Optional[dict] = None

        self.rFp: HDF5_00_MapTypes = {}
        self.wFp: HDF5_00_MapTypes = {}
        self.Fp: HDF5_00_MapTypes = ChainMap(self.rFp, self.wFp)
        self.rDatasets = SizedDict(maxsize=100)
        self.wdset: Optional[h5py.Dataset] = None

        self.mode: Optional[str] = None
        self.hIdx: Optional[int] = None
        self.w_uid: Optional[str] = None
        self.hMaxSize: Optional[int] = None
        self.hNextPath: Optional[int] = None
        self.hColsRemain: Optional[int] = None

        self.STAGEDIR: Path = Path(self.path, DIR_DATA_STAGE, _FmtCode)
        self.REMOTEDIR: Path = Path(self.path, DIR_DATA_REMOTE, _FmtCode)
        self.STOREDIR: Path = Path(self.path, DIR_DATA_STORE, _FmtCode)
        self.DATADIR: Path = Path(self.path, DIR_DATA, _FmtCode)
        self.DATADIR.mkdir(exist_ok=True)

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        if self.w_uid in self.wFp:
            self.wFp[self.w_uid]['/'].attrs.modify('next_location', (self.hNextPath, self.hIdx))
            self.wFp[self.w_uid]['/'].attrs.modify('collections_remaining', self.hColsRemain)
            self.wFp[self.w_uid].flush()

    @reader_checkout_only
    def __getstate__(self) -> dict:
        """ensure multiprocess operations can pickle relevant data.
        """
        self.close()
        state = self.__dict__.copy()
        del state['rFp']
        del state['wFp']
        del state['Fp']
        del state['rDatasets']
        del state['wdset']
        return state

    def __setstate__(self, state: dict) -> None:  # pragma: no cover
        """ensure multiprocess operations can pickle relevant data.
        """
        self.__dict__.update(state)
        self.rFp = {}
        self.wFp = {}
        self.Fp = ChainMap(self.rFp, self.wFp)
        self.rDatasets = {}
        self.wdset = None
        self.open(mode=self.mode)

    @property
    def backend_opts(self):
        return self._dflt_backend_opts

    @writer_checkout_only
    def _backend_opts_set(self, val):
        """Nonstandard descriptor method. See notes in ``backend_opts.setter``.
        """
        self._dflt_backend_opts = val
        return

    @backend_opts.setter
    def backend_opts(self, value):
        """
        Using a separate setter method (with the ``@writer_checkout_only`` decorator
        applied) due to bug in python <3.8.

        From: https://bugs.python.org/issue19072
            > The classmethod decorator when applied to a function of a class,
            > does not honour the descriptor binding protocol for whatever it
            > wraps. This means it will fail when applied around a function which
            > has a decorator already applied to it and where that decorator
            > expects that the descriptor binding protocol is executed in order
            > to properly bind the function to the class.
        """
        return self._backend_opts_set(value)

    def open(self, mode: str, *, remote_operation: bool = False):
        """Open an hdf5 file handle in the Handler Singleton

        Parameters
        ----------
        mode : str
            one of `r` or `a` for read only / read-write.
        remote_operation : optional, kwarg only, bool
            if this hdf5 data is being created from a remote fetch operation, then
            we don't open any files for reading, and only open files for writing
            which exist in the remote data dir. (default is false, which means that
            write operations use the stage data dir and read operations use data store
            dir)
        """
        self.mode = mode
        if self.mode == 'a':
            process_dir = self.REMOTEDIR if remote_operation else self.STAGEDIR
            process_dir.mkdir(exist_ok=True)
            for uidpth in process_dir.iterdir():
                if uidpth.suffix == '.hdf5':
                    file_pth = self.DATADIR.joinpath(uidpth.name)
                    self.rFp[uidpth.stem] = partial(
                        h5py.File, file_pth, 'r', swmr=True, libver='latest')

        if not remote_operation:
            if not self.STOREDIR.is_dir():
                return
            for uidpth in self.STOREDIR.iterdir():
                if uidpth.suffix == '.hdf5':
                    file_pth = self.DATADIR.joinpath(uidpth.name)
                    self.rFp[uidpth.stem] = partial(
                        h5py.File, file_pth, 'r', swmr=True, libver='latest')

    def close(self):
        """Close a file handle after writes have been completed

        behavior changes depending on write-enable or read-only file

        Returns
        -------
        bool
            True if success, otherwise False.
        """
        if self.mode == 'a':
            if self.w_uid in self.wFp:
                self.wFp[self.w_uid]['/'].attrs.modify('next_location', (self.hNextPath, self.hIdx))
                self.wFp[self.w_uid]['/'].attrs.modify('collections_remaining', self.hColsRemain)
                self.wFp[self.w_uid].flush()
            for uid in list(self.wFp.keys()):
                with suppress(AttributeError):
                    self.wFp[uid].close()
                del self.wFp[uid]
            self.wdset = None
            self.hMaxSize = None
            self.hNextPath = None
            self.hIdx = None
            self.hColsRemain = None
            self.w_uid = None

        for uid in list(self.rFp.keys()):
            with suppress(AttributeError):
                self.rFp[uid].close()
            del self.rFp[uid]
        self.rDatasets = {}

    @staticmethod
    def delete_in_process_data(repo_path: Path, *, remote_operation=False) -> None:
        """Removes some set of files entirely from the stage/remote directory.

        DANGER ZONE. This should essentially only be used to perform hard resets
        of the repository state.

        Parameters
        ----------
        repo_path : Path
            path to the repository on disk
        remote_operation : optional, kwarg only, bool
            If true, modify contents of the remote_dir, if false (default) modify
            contents of the staging directory.
        """
        data_dir = Path(repo_path, DIR_DATA, _FmtCode)
        PDIR = DIR_DATA_STAGE if not remote_operation else DIR_DATA_REMOTE
        process_dir = Path(repo_path, PDIR, _FmtCode)
        if not process_dir.is_dir():
            return

        for uidpth in process_dir.iterdir():
            if uidpth.suffix == '.hdf5':
                os.remove(process_dir.joinpath(uidpth.name))
                os.remove(data_dir.joinpath(uidpth.name))
        os.rmdir(process_dir)

    @staticmethod
    def _dataset_opts(complib: str, complevel: int, shuffle: Union[bool, str]) -> dict:
        """specify compression options for the hdf5 dataset.

        .. seealso:: :function:`_blosc_opts`

        to enable blosc compression, use the conda-forge `blosc-hdf5-plugin` package.

        .. seealso::

        * https://github.com/conda-forge/staged-recipes/pull/7650
        * https://github.com/h5py/h5py/issues/611

        Parameters
        ----------
        complib : str
            the compression lib to use, one of ['lzf', 'gzip', 'blosc:blosclz',
            'blosc:lz4', 'blosc:lz4hc', 'blosc:zlib', 'blosc:zstd']
        complevel : int
            compression level to specify (accepts values [0, 9] for all except 'lzf'
            where no complevel is accepted)
        shuffle : bool
            if True or 'byte', enable the byte shuffle filter; for blosc
            compression 'bit' is accepted as well. False or None indicates
            no shuffle should be applied.
        """
        # ---- blosc hdf5 plugin filters ----
        _blosc_compression = {
            'blosc:blosclz': 0,
            'blosc:lz4': 1,
            'blosc:lz4hc': 2,
            # Not built 'snappy': 3,
            'blosc:zlib': 4,
            'blosc:zstd': 5}
        _blosc_shuffle = {None: 0, 'none': 0, 'byte': 1, 'bit': 2}
        _blosc_complevel = {**{i: i for i in range(10)}, None: 9, 'none': 9}

        # ---- h5py built in filters ----
        _lzf_gzip_shuffle = {None: False, False: False, 'none': False, True: True, 'byte': True}
        _lzf_complevel = {False: None, None: None, 'none': None}
        _gzip_complevel = {**{i: i for i in range(10)}, None: 4, 'none': 4}

        if complib.startswith('blosc'):
            args = {
                'compression': 32001,
                'compression_opts': (
                    0, 0, 0, 0,
                    _blosc_complevel[complevel],
                    _blosc_shuffle[shuffle],
                    _blosc_compression[complib]),
                'shuffle': False}
        elif complib == 'lzf':
            args = {
                'shuffle': _lzf_gzip_shuffle[shuffle],
                'compression': complib,
                'compression_opts': _lzf_complevel[complevel]}
        elif complib == 'gzip':
            args = {
                'shuffle': _lzf_gzip_shuffle[shuffle],
                'compression': complib,
                'compression_opts': _gzip_complevel[complevel]}
        elif complib in (None, False, 'none'):
            args = {
                'shuffle': False,
                'compression': None,
                'compression_opts': None}
        else:
            raise ValueError(f'unknown value for opt arg `complib`: {complib}')
        return args
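
    # Illustrative result of the mapping above (a sketch, not executed):
    #   _dataset_opts(complib='blosc:lz4hc', complevel=5, shuffle='byte')
    #   -> {'compression': 32001,
    #       'compression_opts': (0, 0, 0, 0, 5, 1, 2),
    #       'shuffle': False}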

    @staticmethod
    def _chunk_opts(sample_array: np.ndarray, max_chunk_nbytes: int) -> Tuple[list, int]:
        """Determine the chunk shape so each array chunk fits into configured nbytes.

        Currently the chunk nbytes are not user configurable. Instead the constant
        ``CHUNK_MAX_NBYTES`` is used to determine when to split.

        Parameters
        ----------
        sample_array : `np.array`
            Sample array whose shape and dtype should be used as the basis of the
            chunk shape determination
        max_chunk_nbytes : int
            how many bytes the array chunks should be limited to.

        Returns
        -------
        list
            list of ints of length == rank of `sample_array` specifying chunk sizes
            to split `sample_array` into nbytes
        int
            nbytes which the chunk will fit in. Will be <= ``CHUNK_MAX_NBYTES``
        """
        chunk_size = int(np.floor(max_chunk_nbytes / sample_array.itemsize))
        if chunk_size > sample_array.size:
            chunk_size = sample_array.size
        chunk_shape = [chunk_size]
        chunk_nbytes = np.zeros(shape=chunk_shape, dtype=sample_array.dtype).nbytes

        return (chunk_shape, chunk_nbytes)

    def _create_schema(self, *, remote_operation: bool = False):
        """stores the shape and dtype as the schema of a column.

        Parameters
        ----------
        remote_operation : optional, kwarg only, bool
            if this schema is being created from a remote fetch operation, then do not
            place the file symlink in the staging directory. Instead symlink it
            to a special remote staging directory. (default is False, which places the
            symlink in the stage data directory.)

        Notes
        -----

        Parameters set for raw-data-chunk-cache (rdcc) values:

        * rdcc_nbytes: sets the total size (measured in bytes) of the raw data chunk
          cache for each dataset. This should be set to the size of each chunk times
          the number of chunks that are likely to be needed in cache.
        * rdcc_w0: sets the policy for chunks to be removed from the cache when more
          space is needed. If set to 0, always evict the least recently used chunk in
          cache. If set to 1, always evict the least recently used chunk which has
          been fully read or written. If the value is between 0 and 1, the behavior
          will be a blend of the two.
        * rdcc_nslots: The number of chunk slots in the cache for this entire file.
          In order for quick lookup, a hash map is used for each chunk value. For
          maximum performance, this value should be set approximately 100 times that
          number of chunks.

        .. seealso::

            http://docs.h5py.org/en/stable/high/file.html#chunk-cache

        """
        # -------------------- Chunk & RDCC Vals ------------------------------

        sample_array = np.zeros(self.schema_shape, dtype=self.schema_dtype)
        chunk_shape, chunk_nbytes = self._chunk_opts(
            sample_array=sample_array, max_chunk_nbytes=CHUNK_MAX_NBYTES)

        rdcc_nbytes_val = sample_array.nbytes * COLLECTION_SIZE
        if rdcc_nbytes_val < CHUNK_MAX_NBYTES:
            rdcc_nbytes_val = CHUNK_MAX_NBYTES
        elif rdcc_nbytes_val > CHUNK_MAX_RDCC_NBYTES:
            rdcc_nbytes_val = CHUNK_MAX_RDCC_NBYTES

        rdcc_nslots_guess = np.math.ceil(rdcc_nbytes_val / chunk_nbytes) * 100
        rdcc_nslots_prime_val = find_next_prime(rdcc_nslots_guess)

        # ---------------------------- File Creation --------------------------

        uid = random_string()
        file_path = self.DATADIR.joinpath(f'{uid}.hdf5')
        self.wFp[uid] = h5py.File(file_path,
                                  mode='w',
                                  libver='latest',
                                  rdcc_nbytes=rdcc_nbytes_val,
                                  rdcc_w0=CHUNK_RDCC_W0,
                                  rdcc_nslots=rdcc_nslots_prime_val)
        self.w_uid = uid
        self.wdset = None
        self.hNextPath = 0
        self.hIdx = 0
        self.hColsRemain = COLLECTION_COUNT
        self.hMaxSize = COLLECTION_SIZE

        process_dir = self.REMOTEDIR if remote_operation else self.STAGEDIR
        Path(process_dir, f'{uid}.hdf5').touch()

        # ----------------------- Dataset Creation ----------------------------

        optKwargs = self._dataset_opts(**self._dflt_backend_opts)
        for dset_num in range(COLLECTION_COUNT):
            self.wFp[uid].create_dataset(
                f'/{dset_num}',
                shape=(COLLECTION_SIZE, sample_array.size),
                dtype=sample_array.dtype,
                maxshape=(COLLECTION_SIZE, sample_array.size),
                chunks=(1, *chunk_shape),
                **optKwargs)

        # ---------------------- Attribute Config Vals ------------------------

        self.wFp[self.w_uid]['/'].attrs['HANGAR_VERSION'] = __version__
        self.wFp[self.w_uid]['/'].attrs['schema_shape'] = sample_array.shape
        self.wFp[self.w_uid]['/'].attrs['schema_dtype_num'] = sample_array.dtype.num
        self.wFp[self.w_uid]['/'].attrs['next_location'] = (0, 0)
        self.wFp[self.w_uid]['/'].attrs['collection_max_size'] = COLLECTION_SIZE
        self.wFp[self.w_uid]['/'].attrs['collection_total'] = COLLECTION_COUNT
        self.wFp[self.w_uid]['/'].attrs['collections_remaining'] = COLLECTION_COUNT
        self.wFp[self.w_uid]['/'].attrs['rdcc_nbytes'] = rdcc_nbytes_val
        self.wFp[self.w_uid]['/'].attrs['rdcc_w0'] = CHUNK_RDCC_W0
        self.wFp[self.w_uid]['/'].attrs['rdcc_nslots'] = rdcc_nslots_prime_val
        self.wFp[self.w_uid]['/'].attrs['chunk_shape'] = chunk_shape
        if optKwargs['compression_opts'] is not None:
            self.wFp[self.w_uid]['/'].attrs['compression_opts'] = optKwargs['compression_opts']
        else:
            self.wFp[self.w_uid]['/'].attrs['compression_opts'] = False

        self.wFp[self.w_uid].flush()
        try:
            self.wFp[self.w_uid].swmr_mode = True
        except ValueError:
            assert self.wFp[self.w_uid].swmr_mode is True
        self.wdset = self.wFp[self.w_uid][f'/{self.hNextPath}']

    def read_data(self, hashVal: HDF5_00_DataHashSpec) -> np.ndarray:
        """Read data from an hdf5 file handle at the specified locations

        Parameters
        ----------
        hashVal : HDF5_00_DataHashSpec
            record specification parsed from its serialized store val in lmdb.

        Returns
        -------
        np.array
            requested data.
        """
        arrSize = 1
        for dim in hashVal.shape:
            arrSize *= dim
        srcSlc = (hashVal.dataset_idx, slice(0, arrSize))
        dsetCol = f'/{hashVal.dataset}'
        rdictkey = f'{hashVal.uid}{dsetCol}'

        if self.schema_dtype:  # if is not None
            destArr = np.empty((arrSize,), self.schema_dtype)
            if rdictkey in self.rDatasets:
                self.rDatasets[rdictkey].read_direct(destArr, srcSlc, None)
            else:
                try:
                    self.Fp[hashVal.uid][dsetCol].read_direct(destArr, srcSlc, None)
                    self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                except TypeError:
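                    # the value stored in ``Fp`` is still the lazy ``functools.partial``
                    # opener set up in ``open()``; call it to get a real ``h5py.File``.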
                    self.Fp[hashVal.uid] = self.Fp[hashVal.uid]()
                    self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                    self.rDatasets[rdictkey].read_direct(destArr, srcSlc, None)
                except KeyError:
                    process_dir = self.STAGEDIR if self.mode == 'a' else self.STOREDIR
                    if Path(process_dir, f'{hashVal.uid}.hdf5').is_file():
                        file_pth = self.DATADIR.joinpath(f'{hashVal.uid}.hdf5')
                        self.rFp[hashVal.uid] = h5py.File(file_pth, 'r', swmr=True, libver='latest')
                        self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                        self.rDatasets[rdictkey].read_direct(destArr, srcSlc, None)
                    else:
                        raise
        else:
            if rdictkey in self.rDatasets:
                destArr = self.rDatasets[rdictkey][srcSlc]
            else:
                try:
                    destArr = self.Fp[hashVal.uid][dsetCol][srcSlc]
                    self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                except TypeError:
                    self.Fp[hashVal.uid] = self.Fp[hashVal.uid]()
                    destArr = self.Fp[hashVal.uid][dsetCol][srcSlc]
                    self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                except KeyError:
                    process_dir = self.STAGEDIR if self.mode == 'a' else self.STOREDIR
                    if Path(process_dir, f'{hashVal.uid}.hdf5').is_file():
                        file_pth = self.DATADIR.joinpath(f'{hashVal.uid}.hdf5')
                        self.rFp[hashVal.uid] = h5py.File(file_pth, 'r', swmr=True, libver='latest')
                        destArr = self.Fp[hashVal.uid][dsetCol][srcSlc]
                        self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                    else:
                        raise

        out = destArr.reshape(hashVal.shape)
        if xxh64_hexdigest(out) != hashVal.checksum:
            # try casting to check if dtype does not match for all zeros case
            out = out.astype(np.typeDict[self.Fp[hashVal.uid]['/'].attrs['schema_dtype_num']])
            if xxh64_hexdigest(out) != hashVal.checksum:
                raise RuntimeError(
                    f'DATA CORRUPTION Checksum {xxh64_hexdigest(out)} != recorded {hashVal}')
        return out

    def write_data(self, array: np.ndarray, *, remote_operation: bool = False) -> bytes:
        """verifies correctness of array data and performs write operation.

        Parameters
        ----------
        array : np.ndarray
            tensor to write to group.
        remote_operation : optional, kwarg only, bool
            If this is a remote process which is adding data, any necessary
            hdf5 dataset files will be created in the remote data dir instead
            of the stage directory. (default is False, which is for a regular
            access process)

        Returns
        -------
        bytes
            string identifying the collection dataset and collection dim-0 index
            which the array can be accessed at.
        """
        checksum = xxh64_hexdigest(array)
        if self.w_uid in self.wFp:
            self.hIdx += 1
            if self.hIdx >= self.hMaxSize:
                self.wdset.flush()
                self.hIdx = 0
                self.hNextPath += 1
                self.hColsRemain -= 1
                self.wdset = self.wFp[self.w_uid][f'/{self.hNextPath}']
                if self.hColsRemain <= 1:
                    self.wFp[self.w_uid]['/'].attrs.modify('next_location', (self.hNextPath, self.hIdx))
                    self.wFp[self.w_uid]['/'].attrs.modify('collections_remaining', self.hColsRemain)
                    self.wFp[self.w_uid].flush()
                    self._create_schema(remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)

        destSlc = (self.hIdx, slice(0, array.size))
        flat_arr = np.ravel(array)
        self.wdset.write_direct(flat_arr, None, destSlc)
        self.wdset.flush()
        return hdf5_00_encode(self.w_uid, checksum, self.hNextPath, self.hIdx, array.shape)


================================================
FILE: src/hangar/backends/hdf5_01.py
================================================
"""Local HDF5 Backend Implementation, Identifier: ``HDF5_01``

Backend Identifiers
===================

*  Backend: ``0``
*  Version: ``1``
*  Format Code: ``01``
*  Canonical Name: ``HDF5_01``

Storage Method
==============

*  This module is meant to handle larger datasets which are of fixed size. IO
   and significant compression optimization is achieved by storing arrays at
   their appropriate top level index in the same shape they naturally assume
   and chunking over the entire subarray domain making up a sample (rather than
   having to subdivide chunks when the sample could be variably shaped.)

*  Data is written to specific subarray indexes inside an HDF5 "dataset" in a
   single HDF5 File.

*  In each HDF5 File there are ``COLLECTION_COUNT`` "datasets" (named ``["0" :
   "{COLLECTION_COUNT}"]``). These are referred to as ``"dataset number"``

*  Each dataset is a zero-initialized array of:

   *  ``dtype: {schema_dtype}``; ie ``np.float32`` or ``np.uint8``

   *  ``shape: (COLLECTION_SIZE, *{schema_shape})``; ie ``(500, 10, 10)`` or
      ``(500, 512, 512, 320)``. The first index in the dataset is referred to as a
      ``collection index``.

*  Compression Filters, Chunking Configuration/Options are applied globally for
   all ``datasets`` in a file at dataset creation time.

*  On read and write of all samples the xxhash64_hexdigest is calculated for
   the raw array bytes. This is to ensure that all data in == data out of the
   hdf5 files. That way even if a file is manually edited (bypassing fletcher32
   filter check) we have a quick way to tell that things are not as they should
   be.

Compression Options
===================

Accepts dictionary containing keys

*  ``backend`` == ``"01"``
*  ``complib``
*  ``complevel``
*  ``shuffle``

Blosc-HDF5

*  ``complib`` valid values:

   *  ``'blosc:blosclz'``,
   *  ``'blosc:lz4'``,
   *  ``'blosc:lz4hc'``,
   *  ``'blosc:zlib'``,
   *  ``'blosc:zstd'``

*  ``complevel`` valid values: [0, 9] where 0 is "no compression" and 9 is
   "most compression"

*  ``shuffle`` valid values:

   *  ``None``
   *  ``'none'``
   *  ``'byte'``
   *  ``'bit'``


LZF Filter

*  ``'complib' == 'lzf'``
*  ``'shuffle'`` one of ``[False, None, 'none', True, 'byte']``
*  ``'complevel'`` one of ``[False, None, 'none']``

GZip Filter

*  ``'complib' == 'gzip'``
*  ``'shuffle'`` one of ``[False, None, 'none', True, 'byte']``
*  ``complevel`` valid values: [0, 9] where 0 is "no compression" and 9 is
   "most compression"


Record Format
=============

Fields Recorded for Each Array
------------------------------

*  Format Code
*  File UID
*  xxhash64_hexdigest (ie. checksum)
*  Dataset Number (``0:COLLECTION_COUNT`` dataset selection)
*  Dataset Index (``0:COLLECTION_SIZE`` dataset subarray selection)
*  Subarray Shape

Examples
--------

1)  Adding the first piece of data to a file:

    *  Array shape (Subarray Shape): (10, 10)
    *  File UID: "rlUK3C"
    *  xxhash64_hexdigest: 8067007c0f05c359
    *  Dataset Number: 16
    *  Collection Index: 105

    ``Record Data => "01:rlUK3C:8067007c0f05c359:16:105:10 10"``

2)  Adding a piece of data to the middle of a file:

    *  Array shape (Subarray Shape): (20, 2, 3)
    *  File UID: "rlUK3C"
    *  xxhash64_hexdigest: b89f873d3d153a9c
    *  Dataset Number: "3"
    *  Collection Index: 199

    ``Record Data => "01:rlUK3C:b89f873d3d153a9c:8:199:20 2 3"``


Technical Notes
===============

*  The majority of methods not directly related to "chunking" and the "raw data
   chunk cache" are either identical to HDF5_00, or only slightly modified.

*  Files are read only after initial creation/writes. Only a write-enabled
   checkout can open a HDF5 file in ``"w"`` or ``"a"`` mode, and writer
   checkouts create new files on every checkout, and make no attempt to fill in
   unset locations in previous files. This is not an issue as no disk space is
   used until data is written to the initially created "zero-initialized"
   collection datasets

*  On write: Single Writer Multiple Reader (``SWMR``) mode is set to ensure that
   improper closing (ie. not calling the ``.close()`` method) does not corrupt any data
   which had been previously flushed to the file.

*  On read: SWMR is set to allow multiple readers (in different threads /
   processes) to read from the same file. File handle serialization is handled
   via custom python ``pickle`` serialization/reduction logic which is
   implemented by the high level ``pickle`` reduction ``__setstate__()``,
   ``__getstate__()`` methods.

*  An optimization is performed in order to increase the read / write
   performance of fixed size datasets. Due to the way that we initialize an
   entire HDF5 file with all datasets pre-created (to the size of the fixed
   subarray shape), and the fact we absolutely know the size / shape /
   access-pattern of the arrays, inefficient IO due to wasted chunk processing
   is not a concern. It is far more efficient for us to completely blow off the
   metadata chunk cache, and chunk each subarray as a single large item.

   This method of processing tends to have a number of significant effects as
   compared to chunked storage methods:

      1. **Compression ratios improve** (by a non-trivial factor). This is
         simply due to the fact that a larger amount of raw data is being passed
         into the compressor at a time. While the exact improvement seen is
         highly dependent on both the data size and compressor used, there
         should be no case where compressing the full tensor uses more disk
         space then chunking the tensor, compressing each chunk individually,
         and then saving each chunk to disk.

      2. **Read performance improves** (so long as a suitable compressor /
         option set was chosen). Instead of issuing (potentially) many read
         requests - one for each chunk - to the storage hardware, significantly
         fewer IOPS are used to retrieve the entire set of compressed raw data
         from disk. Fewer IOPS means much less time waiting on the hard disk.
         Moreover, only a single decompression step is needed to reconstruct
         the numeric array, completely decoupling performance from HDF5's
         ability to parallelize internal filter pipeline operations.

         Additionally, since the entire requested chunk is retrieved in a
         single decompression pipeline run, there is no need for the HDF5 core
         to initialize an intermediate buffer which holds data chunks as each
         decompression operation completes. Further, by preinitializing an
         empty ``numpy.ndarray`` container and using the low level HDF5
         ``read_direct`` method, the decompressed data buffer is passed
         directly into the returned ``ndarray.__array_interface__.data``
         field with no intermediate copy or processing steps (a sketch of
         this read pattern is shown at the end of these notes).

      3. **Shuffle filters are favored**. With much more data to work with in
         a single compression operation, the use of "byte shuffle" filters in
         the compressor spec has been seen to both markedly decrease read time
         and increase compression ratios. Shuffling can significantly reduce
         the disk space required to store some piece of data, further reducing
         the time spent waiting on hard disk IO while incurring a negligible
         cost to decompression speed.

   Taking all of these effects into account, there can be up to an order of
   magnitude increase in read performance as compared to the subarray chunking
   strategy employed by the ``HDF5_00`` backend.

*  Like all other backends at the time of writing, only 'C' ordered arrays
   are accepted by this method.
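
As a rough sketch (the names below are illustrative, not part of the backend
API), the read pattern enabled by the single-chunk-per-subarray strategy
described above looks like::

    import numpy as np

    # dset: an open ``h5py.Dataset`` chunked as ``(1, *subarray_shape)``
    # idx:  the collection index recorded for the requested sample
    out = np.empty(subarray_shape, dtype=dset.dtype)      # preallocated destination
    srcSlc = (idx, *[slice(0, dim) for dim in subarray_shape])
    dset.read_direct(out, srcSlc, None)                    # single decompression, no copy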
"""
import logging
import math
import os
from collections import ChainMap
from contextlib import suppress
from functools import partial
from pathlib import Path
from typing import MutableMapping, Tuple, Optional, Union, Callable

import h5py
import numpy as np

try:
    # hdf5plugin warns if a filter is already loaded.
    _logger = logging.getLogger('hdf5plugin')
    _initialLevel = _logger.getEffectiveLevel()
    _logger.setLevel(logging.ERROR)
    import hdf5plugin
    if 'blosc' not in hdf5plugin.FILTERS:
        raise ImportError(f'BLOSC unavailable via hdf5plugin: {hdf5plugin.FILTERS}')
finally:
    _logger.setLevel(_initialLevel)
from xxhash import xxh64_hexdigest


from .chunk import calc_chunkshape
from .specs import HDF5_01_DataHashSpec
from .. import __version__
from ..optimized_utils import SizedDict
from ..constants import DIR_DATA_REMOTE, DIR_DATA_STAGE, DIR_DATA_STORE, DIR_DATA
from ..op_state import writer_checkout_only, reader_checkout_only
from ..utils import random_string, set_blosc_nthreads
from ..optimized_utils import find_next_prime
from ..typesystem import Descriptor, OneOf, DictItems, SizedIntegerTuple, checkedmeta

set_blosc_nthreads()

# ----------------------------- Configuration ---------------------------------

_FmtCode = '01'

# contents of a single hdf5 file
COLLECTION_SIZE = 100
COLLECTION_COUNT = 100
CHUNK_MAX_RDCC_NBYTES = 250_000_000
CHUNK_RDCC_W0 = 0.75

# -------------------------------- Parser Implementation ----------------------


def hdf5_01_encode(uid: str, cksum: str, dset: int, dset_idx: int,
                   shape: Tuple[int]) -> bytes:
    """converts the hdf5 data has spec to an appropriate db value

    Parameters
    ----------
    uid : str
        the file name prefix which the data is written to.
    cksum : str
        xxhash64_hexdigest checksum of the data bytes in numpy array form.
    dset : int
        collection (ie. hdf5 dataset) name to find this data piece.
    dset_idx : int
        collection first axis index in which this data piece resides.
    shape : Tuple[int]
        shape of the data sample written to the collection idx. ie:
        what subslices of the hdf5 dataset should be read to retrieve
        the sample as recorded.

    Returns
    -------
    bytes
        hash data db value recording all input specifications.
    """
    shape_str = " ".join([str(i) for i in shape])
    return f'01:{uid}:{cksum}:{dset}:{dset_idx}:{shape_str}'.encode()


# ------------------------- Accessor Object -----------------------------------


@DictItems(
    expected_keys_required={'complib': True, 'complevel': True, 'shuffle': True},
    expected_values={
        'complib': ['blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:zlib', 'blosc:zstd'],
        'complevel': [i for i in range(10)],
        'shuffle': [None, 'none', 'byte', 'bit']})
class BloscCompressionOptions(Descriptor):
    pass


@DictItems(
    expected_keys_required={'complib': True, 'complevel': True, 'shuffle': True},
    expected_values={
        'complib': ['gzip'], 'complevel': [i for i in range(10)], 'shuffle': [True, False]})
class GzipCompressionOptions(Descriptor):
    pass


@DictItems(
    expected_keys_required={'complib': True, 'complevel': False, 'shuffle': True},
    expected_values={
        'complib': ['lzf'], 'complevel': ['none', None], 'shuffle': [True, False]})
class LzfCompressionOptions(Descriptor):
    pass


@OneOf(list(map(lambda x: np.dtype(x).name, [
        np.bool, np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16,
        np.int32, np.int64, np.float16, np.float32, np.float64, np.longdouble])))
class AllowedDtypes(Descriptor):
    # Note. np.longdouble since np.float128 not guaranteed to be available on
    # all system. this is a particular issue with some windows numpy builds.
    pass


class HDF5_01_Options(metaclass=checkedmeta):
    _shape = SizedIntegerTuple(size=32)
    _dtype = AllowedDtypes()
    _lzf = LzfCompressionOptions()
    _gzip = GzipCompressionOptions()
    _blosc = BloscCompressionOptions()
    _avail_filters = ('_lzf', '_gzip', '_blosc')

    def __init__(self, backend_options, dtype, shape, *args, **kwargs):
        self._shape = shape
        self._dtype = dtype
        self._selected_filter = None
        if backend_options is None:
            backend_options = self.default_options

        for filter_attr in self._avail_filters:
            with suppress((KeyError, ValueError)):
                setattr(self, filter_attr, backend_options)
                self._selected_filter = filter_attr
                break
        else:  # N.B. for-else loop (ie. "no-break")
            raise ValueError(f'Invalid backend_options {backend_options}')
        self._verify_data_nbytes_larger_than_clib_min()

    def _verify_data_nbytes_larger_than_clib_min(self):
        """blosc clib should not be used if data buffer size < 16 bytes.

        Raises
        ------
        ValueError:
            if the data size is not valid for the clib
        """
        if self._selected_filter in ['_blosc', None]:
            num_items = np.prod(self._shape)
            itemsize = np.dtype(self._dtype).itemsize
            nbytes = itemsize * num_items
            if nbytes <= 16:
                raise ValueError(f'blosc clib requires data buffer size > 16 bytes')

    @property
    def default_options(self):
        if 'blosc' in hdf5plugin.FILTERS:
            try:
                self._verify_data_nbytes_larger_than_clib_min()
                return {'complib': 'blosc:lz4hc', 'complevel': 5, 'shuffle': 'byte'}
            except ValueError:
                pass
        return {'complib': 'lzf', 'complevel': None, 'shuffle': True}

    @property
    def backend_options(self):
        return getattr(self, self._selected_filter)

    @property
    def init_requires(self):
        return ('repo_path', 'schema_shape', 'schema_dtype')


HDF5_01_MapTypes = MutableMapping[str, Union[h5py.File, Callable[[], h5py.File]]]


class HDF5_01_FileHandles(object):
    """Manage HDF5 file handles.

    When in SWMR-write mode, no more than a single file handle can be in the
    "writeable" state. This is an issue where multiple columns may need to
    write to the same column schema.
    """

    def __init__(self, repo_path: Path, schema_shape: tuple, schema_dtype: np.dtype):
        self.path: Path = repo_path
        self.schema_shape: tuple = schema_shape
        self.schema_dtype: np.dtype = schema_dtype
        self._dflt_backend_opts: Optional[dict] = None

        self.rFp: HDF5_01_MapTypes = {}
        self.wFp: HDF5_01_MapTypes = {}
        self.Fp: HDF5_01_MapTypes = ChainMap(self.rFp, self.wFp)
        self.rDatasets = SizedDict(maxsize=100)
        self.wdset: h5py.Dataset = None

        self.mode: Optional[str] = None
        self.hIdx: Optional[int] = None
        self.w_uid: Optional[str] = None
        self.hMaxSize: Optional[int] = None
        self.hNextPath: Optional[int] = None
        self.hColsRemain: Optional[int] = None

        self.STAGEDIR: Path = Path(self.path, DIR_DATA_STAGE, _FmtCode)
        self.REMOTEDIR: Path = Path(self.path, DIR_DATA_REMOTE, _FmtCode)
        self.DATADIR: Path = Path(self.path, DIR_DATA, _FmtCode)
        self.STOREDIR: Path = Path(self.path, DIR_DATA_STORE, _FmtCode)
        self.DATADIR.mkdir(exist_ok=True)

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        if self.w_uid in self.wFp:
            self.wFp[self.w_uid]['/'].attrs.modify('next_location', (self.hNextPath, self.hIdx))
            self.wFp[self.w_uid]['/'].attrs.modify('collections_remaining', self.hColsRemain)
            self.wFp[self.w_uid].flush()

    @reader_checkout_only
    def __getstate__(self) -> dict:
        """ensure multiprocess operations can pickle relevant data.
        """
        self.close()
        state = self.__dict__.copy()
        del state['rFp']
        del state['wFp']
        del state['Fp']
        del state['rDatasets']
        del state['wdset']
        return state

    def __setstate__(self, state: dict) -> None:  # pragma: no cover
        """ensure multiprocess operations can pickle relevant data.
        """
        self.__dict__.update(state)
        self.rFp = {}
        self.wFp = {}
        self.Fp = ChainMap(self.rFp, self.wFp)
        self.rDatasets = {}
        self.wdset = None
        self.open(mode=self.mode)

    @property
    def backend_opts(self):
        return self._dflt_backend_opts

    @writer_checkout_only
    def _backend_opts_set(self, val):
        """Nonstandard descriptor method. See notes in ``backend_opts.setter``.
        """
        self._dflt_backend_opts = val
        return

    @backend_opts.setter
    def backend_opts(self, value):
        """
        Using seperate setter method (with ``@writer_checkout_only`` decorator
        applied) due to bug in python <3.8.

        From: https://bugs.python.org/issue19072
            > The classmethod decorator when applied to a function of a class,
            > does not honour the descriptor binding protocol for whatever it
            > wraps. This means it will fail when applied around a function which
            > has a decorator already applied to it and where that decorator
            > expects that the descriptor binding protocol is executed in order
            > to properly bind the function to the class.
        """
        return self._backend_opts_set(value)

    def open(self, mode: str, *, remote_operation: bool = False):
        """Open an hdf5 file handle in the Handler Singleton

        Parameters
        ----------
        mode : str
            one of `r` or `a` for read only / read-write.
        remote_operation : optional, kwarg only, bool
            if this hdf5 data is being created from a remote fetch operation, then
            we don't open any files for reading, and only open files for writing
            which exist in the remote data dir. (default is false, which means that
            write operations use the stage data dir and read operations use data store
            dir)
        """
        self.mode = mode
        if self.mode == 'a':
            process_dir = self.REMOTEDIR if remote_operation else self.STAGEDIR
            process_dir.mkdir(exist_ok=True)
            for uidpth in process_dir.iterdir():
                if uidpth.suffix == '.hdf5':
                    file_pth = self.DATADIR.joinpath(uidpth.name)
                    self.rFp[uidpth.stem] = partial(
                        h5py.File, file_pth, 'r', swmr=True, libver='latest')

        if not remote_operation:
            if not self.STOREDIR.is_dir():
                return
            for uidpth in self.STOREDIR.iterdir():
                if uidpth.suffix == '.hdf5':
                    file_pth = self.DATADIR.joinpath(uidpth.name)
                    self.rFp[uidpth.stem] = partial(
                        h5py.File, file_pth, 'r', swmr=True, libver='latest')

    def close(self):
        """Close a file handle after writes have been completed

        behavior changes depending on write-enable or read-only file

        Returns
        -------
        bool
            True if success, otherwise False.
        """
        if self.mode == 'a':
            if self.w_uid in self.wFp:
                self.wFp[self.w_uid]['/'].attrs.modify('next_location', (self.hNextPath, self.hIdx))
                self.wFp[self.w_uid]['/'].attrs.modify('collections_remaining', self.hColsRemain)
                self.wFp[self.w_uid].flush()
            for uid in list(self.wFp.keys()):
                with suppress(AttributeError):
                    self.wFp[uid].close()
                del self.wFp[uid]
            self.wdset = None
            self.hMaxSize = None
            self.hNextPath = None
            self.hIdx = None
            self.hColsRemain = None
            self.w_uid = None

        for uid in list(self.rFp.keys()):
            with suppress(AttributeError):
                self.rFp[uid].close()
            del self.rFp[uid]
        self.rDatasets = {}

    @staticmethod
    def delete_in_process_data(repo_path: Path, *, remote_operation=False) -> None:
        """Removes some set of files entirely from the stage/remote directory.

        DANGER ZONE. This should essentially only be used to perform hard resets
        of the repository state.

        Parameters
        ----------
        repo_path : Path
            path to the repository on disk
        remote_operation : optional, kwarg only, bool
            If true, modify contents of the remote_dir, if false (default) modify
            contents of the staging directory.
        """
        data_dir = Path(repo_path, DIR_DATA, _FmtCode)
        PDIR = DIR_DATA_STAGE if not remote_operation else DIR_DATA_REMOTE
        process_dir = Path(repo_path, PDIR, _FmtCode)
        if not process_dir.is_dir():
            return

        for uidpth in process_dir.iterdir():
            if uidpth.suffix == '.hdf5':
                os.remove(process_dir.joinpath(uidpth.name))
                os.remove(data_dir.joinpath(uidpth.name))
        os.rmdir(process_dir)

    @staticmethod
    def _dataset_opts(complib: str, complevel: int, shuffle: Union[bool, str]) -> dict:
        """specify compression options for the hdf5 dataset.

        .. seealso:: :func:`_blosc_opts`

        To enable blosc compression, use the conda-forge `blosc-hdf5-plugin` package.

        .. seealso::

            * https://github.com/conda-forge/staged-recipes/pull/7650
            * https://github.com/h5py/h5py/issues/611

        Parameters
        ----------
        complib : str
            the compression lib to use, one of ['lzf', 'gzip', 'blosc:blosclz',
            'blosc:lz4', 'blosc:lz4hc', 'blosc:zlib', 'blosc:zstd']
        complevel : int
            compression level to specify (accepts values [0, 9] for all except 'lzf'
            where no complevel is accepted)
        shuffle : bool or str
            if True or 'byte', enable the byte shuffle filter; if blosc
            compression is used, 'bit' is accepted as well. False or
            None indicates no shuffle should be applied.
        """
        # ---- blosc hdf5 plugin filters ----
        _blosc_compression = {
            'blosc:blosclz': 0,
            'blosc:lz4': 1,
            'blosc:lz4hc': 2,
            # Not built 'snappy': 3,
            'blosc:zlib': 4,
            'blosc:zstd': 5}
        _blosc_shuffle = {None: 0, 'none': 0, 'byte': 1, 'bit': 2}
        _blosc_complevel = {**{i: i for i in range(10)}, None: 9, 'none': 9}

        # ---- h5py built in filters ----
        _lzf_gzip_shuffle = {None: False, False: False, 'none': False, True: True, 'byte': True}
        _lzf_complevel = {False: None, None: None, 'none': None}
        _gzip_complevel = {**{i: i for i in range(10)}, None: 4, 'none': 4}

        if complib.startswith('blosc'):
            args = {
                'compression': 32001,
                'compression_opts': (
                    0, 0, 0, 0,
                    _blosc_complevel[complevel],
                    _blosc_shuffle[shuffle],
                    _blosc_compression[complib]),
                'shuffle': False}
        elif complib == 'lzf':
            args = {
                'shuffle': _lzf_gzip_shuffle[shuffle],
                'compression': complib,
                'compression_opts': _lzf_complevel[complevel]}
        elif complib == 'gzip':
            args = {
                'shuffle': _lzf_gzip_shuffle[shuffle],
                'compression': complib,
                'compression_opts': _gzip_complevel[complevel]}
        elif complib in (None, False, 'none'):
            args = {
                'shuffle': False,
                'compression': None,
                'compression_opts': None}
        else:
            raise ValueError(f'unknown value for opt arg `complib`: {complib}')
        return args

    def _create_schema(self, *, remote_operation: bool = False):
        """stores the shape and dtype as the schema of a column.

        Parameters
        ----------
        remote_operation : optional, kwarg only, bool
            if this schema is being created from a remote fetch operation, then do not
            place the file symlink in the staging directory. Instead symlink it
            to a special remote staging directory. (default is False, which places the
            symlink in the stage data directory.)

        Notes
        -----

        Parameters set for raw-data-chunk-cache (rdcc) values:

        * rdcc_nbytes: sets the total size (measured in bytes) of the raw data chunk
          cache for each dataset. This should be set to the size of each chunk times
          the number of chunks that are likely to be needed in cache.
        * rdcc_w0: sets the policy for chunks to be removed from the cache when more
          space is needed. If set to 0, always evict the least recently used chunk in
          cache. If set to 1, always evict the least recently used chunk which has
          been fully read or written. If the value is between 0 and 1, the behavior
          will be a blend of the two.
        * rdcc_nslots: The number of chunk slots in the cache for this entire file.
          In order for quick lookup, a hash map is used for each chunk value. For
          maximum performance, this value should be set approximately 100 times that
          number of chunks.

        .. seealso::

            http://docs.h5py.org/en/stable/high/file.html#chunk-cache

        """

        # -------------------- Chunk & RDCC Vals ------------------------------
        schema_shape = self.schema_shape
        itemsize = np.dtype(self.schema_dtype).itemsize
        expectedrows = COLLECTION_SIZE * COLLECTION_COUNT
        maindim = 0

        chunk_shape = calc_chunkshape(schema_shape, expectedrows, itemsize, maindim)
        if chunk_shape == (1,) and schema_shape == ():
            schema_shape = (1,)
        req_chunks_per_dim = [math.ceil(i / j) for i, j in zip(schema_shape, chunk_shape)]
        req_shape = [i * j for i, j in zip(req_chunks_per_dim, chunk_shape)]
        chunk_nbytes = np.prod(chunk_shape) * itemsize
        nchunks = np.prod(req_chunks_per_dim)

        rdcc_nbytes_val = chunk_nbytes * nchunks * COLLECTION_SIZE
        if rdcc_nbytes_val >= CHUNK_MAX_RDCC_NBYTES:
            rdcc_nbytes_val = CHUNK_MAX_RDCC_NBYTES
        rdcc_nslots_guess = nchunks * expectedrows * 100
        rdcc_nslots_prime_val = find_next_prime(rdcc_nslots_guess)

        # ---------------------------- File Creation --------------------------

        uid = random_string()
        file_path = Path(self.DATADIR, f'{uid}.hdf5')
        self.wFp[uid] = h5py.File(file_path,
                                  mode='w',
                                  libver='latest',
                                  rdcc_nbytes=rdcc_nbytes_val,
                                  rdcc_w0=CHUNK_RDCC_W0,
                                  rdcc_nslots=rdcc_nslots_prime_val)
        self.w_uid = uid
        self.wdset = None
        self.hNextPath = 0
        self.hIdx = 0
        self.hColsRemain = COLLECTION_COUNT
        self.hMaxSize = COLLECTION_SIZE

        process_dir = self.REMOTEDIR if remote_operation else self.STAGEDIR
        Path(process_dir, f'{uid}.hdf5').touch()

        # ----------------------- Dataset Creation ----------------------------

        optKwargs = self._dataset_opts(**self._dflt_backend_opts)
        for dset_num in range(COLLECTION_COUNT):
            self.wFp[uid].create_dataset(
                f'/{dset_num}',
                shape=(COLLECTION_SIZE, *req_shape),
                dtype=self.schema_dtype,
                chunks=(1, *chunk_shape),
                **optKwargs)

        # ---------------------- Attribute Config Vals ------------------------

        self.wFp[self.w_uid]['/'].attrs['HANGAR_VERSION'] = __version__
        self.wFp[self.w_uid]['/'].attrs['schema_shape'] = self.schema_shape
        self.wFp[self.w_uid]['/'].attrs['schema_dtype_num'] = np.dtype(self.schema_dtype).num
        self.wFp[self.w_uid]['/'].attrs['next_location'] = (0, 0)
        self.wFp[self.w_uid]['/'].attrs['collection_max_size'] = COLLECTION_SIZE
        self.wFp[self.w_uid]['/'].attrs['collection_total'] = COLLECTION_COUNT
        self.wFp[self.w_uid]['/'].attrs['collections_remaining'] = COLLECTION_COUNT
        self.wFp[self.w_uid]['/'].attrs['rdcc_nbytes'] = rdcc_nbytes_val
        self.wFp[self.w_uid]['/'].attrs['rdcc_w0'] = CHUNK_RDCC_W0
        self.wFp[self.w_uid]['/'].attrs['rdcc_nslots'] = rdcc_nslots_prime_val
        self.wFp[self.w_uid]['/'].attrs['chunk_shape'] = chunk_shape
        if optKwargs['compression_opts'] is not None:
            self.wFp[self.w_uid]['/'].attrs['compression_opts'] = optKwargs['compression_opts']
        else:
            self.wFp[self.w_uid]['/'].attrs['compression_opts'] = False

        self.wFp[self.w_uid].flush()
        try:
            self.wFp[self.w_uid].swmr_mode = True
        except ValueError:
            assert self.wFp[self.w_uid].swmr_mode is True
        self.wdset = self.wFp[self.w_uid][f'/{self.hNextPath}']

    def read_data(self, hashVal: HDF5_01_DataHashSpec) -> np.ndarray:
        """Read data from an hdf5 file handle at the specified locations

        Parameters
        ----------
        hashVal : HDF5_01_DataHashSpec
            record specification parsed from its serialized store val in lmdb.

        Returns
        -------
        np.array
            requested data
        """
        dsetCol = f'/{hashVal.dataset}'
        srcSlc = (hashVal.dataset_idx, *[slice(0, dim) for dim in hashVal.shape])
        rdictkey = f'{hashVal.uid}{dsetCol}'

        if self.schema_dtype:  # if is not None
            destArr = np.empty(hashVal.shape, self.schema_dtype)
            if rdictkey in self.rDatasets:
                self.rDatasets[rdictkey].read_direct(destArr, srcSlc, None)
            else:
                try:
                    self.Fp[hashVal.uid][dsetCol].read_direct(destArr, srcSlc, None)
                    self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                except TypeError:
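                    # ``self.Fp[uid]`` may still hold the lazy opener (a
                    # ``functools.partial`` registered in ``open()``); calling
                    # it opens the real ``h5py.File``, after which the read is
                    # retried against the opened handle.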
                    self.Fp[hashVal.uid] = self.Fp[hashVal.uid]()
                    self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                    self.rDatasets[rdictkey].read_direct(destArr, srcSlc, None)
                except KeyError:
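                    # ``uid`` was not registered when ``open()`` ran (e.g. the
                    # file was created after this checkout opened); if it exists
                    # in the stage / store dir, open it directly and retry.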
                    process_dir = self.STAGEDIR if self.mode == 'a' else self.STOREDIR
                    if Path(process_dir, f'{hashVal.uid}.hdf5').is_file():
                        file_pth = self.DATADIR.joinpath(f'{hashVal.uid}.hdf5')
                        self.rFp[hashVal.uid] = h5py.File(file_pth, 'r', swmr=True, libver='latest')
                        self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                        self.rDatasets[rdictkey].read_direct(destArr, srcSlc, None)
                    else:
                        raise
        else:
            if rdictkey in self.rDatasets:
                destArr = self.rDatasets[rdictkey][srcSlc]
            else:
                try:
                    destArr = self.Fp[hashVal.uid][dsetCol][srcSlc]
                    self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                except TypeError:
                    self.Fp[hashVal.uid] = self.Fp[hashVal.uid]()
                    destArr = self.Fp[hashVal.uid][dsetCol][srcSlc]
                    self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                except KeyError:
                    process_dir = self.STAGEDIR if self.mode == 'a' else self.STOREDIR
                    if Path(process_dir, f'{hashVal.uid}.hdf5').is_file():
                        file_pth = self.DATADIR.joinpath(f'{hashVal.uid}.hdf5')
                        self.rFp[hashVal.uid] = h5py.File(file_pth, 'r', swmr=True, libver='latest')
                        destArr = self.Fp[hashVal.uid][dsetCol][srcSlc]
                        self.rDatasets[rdictkey] = self.Fp[hashVal.uid][dsetCol]
                    else:
                        raise

        if xxh64_hexdigest(destArr) != hashVal.checksum:
            # try casting to check if dtype does not match for all zeros case
            destArr = destArr.astype(np.typeDict[self.Fp[hashVal.uid]['/'].attrs['schema_dtype_num']])
            if xxh64_hexdigest(destArr) != hashVal.checksum:
                raise RuntimeError(
                    f'DATA CORRUPTION Checksum {xxh64_hexdigest(destArr)} != recorded {hashVal}')
        return destArr

    def write_data(self, array: np.ndarray, *, remote_operation: bool = False) -> bytes:
        """verifies correctness of array data and performs write operation.

        Parameters
        ----------
        array : np.ndarray
            tensor to write to group.
        remote_operation : optional, kwarg only, bool
            If this is a remote process which is adding data, any necessary
            hdf5 dataset files will be created in the remote data dir instead
            of the stage directory. (default is False, which is for a regular
            access process)

        Returns
        -------
        bytes
            string identifying the collection dataset and collection dim-0 index
            which the array can be accessed at.
        """
        checksum = xxh64_hexdigest(array)
        if self.w_uid in self.wFp:
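            # Advance the write cursor: when the current collection dataset
            # fills up, move on to the next dataset in the file; once the file
            # runs out of collections, flush its attributes and roll over to a
            # brand new file via ``_create_schema``.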
            self.hIdx += 1
            if self.hIdx >= self.hMaxSize:
                self.wdset.flush()
                self.hIdx = 0
                self.hNextPath += 1
                self.hColsRemain -= 1
                self.wdset = self.wFp[self.w_uid][f'/{self.hNextPath}']
                if self.hColsRemain <= 1:
                    self.wFp[self.w_uid]['/'].attrs.modify('next_location', (self.hNextPath, self.hIdx))
                    self.wFp[self.w_uid]['/'].attrs.modify('collections_remaining', self.hColsRemain)
                    self.wFp[self.w_uid].flush()
                    self._create_schema(remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)

        destSlc = (self.hIdx, *[slice(0, dim) for dim in array.shape])
        self.wdset.write_direct(array, None, destSlc)
        self.wdset.flush()
        res = hdf5_01_encode(self.w_uid, checksum, self.hNextPath, self.hIdx, array.shape)
        return res


================================================
FILE: src/hangar/backends/lmdb_30.py
================================================
"""Local LMDB Backend Implementation, Identifier: ``LMDB_30``

Backend Identifiers
===================

*  Backend: ``3``
*  Version: ``0``
*  Format Code: ``30``
*  Canonical Name: ``LMDB_30``

Storage Method
==============

*  This module is meant to handle string typed data which is of any size. IO
   is performed via the LMDB storage system.

*  This module does not compress values upon writing; the full (uncompressed)
   value of the text is written to the DB for each key.

*  For each LMDB file generated, data is indexed by keys of length 4 which are
   generated in lexicographically sorted order. Keys consist of 4 characters
   chosen from an alphabet of ASCII digits, uppercase letters, and lowercase
   letters. Within a single write instance (when an LMDB file is created
   and written to), lexicographically sorted permutations of the chosen characters
   are used as key indexes.

   This means that for each LMDB file written in a repo, the sequence of generated
   index keys will be identical, even though two databases with the same key will
   store different values. As such, the File UID is crucial in order to identify
   a unique db/index key combo to access a particular value by.

*  There is no limit to the size which each record can occupy. Data is stored
   "as-is" and is uncompressed. Reading the data back will return the exact
   data stored (regardless of how large the data record is).

*  On read and write of all samples the xxhash64_hexdigest is calculated for
   the raw data bytes. This is to ensure that all data in == data out of the
   lmdb files. That way even if a file is manually edited we have a quick way
   to tell that things are not as they should be. (full data hash digests may
   not be calculated every time a read is performed).

Compression Options
===================

None

Record Format
=============

Fields Recorded for Each Array
------------------------------

*  Format Code
*  File UID
*  Row Index

Examples
--------

1)  Adding the first piece of data to a file:

    *  File UID: "rlUK3C"
    *  Row Index: "0123"
    *  xxhash64_hexdigest: 8067007c0f05c359

    ``Record Data => "30:rlUK3C:0123:8067007c0f05c359"``

2)  Adding a second piece of data:

    *  File UID: "rlUK3C"
    *  Row Index: "0124"
    *  xxhash64_hexdigest: b89f873d3d153a9c

    ``Record Data => "30:rlUK3C:0124:b89f873d3d153a9c"``

3)  Adding the 500th piece of data:

    *  File UID: "rlUK3C"
    *  Row Index: "01AU"
    *  xxhash64_hexdigest: cf3fc53cad153a5a

    ``Record Data => "30:rlUK3C:01AU:cf3fc53cad153a5a"``
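
In code, these record values correspond to the output of this module's
``lmdb_30_encode`` function, with row indexes drawn from the
``_lexicographic_keys`` generator; for example (illustrative)::

    >>> keys = _lexicographic_keys()
    >>> next(keys)
    '0123'
    >>> lmdb_30_encode(uid='rlUK3C', row_idx='0123', checksum='8067007c0f05c359')
    b'30:rlUK3C:0123:8067007c0f05c359'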
"""
import os
import shutil
import string
from collections import ChainMap
from contextlib import suppress
from functools import partial
from itertools import permutations
from pathlib import Path
from typing import Optional

import lmdb
from xxhash import xxh64_hexdigest

from .specs import LMDB_30_DataHashSpec
from ..constants import DIR_DATA_REMOTE, DIR_DATA_STAGE, DIR_DATA_STORE, DIR_DATA
from ..op_state import reader_checkout_only, writer_checkout_only
from ..utils import random_string
from ..typesystem import Descriptor, OneOf, EmptyDict, checkedmeta


LMDB_SETTINGS = {
    'map_size': 300_000_000,
    'meminit': False,
    'subdir': True,
    'lock': False,
    'max_spare_txns': 4,
}
_FmtCode = '30'


def _lexicographic_keys():
    lexicographic_ids = ''.join([
        string.digits,
        string.ascii_uppercase,
        string.ascii_lowercase,
    ])
    # permutations() yields results in lexicographic order. A total of
    # 13_388_280 ids (62 * 61 * 60 * 59) can be generated from a row_id
    # consisting of 4 non-repeating characters. This is more keys than
    # we will ever allow in a single LMDB database.
    p = permutations(lexicographic_ids, 4)

    for perm in p:
        res = ''.join(perm)
        yield res


def lmdb_30_encode(uid: str, row_idx: int, checksum: str) -> bytes:
    res = f'30:{uid}:{row_idx}:{checksum}'
    return res.encode()


@OneOf(['', str])
class AllowedDtypes(Descriptor):
    pass


class LMDB_30_Options(metaclass=checkedmeta):
    _dtype = AllowedDtypes()
    _backend_options = EmptyDict()

    def __init__(self, backend_options, dtype, *args, **kwargs):
        if backend_options is None:
            backend_options = self.default_options
        self._backend_options = backend_options
        self._dtype = dtype

    @property
    def default_options(self):
        return {}

    @property
    def backend_options(self):
        return self._backend_options

    @property
    def init_requires(self):
        return ('repo_path',)


class LMDB_30_FileHandles:

    def __init__(self, repo_path: Path, *args, **kwargs):

        self.path: Path = repo_path

        self.rFp = {}
        self.wFp = {}
        self.Fp = ChainMap(self.rFp, self.wFp)

        self.mode: Optional[str] = None
        self.w_uid: Optional[str] = None
        self.row_idx: Optional[str] = None
        self._dflt_backend_opts: Optional[dict] = None

        self.STAGEDIR: Path = Path(self.path, DIR_DATA_STAGE, _FmtCode)
        self.REMOTEDIR: Path = Path(self.path, DIR_DATA_REMOTE, _FmtCode)
        self.STOREDIR: Path = Path(self.path, DIR_DATA_STORE, _FmtCode)
        self.DATADIR: Path = Path(self.path, DIR_DATA, _FmtCode)
        self.DATADIR.mkdir(exist_ok=True)

    def __enter__(self):

        return self

    def __exit__(self, *exc):
        return

    @reader_checkout_only
    def __getstate__(self) -> dict:
        """ensure multiprocess operations can pickle relevant data.
        """
        self.close()
        state = self.__dict__.copy()
        del state['rFp']
        del state['wFp']
        del state['Fp']
        return state

    def __setstate__(self, state: dict) -> None:  # pragma: no cover
        """ensure multiprocess operations can pickle relevant data.
        """
        self.__dict__.update(state)
        self.rFp = {}
        self.wFp = {}
        self.Fp = ChainMap(self.rFp, self.wFp)

    @property
    def backend_opts(self):
        return self._dflt_backend_opts

    @writer_checkout_only
    def _backend_opts_set(self, val):
        """Nonstandard descriptor method. See notes in ``backend_opts.setter``.
        """
        self._dflt_backend_opts = val
        return

    @backend_opts.setter
    def backend_opts(self, value):
        """
        Using seperate setter method (with ``@writer_checkout_only`` decorator
        applied) due to bug in python <3.8.

        From: https://bugs.python.org/issue19072
            > The classmethod decorator when applied to a function of a class,
            > does not honour the descriptor binding protocol for whatever it
            > wraps. This means it will fail when applied around a function which
            > has a decorator already applied to it and where that decorator
            > expects that the descriptor binding protocol is executed in order
            > to properly bind the function to the class.
        """
        return self._backend_opts_set(value)

    def open(self, mode: str, *, remote_operation: bool = False):
        """Open an lmdb file handle.

        Parameters
        ----------
        mode : str
            one of `r` or `a` for read only / read-write.
        remote_operation : optional, kwarg only, bool
            if this lmdb data is being created from a remote fetch operation, then
            we don't open any files for reading, and only open files for writing
            which exist in the remote data dir. (default is false, which means that
            write operations use the stage data dir and read operations use data store
            dir)
        """
        self.mode = mode
        if self.mode == 'a':
            process_dir = self.REMOTEDIR if remote_operation else self.STAGEDIR
            process_dir.mkdir(exist_ok=True)
            for uidpth in process_dir.iterdir():
                if uidpth.suffix == '.lmdbdir':
                    file_pth = self.DATADIR.joinpath(uidpth.stem)
                    self.rFp[uidpth.stem] = partial(lmdb.open, str(file_pth), readonly=True,
                                                    **LMDB_SETTINGS)

        if not remote_operation:
            if not self.STOREDIR.is_dir():
                return
            for uidpth in self.STOREDIR.iterdir():
                if uidpth.suffix == '.lmdbdir':
                    file_pth = self.DATADIR.joinpath(uidpth.stem)
                    self.rFp[uidpth.stem] = partial(lmdb.open, str(file_pth), readonly=True,
                                                    **LMDB_SETTINGS)

    def close(self):
        """Close a file handle after writes have been completed

        behavior changes depending on write-enable or read-only file

        Returns
        -------
        bool
            True if success, otherwise False.
        """
        if self.mode == 'a':
            for uid in list(self.wFp.keys()):
                with suppress(AttributeError):
                    self.wFp[uid].close()
                del self.wFp[uid]
            self.w_uid = None
            self.row_idx = None

        for uid in list(self.rFp.keys()):
            with suppress(AttributeError):
                self.rFp[uid].close()
            del self.rFp[uid]

    @staticmethod
    def delete_in_process_data(repo_path: Path, *, remote_operation=False) -> None:
        """Removes some set of files entirely from the stage/remote directory.

        DANGER ZONE. This should essentially only be used to perform hard resets
        of the repository state.

        Parameters
        ----------
        repo_path : Path
            path to the repository on disk
        remote_operation : optional, kwarg only, bool
            If true, modify contents of the remote_dir, if false (default) modify
            contents of the staging directory.
        """
        data_dir = Path(repo_path, DIR_DATA, _FmtCode)
        PDIR = DIR_DATA_STAGE if not remote_operation else DIR_DATA_REMOTE
        process_dir = Path(repo_path, PDIR, _FmtCode)
        if not process_dir.is_dir():
            return

        for uidpth in process_dir.iterdir():
            if uidpth.suffix == '.lmdbdir':
                os.remove(process_dir.joinpath(uidpth.name))
                db_dir = data_dir.joinpath(uidpth.stem)
                shutil.rmtree(str(db_dir))
        os.rmdir(process_dir)

    def _create_schema(self, *, remote_operation: bool = False):

        uid = random_string()
        db_dir_path = self.DATADIR.joinpath(f'{uid}')
        self.wFp[uid] = lmdb.open(str(db_dir_path), **LMDB_SETTINGS)

        self.w_uid = uid
        self.row_idx = _lexicographic_keys()

        process_dir = self.REMOTEDIR if remote_operation else self.STAGEDIR
        Path(process_dir, f'{uid}.lmdbdir').touch()

    def read_data(self, hashVal: LMDB_30_DataHashSpec) -> str:
        """Read data from an hdf5 file handle at the specified locations

        Parameters
        ----------
        hashVal : LMDB_30_DataHashSpec
            record specification parsed from its serialized store val in lmdb.

        Returns
        -------
        str
            requested data.
        """
        try:
            with self.Fp[hashVal.uid].begin(write=False) as txn:
                res = txn.get(hashVal.row_idx.encode(), default=False)
                if res is False:
                    raise RuntimeError(hashVal)
        except AttributeError:
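            # ``self.Fp[uid]`` may still hold the lazy opener (a
            # ``functools.partial`` registered in ``open()``) which has no
            # ``.begin()``; call it to open the real lmdb environment, then retry.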
            self.Fp[hashVal.uid] = self.Fp[hashVal.uid]()
            return self.read_data(hashVal)
        except KeyError:
            process_dir = self.STAGEDIR if self.mode == 'a' else self.STOREDIR
            if Path(process_dir, f'{hashVal.uid}.lmdbdir').is_file():
                file_pth = self.DATADIR.joinpath(hashVal.uid)
                self.rFp[hashVal.uid] = lmdb.open(str(file_pth), readonly=True, **LMDB_SETTINGS)
                return self.read_data(hashVal)
            else:
                raise

        out = res.decode()
        if xxh64_hexdigest(res) != hashVal.checksum:
            raise RuntimeError(
                f'DATA CORRUPTION Checksum {xxh64_hexdigest(res)} != recorded {hashVal}')
        return out

    def write_data(self, data: str, *, remote_operation: bool = False) -> bytes:
        """verifies correctness of array data and performs write operation.

        Parameters
        ----------
        data: str
            data to write to group.
        remote_operation : optional, kwarg only, bool
            If this is a remote process which is adding data, any necessary
            hdf5 dataset files will be created in the remote data dir instead
            of the stage directory. (default is False, which is for a regular
            access process)

        Returns
        -------
        bytes
            string identifying the collection dataset and collection dim-0 index
            which the array can be accessed at.
        """
        encoded_data = data.encode()
        checksum = xxh64_hexdigest(encoded_data)

        if self.w_uid in self.wFp:
            try:
                row_idx = next(self.row_idx)
            except StopIteration:
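                # the 4-character key space for this lmdb file is exhausted;
                # roll over to a fresh file and retry the write there.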
                self._create_schema(remote_operation=remote_operation)
                return self.write_data(data, remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        encoded_row_idx = row_idx.encode()
        try:
            with self.wFp[self.w_uid].begin(write=True) as txn:
                txn.put(encoded_row_idx, encoded_data, append=True)
        except lmdb.MapFullError:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        return lmdb_30_encode(self.w_uid, row_idx, checksum)


================================================
FILE: src/hangar/backends/lmdb_31.py
================================================
"""Local LMDB Backend Implementation, Identifier: ``LMDB_30``

Backend Identifiers
===================

*  Backend: ``3``
*  Version: ``1``
*  Format Code: ``31``
*  Canonical Name: ``LMDB_31``

Storage Method
==============

*  This module is meant to handle bytes typed data which is of any size
   less than 2MB per value. IO is performed via the LMDB storage system.

*  This module does not compress values upon writing; the full (uncompressed)
   value of the data is written to the DB for each key.

*  For each LMDB file generated, data is indexed by keys of length 4 which are
   generated in lexicographically sorted order. Keys consist of 4 characters
   chosen from an alphabet of ASCII digits, uppercase letters, and lowercase
   letters. Within a single write instance (when an LMDB file is created
   and written to), lexicographically sorted permutations of the chosen characters
   are used as key indexes.

   This means that for each LMDB file written in a repo, the sequence of generated
   index keys will be identical, even though two databases with the same key will
   store different values. As such, the File UID is crucial in order to identify
   a unique db/index key combo to access a particular value by.

*  Data is stored "as-is" and is uncompressed. Reading the data back will
   return the exact bytes stored, regardless of how large the data record is
   (within the per-value size noted above).

*  On read and write of all samples the xxhash64_hexdigest is calculated for
   the raw data bytes. This is to ensure that all data in == data out of the
   lmdb files. That way even if a file is manually edited we have a quick way
   to tell that things are not as they should be. (full data hash digests may
   not be calculated every time a read is performed).

Compression Options
===================

None

Record Format
=============

Fields Recorded for Each Array
------------------------------

*  Format Code
*  File UID
*  Row Index

Examples
--------

1)  Adding the first piece of data to a file:

    *  File UID: "rlUK3C"
    *  Row Index: "0123"
    *  xxhash64_hexdigest: 8067007c0f05c359

    ``Record Data => "31:rlUK3C:0123:8067007c0f05c359"``

2)  Adding a second piece of data:

    *  File UID: "rlUK3C"
    *  Row Index: "0124"
    *  xxhash64_hexdigest: b89f873d3d153a9c

    ``Record Data => "31:rlUK3C:0124:b89f873d3d153a9c"``

3)  Adding the 500th piece of data:

    *  File UID: "rlUK3C"
    *  Row Index: "01AU"
    *  xxhash64_hexdigest: cf3fc53cad153a5a

    ``Record Data => "31:rlUK3C:01AU:cf3fc53cad153a5a"``
"""
import os
import shutil
import string
from collections import ChainMap
from contextlib import suppress
from functools import partial
from itertools import permutations
from pathlib import Path
from typing import Optional

import lmdb
from xxhash import xxh64_hexdigest

from .specs import LMDB_31_DataHashSpec
from ..constants import DIR_DATA_REMOTE, DIR_DATA_STAGE, DIR_DATA_STORE, DIR_DATA
from ..op_state import reader_checkout_only, writer_checkout_only
from ..utils import random_string
from ..typesystem import Descriptor, OneOf, EmptyDict, checkedmeta


LMDB_SETTINGS = {
    'map_size': 300_000_000,
    'meminit': False,
    'subdir': True,
    'lock': False,
    'max_spare_txns': 4,
}
_FmtCode = '31'


def _lexicographic_keys():
    lexicographic_ids = ''.join([
        string.digits,
        string.ascii_uppercase,
        string.ascii_lowercase,
    ])
    # permutations() yields results in lexicographic order. A total of
    # 13_388_280 ids (62 * 61 * 60 * 59) can be generated from a row_id
    # consisting of 4 non-repeating characters. This is more keys than
    # we will ever allow in a single LMDB database.
    p = permutations(lexicographic_ids, 4)

    for perm in p:
        res = ''.join(perm)
        yield res


def lmdb_31_encode(uid: str, row_idx: int, checksum: str) -> bytes:
    res = f'31:{uid}:{row_idx}:{checksum}'
    return res.encode()


@OneOf(['', bytes])
class AllowedDtypes(Descriptor):
    pass


class LMDB_31_Options(metaclass=checkedmeta):
    _dtype = AllowedDtypes()
    _backend_options = EmptyDict()

    def __init__(self, backend_options, dtype, *args, **kwargs):
        if backend_options is None:
            backend_options = self.default_options
        self._backend_options = backend_options
        self._dtype = dtype

    @property
    def default_options(self):
        return {}

    @property
    def backend_options(self):
        return self._backend_options

    @property
    def init_requires(self):
        return ('repo_path',)


class LMDB_31_FileHandles:

    def __init__(self, repo_path: Path, *args, **kwargs):

        self.path: Path = repo_path

        self.rFp = {}
        self.wFp = {}
        self.Fp = ChainMap(self.rFp, self.wFp)

        self.mode: Optional[str] = None
        self.w_uid: Optional[str] = None
        self.row_idx: Optional[str] = None
        self._dflt_backend_opts: Optional[dict] = None

        self.STAGEDIR: Path = Path(self.path, DIR_DATA_STAGE, _FmtCode)
        self.REMOTEDIR: Path = Path(self.path, DIR_DATA_REMOTE, _FmtCode)
        self.STOREDIR: Path = Path(self.path, DIR_DATA_STORE, _FmtCode)
        self.DATADIR: Path = Path(self.path, DIR_DATA, _FmtCode)
        self.DATADIR.mkdir(exist_ok=True)

    def __enter__(self):

        return self

    def __exit__(self, *exc):
        return

    @reader_checkout_only
    def __getstate__(self) -> dict:
        """ensure multiprocess operations can pickle relevant data.
        """
        self.close()
        state = self.__dict__.copy()
        del state['rFp']
        del state['wFp']
        del state['Fp']
        return state

    def __setstate__(self, state: dict) -> None:  # pragma: no cover
        """ensure multiprocess operations can pickle relevant data.
        """
        self.__dict__.update(state)
        self.rFp = {}
        self.wFp = {}
        self.Fp = ChainMap(self.rFp, self.wFp)

    @property
    def backend_opts(self):
        return self._dflt_backend_opts

    @writer_checkout_only
    def _backend_opts_set(self, val):
        """Nonstandard descriptor method. See notes in ``backend_opts.setter``.
        """
        self._dflt_backend_opts = val
        return

    @backend_opts.setter
    def backend_opts(self, value):
        """
        Using seperate setter method (with ``@writer_checkout_only`` decorator
        applied) due to bug in python <3.8.

        From: https://bugs.python.org/issue19072
            > The classmethod decorator when applied to a function of a class,
            > does not honour the descriptor binding protocol for whatever it
            > wraps. This means it will fail when applied around a function which
            > has a decorator already applied to it and where that decorator
            > expects that the descriptor binding protocol is executed in order
            > to properly bind the function to the class.
        """
        return self._backend_opts_set(value)

    def open(self, mode: str, *, remote_operation: bool = False):
        """Open an lmdb file handle.

        Parameters
        ----------
        mode : str
            one of `r` or `a` for read only / read-write.
        remote_operation : optional, kwarg only, bool
            if this lmdb data is being created from a remote fetch operation, then
            we don't open any files for reading, and only open files for writing
            which exist in the remote data dir. (default is false, which means that
            write operations use the stage data dir and read operations use data store
            dir)
        """
        self.mode = mode
        if self.mode == 'a':
            process_dir = self.REMOTEDIR if remote_operation else self.STAGEDIR
            process_dir.mkdir(exist_ok=True)
            for uidpth in process_dir.iterdir():
                if uidpth.suffix == '.lmdbdir':
                    file_pth = self.DATADIR.joinpath(uidpth.stem)
                    self.rFp[uidpth.stem] = partial(
                        lmdb.open, str(file_pth), readonly=True, **LMDB_SETTINGS)

        if not remote_operation:
            if not self.STOREDIR.is_dir():
                return
            for uidpth in self.STOREDIR.iterdir():
                if uidpth.suffix == '.lmdbdir':
                    file_pth = self.DATADIR.joinpath(uidpth.stem)
                    self.rFp[uidpth.stem] = partial(
                        lmdb.open, str(file_pth), readonly=True, **LMDB_SETTINGS)

    def close(self):
        """Close a file handle after writes have been completed

        behavior changes depending on write-enable or read-only file

        Returns
        -------
        bool
            True if success, otherwise False.
        """
        if self.mode == 'a':
            for uid in list(self.wFp.keys()):
                with suppress(AttributeError):
                    self.wFp[uid].close()
                del self.wFp[uid]
            self.w_uid = None
            self.row_idx = None

        for uid in list(self.rFp.keys()):
            with suppress(AttributeError):
                self.rFp[uid].close()
            del self.rFp[uid]

    @staticmethod
    def delete_in_process_data(repo_path: Path, *, remote_operation=False) -> None:
        """Removes some set of files entirely from the stage/remote directory.

        DANGER ZONE. This should essentially only be used to perform hard resets
        of the repository state.

        Parameters
        ----------
        repo_path : Path
            path to the repository on disk
        remote_operation : optional, kwarg only, bool
            If true, modify contents of the remote_dir, if false (default) modify
            contents of the staging directory.
        """
        data_dir = Path(repo_path, DIR_DATA, _FmtCode)
        PDIR = DIR_DATA_STAGE if not remote_operation else DIR_DATA_REMOTE
        process_dir = Path(repo_path, PDIR, _FmtCode)
        if not process_dir.is_dir():
            return

        for uidpth in process_dir.iterdir():
            if uidpth.suffix == '.lmdbdir':
                os.remove(process_dir.joinpath(uidpth.name))
                db_dir = data_dir.joinpath(uidpth.stem)
                shutil.rmtree(str(db_dir))
        os.rmdir(process_dir)

    def _create_schema(self, *, remote_operation: bool = False):

        uid = random_string()
        db_dir_path = self.DATADIR.joinpath(f'{uid}')
        self.wFp[uid] = lmdb.open(str(db_dir_path), **LMDB_SETTINGS)

        self.w_uid = uid
        self.row_idx = _lexicographic_keys()

        process_dir = self.REMOTEDIR if remote_operation else self.STAGEDIR
        Path(process_dir, f'{uid}.lmdbdir').touch()

    def read_data(self, hashVal: LMDB_31_DataHashSpec) -> bytes:
        """Read data from an lmdb file handle at the specified location

        Parameters
        ----------
        hashVal : LMDB_31_DataHashSpec
            record specification parsed from its serialized store val in lmdb.

        Returns
        -------
        bytes
            requested data.
        """
        try:
            with self.Fp[hashVal.uid].begin(write=False) as txn:
                res = txn.get(hashVal.row_idx.encode(), default=False)
                if res is False:
                    raise RuntimeError(hashVal)
        except AttributeError:
            self.Fp[hashVal.uid] = self.Fp[hashVal.uid]()
            return self.read_data(hashVal)
        except KeyError:
            process_dir = self.STAGEDIR if self.mode == 'a' else self.STOREDIR
            if Path(process_dir, f'{hashVal.uid}.lmdbdir').is_file():
                file_pth = self.DATADIR.joinpath(hashVal.uid)
                self.rFp[hashVal.uid] = lmdb.open(str(file_pth), readonly=True, **LMDB_SETTINGS)
                return self.read_data(hashVal)
            else:
                raise

        if xxh64_hexdigest(res) != hashVal.checksum:
            raise RuntimeError(
                f'DATA CORRUPTION Checksum {xxh64_hexdigest(res)} != recorded {hashVal}')
        return res

    def write_data(self, data: bytes, *, remote_operation: bool = False) -> bytes:
        """verifies correctness of array data and performs write operation.

        Parameters
        ----------
        data: bytes
            data to write to group.
        remote_operation : optional, kwarg only, bool
            If this is a remote process which is adding data, any necessary
            hdf5 dataset files will be created in the remote data dir instead
            of the stage directory. (default is False, which is for a regular
            access process)

        Returns
        -------
        bytes
            string identifying the collection dataset and collection dim-0 index
            which the array can be accessed at.
        """
        checksum = xxh64_hexdigest(data)
        if self.w_uid in self.wFp:
            try:
                row_idx = next(self.row_idx)
            except StopIteration:
                self._create_schema(remote_operation=remote_operation)
                return self.write_data(data, remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        encoded_row_idx = row_idx.encode()
        try:
            with self.wFp[self.w_uid].begin(write=True) as txn:
                txn.put(encoded_row_idx, data, append=True)
        except lmdb.MapFullError:
            self._create_schema(remote_operation=remote_operation)
            return self.write_data(data, remote_operation=remote_operation)

        return lmdb_31_encode(self.w_uid, row_idx, checksum)


================================================
FILE: src/hangar/backends/numpy_10.py
================================================
"""Local Numpy memmap Backend Implementation, Identifier: ``NUMPY_10``

Backend Identifiers
===================

*  Backend: ``1``
*  Version: ``0``
*  Format Code: ``10``
*  Canonical Name: ``NUMPY_10``

Storage Method
==============

* Data is written to specific subarray indexes inside a numpy memmapped array on disk.

* Each file is a zero-initialized array of

  *  ``dtype: {schema_dtype}``; ie ``np.float32`` or ``np.uint8``

  *  ``shape: (COLLECTION_SIZE, *{schema_shape})``; ie ``(500, 10)`` or ``(500,
     4, 3)``. The first index in the array is referred to as a "collection
     index".

Compression Options
===================

Does not accept any compression options. No compression is applied.

Record Format
=============

Fields Recorded for Each Array
------------------------------

*  Format Code
*  File UID
*  xxhash64_hexdigest
*  Collection Index (0:COLLECTION_SIZE subarray selection)
*  Subarray Shape

Examples
--------

1)  Adding the first piece of data to a file:

    *  Array shape (Subarray Shape): (10, 10)
    *  File UID: "K3ktxv"
    *  xxhash64_hexdigest: 94701dd9f32626e2
    *  Collection Index: 488

    ``Record Data =>  "10:K3ktxv:94701dd9f32626e2:488:10 10"``

2)  Adding a piece of data to the middle of a file:

    *  Array shape (Subarray Shape): (20, 2, 3)
    *  File UID: "Mk23nl"
    *  xxhash64_hexdigest: 1363344b6c051b29
    *  Collection Index: 199

    ``Record Data => "10:Mk23nl:1363344b6c051b29:199:20 2 3"``


Technical Notes
===============

*  A typical numpy memmap file persisted to disk does not retain information
   about its datatype or shape, and as such must be provided when re-opened
   after close. In order to persist a memmap in ``.npy`` format, we use the a
   special function ``open_memmap`` imported from ``np.lib.format`` which can
   open a memmap file and persist necessary header info to disk in ``.npy``
   format.

*  On each write, an ``xxhash64_hexdigest`` checksum is calculated. This is not
   for use as the primary hash algorithm, but rather stored in the local record
   format itself to serve as a quick way to verify no disk corruption occurred.
   This is required since numpy has no built in data integrity validation
   methods when reading from disk.
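
*  For illustration only (a hypothetical path and shape, not part of the backend
   API), a minimal sketch of the ``open_memmap`` round trip described above::

      m = open_memmap('/path/to/example.npy', mode='w+', dtype='float32', shape=(500, 10))
      m[0, :] = 1.0
      m.flush()
      r = open_memmap('/path/to/example.npy', mode='r')  # dtype / shape read back from the .npy header
      assert r.shape == (500, 10)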
"""
import os
from collections import ChainMap
from functools import partial
from pathlib import Path
from typing import MutableMapping, Optional

import numpy as np
from numpy.lib.format import open_memmap
from xxhash import xxh64_hexdigest

from .specs import NUMPY_10_DataHashSpec
from ..constants import DIR_DATA_REMOTE, DIR_DATA_STAGE, DIR_DATA_STORE, DIR_DATA
from ..op_state import reader_checkout_only, writer_checkout_only
from ..utils import random_string
from ..typesystem import Descriptor, OneOf, EmptyDict, checkedmeta


# ----------------------------- Configuration ---------------------------------

_FmtCode = '10'

# number of subarray contents of a single numpy memmap file
COLLECTION_SIZE = 1000

# -------------------------------- Parser Implementation ----------------------


def numpy_10_encode(uid: str, cksum: str, collection_idx: int, shape: tuple) -> bytes:
    """converts the numpy data spect to an appropriate db value

    Parameters
    ----------
    uid : str
        file name (schema uid) of the np file to find this data piece in.
    cksum : str
        xxhash64_hexdigest checksum of the data as computed on that local machine.
    collection_idx : int
        collection first axis index in which this data piece resides.
    shape : tuple
        shape of the data sample written to the collection idx. ie: what
        subslices of the array should be read to retrieve the sample as
        recorded.

    Returns
    -------
    bytes
        hash data db value recording all input specifications
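
    Examples
    --------
    Mirrors the first record example in the module docstring:

    >>> numpy_10_encode('K3ktxv', '94701dd9f32626e2', 488, (10, 10))
    b'10:K3ktxv:94701dd9f32626e2:488:10 10'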
    """
    shape_str = " ".join([str(i) for i in shape])
    return f'10:{uid}:{cksum}:{collection_idx}:{shape_str}'.encode()


# ------------------------- Accessor Object -----------------------------------


@OneOf(list(map(lambda x: np.dtype(x).name, [
        np.bool, np.uint8, np.uint16, np.uint32, np.uint64, np.int8, np.int16,
        np.int32, np.int64, np.float16, np.float32, np.float64, np.longdouble])))
class AllowedDtypes(Descriptor):
    # Note: np.longdouble is used since np.float128 is not guaranteed to be available
    # on all systems. This is a particular issue with some Windows numpy builds.
    pass


class NUMPY_10_Options(metaclass=checkedmeta):
    _dtype = AllowedDtypes()
    _backend_options = EmptyDict()

    def __init__(self, backend_options, dtype, *args, **kwargs):
        if backend_options is None:
            backend_options = self.default_options

        self._backend_options = backend_options
        self._dtype = dtype

    @property
    def default_options(self):
        return {}

    @property
    def backend_options(self):
        return self._backend_options

    @property
    def init_requires(self):
        return ('repo_path', 'schema_shape', 'schema_dtype')


class NUMPY_10_FileHandles(object):

    def __init__(self, repo_path: Path, schema_shape: tuple, schema_dtype: np.dtype):
        self.repo_path = repo_path
        self.schema_shape = schema_shape
        self.schema_dtype = schema_dtype
        self._dflt_backend_opts: Optional[dict] = None

        self.rFp: MutableMapping[str, np.memmap] = {}
        self.wFp: MutableMapping[str, np.memmap] = {}
        self.Fp = ChainMap(self.rFp, self.wFp)

        self.mode: str = None
        self.w_uid: str = None
        self.hIdx: int = None

        self.STAGEDIR: Path = Path(self.repo_path, DIR_DATA_STAGE, _FmtCode)
        self.REMOTEDIR: Path = Path(self.repo_path, DIR_DATA_REMOTE, _FmtCode)
        self.DATADIR: Path = Path(self.repo_path, DIR_DATA, _FmtCode)
        self.STOREDIR: Path = Path(self.repo_path, DIR_DATA_STORE, _FmtCode)
        self.DATADIR.mkdir(exist_ok=True)

    @reader_checkout_only
    def __getstate__(self) -> dict:
        """ensure multiprocess operations can pickle relevant data.
        """
        self.close()
        state = self.__dict__.copy()
        del state['rFp']
        del state['wFp']
        del state['Fp']
        return state

    def __setstate__(self, state: dict) -> None:  # pragma: no cover
        """ensure multiprocess operations can pickle relevant data.
        """
        self.__dict__.update(state)
        self.rFp = {}
        self.wFp = {}
        self.Fp = ChainMap(self.rFp, self.wFp)
        self.open(mode=self.mode)

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        if self.w_uid in self.wFp:
            self.wFp[self.w_uid].flush()

    @property
    def backend_opts(self):
        return self._dflt_backend_opts

    @writer_checkout_only
    def _backend_opts_set(self, val):
        """Nonstandard descriptor method. See notes in ``backend_opts.setter``.
        """
        self._dflt_backend_opts = val
        return

    @backend_opts.setter
    def backend_opts(self, value):
        """
        Using a separate setter method (with ``@writer_checkout_only`` decorator
        applied) due to bug in python <3.8.

        From: https://bugs.python.org/issue19072
            > The classmethod decorator when applied to a function of a class,
            > does not honour the descriptor binding protocol for whatever it
            > wraps. This means it will fail when applied around a function which
            > has a decorator already applied to it and where that decorator
            > expects that the descriptor binding protocol is executed in order
            > to properly bind the function to the class.
        """
        return self._backend_opts_set(value)

    def open(self, mode: str, *, remote_operation: bool = False):
        """open numpy file handle coded directories

        Parameters
        ----------
        mode : str
            one of `a` for `write-enabled` mode or `r` for read-only
        remote_operation : bool, optional, kwarg only
            True if remote operations call this method. Changes the directories
            used while writing. Default is False.
        """
        self.mode = mode
        if self.mode == 'a':
            process_dir = self.REMOTEDIR if remote_operation else self.STAGEDIR
            process_dir.mkdir(exist_ok=True)
            for uidpth in process_dir.iterdir():
                if uidpth.suffix == '.npy':
                    file_pth = self.DATADIR.joinpath(uidpth.name)
                    self.rFp[uidpth.stem] = partial(open_memmap, file_pth, 'r')

        if not remote_operation:
            if not self.STOREDIR.is_dir():
                return
            for uidpth in self.STOREDIR.iterdir():
                if uidpth.suffix == '.npy':
                    file_pth = self.DATADIR.joinpath(uidpth.name)
                    self.rFp[uidpth.stem] = partial(open_memmap, file_pth, 'r')

    def close(self, *args, **kwargs):
        """Close any open file handles.
        """
        if self.mode == 'a':
            if self.w_uid in self.wFp:
                self.wFp[self.w_uid].flush()
                self.w_uid = None
                self.hIdx = None
            for k in list(self.wFp.keys()):
                del self.wFp[k]

        for k in list(self.rFp.keys()):
            del self.rFp[k]

    @staticmethod
    def delete_in_process_data(repo_path: Path, *, remote_operation: bool = False):
        """Removes some set of files entirely from the stage/remote directory.

        DANGER ZONE. This should essentially only be used to perform hard resets
        of the repository state.

        Parameters
        ----------
        repo_path : Path
            path to the repository on disk
        remote_operation : optional, kwarg only, bool
            If true, modify contents of the remote_dir, if false (default) modify
            contents of the staging directory.
        """
        data_dir = Path(repo_path, DIR_DATA, _FmtCode)
        pdir = DIR_DATA_STAGE if not remote_operation else DIR_DATA_REMOTE
        process_dir = Path(repo_path, pdir, _FmtCode)
        if not process_dir.is_dir():
            return

        for uidpth in process_dir.iterdir():
            if uidpth.suffix == '.npy':
                os.remove(process_dir.joinpath(uidpth.name))
                os.remove(data_dir.joinpath(uidpth.name))
        os.rmdir(process_dir)

    def _create_schema(self, *, remote_operation: bool = False):
        """stores the shape and dtype as the schema of a column.

        Parameters
        ----------
        remote_operation : optional, kwarg only, bool
            if this schema is being created from a remote fetch operation, then do
            not place the tracking file in the staging directory. Instead place it
            in a special remote staging directory. (default is False, which places
            the tracking file in the stage data directory.)
        """
        uid = random_string()
        file_path = self.DATADIR.joinpath(f'{uid}.npy')
        m = open_memmap(file_path,
                        mode='w+',
                        dtype=self.schema_dtype,
                        shape=(COLLECTION_SIZE, *self.schema_shape))
        self.wFp[uid] = m
        self.w_uid = uid
        self.hIdx = 0

        process_dir = self.REMOTEDIR if remote_operation else self.STAGEDIR
        Path(process_dir, f'{uid}.npy').touch()

    def read_data(self, hashVal: NUMPY_10_DataHashSpec) -> np.ndarray:
        """Read data from disk written in the numpy_00 fmtBackend

        Parameters
        ----------
        hashVal : NUMPY_10_DataHashSpec
            record specification stored in the db

        Returns
        -------
        np.ndarray
            tensor data stored at the provided hashVal specification.

        Raises
        ------
        RuntimeError
            If the recorded checksum does not match the received checksum.

        Notes
        -----

        TO AVOID DATA LOSS / CORRUPTION:

        * On a read operation, we copy memmap subarray tensor data to a new
          `np.ndarray` instance so as to prevent writes on a raw memmap result
          slice (a `np.memmap` instance) from propagating to data on disk.

        * This is an issue for reads from a write-enabled checkout where data
          was just written, since the np flags "WRITEABLE" and "OWNDATA" will be
          true, and writes to the returned array would overwrite that data
          slice on disk.

        * For read-only checkouts, modifications to the resultant array would
          perform a "copy on write"-like operation which would be propogated to
          all future reads of the subarray from that process, but which would
          not be persisted to disk.
        """
        srcSlc = (hashVal.collection_idx, *[slice(0, x) for x in hashVal.shape])
        try:
            res = self.Fp[hashVal.uid][srcSlc]
        except TypeError:
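            # the stored value is still the ``partial(open_memmap, ...)`` opener set
            # up in ``open()``; call it to obtain the actual memmap, then retry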
            self.Fp[hashVal.uid] = self.Fp[hashVal.uid]()
            res = self.Fp[hashVal.uid][srcSlc]
        except KeyError:
            process_dir = self.STAGEDIR if self.mode == 'a' else self.STOREDIR
            if Path(process_dir, f'{hashVal.uid}.npy').is_file():
                file_pth = self.DATADIR.joinpath(f'{hashVal.uid}.npy')
                self.rFp[hashVal.uid] = open_memmap(file_pth, 'r')
                res = self.Fp[hashVal.uid][srcSlc]
            else:
                raise

        out = np.array(res, dtype=res.dtype, order='C')
        if xxh64_hexdigest(out) != hashVal.checksum:
            raise RuntimeError(
                f'DATA CORRUPTION Checksum {xxh64_hexdigest(out)} != recorded {hashVal}')
        return out

    def write_data(self, array: np.ndarray, *, remote_operation: bool = False) -> bytes:
        """writes array data to disk in the numpy_00 fmtBackend

        Parameters
        ----------
        array : np.ndarray
            tensor to write to disk
        remote_operation : bool, optional, kwarg only
            True if writing in a remote operation, otherwise False. Default is
            False

        Returns
        -------
        bytes
            db hash record value specifying location information
        """
        checksum = xxh64_hexdigest(array)
        if self.w_uid in self.wFp:
            self.hIdx += 1
            if self.hIdx >= COLLECTION_SIZE:
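                # the current collection file is full; flush it and roll over to a
                # fresh memmap file before writing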
                self.wFp[self.w_uid].flush()
                self._create_schema(remote_operation=remote_operation)
        else:
            self._create_schema(remote_operation=remote_operation)

        destSlc = (self.hIdx, *[slice(0, x) for x in array.shape])
        self.wFp[self.w_uid][destSlc] = array
        self.wFp[self.w_uid].flush()
        return numpy_10_encode(self.w_uid, checksum, self.hIdx, array.shape)


================================================
FILE: src/hangar/backends/remote_50.py
================================================
"""Remote server location unknown backend, Identifier: ``REMOTE_50``

Backend Identifiers
===================

*  Backend: ``5``
*  Version: ``0``
*  Format Code: ``50``
*  Canonical Name: ``REMOTE_50``

Storage Method
==============

*  This backend merely acts to record that there is some data sample with some
   ``hash`` and ``schema_shape`` present in the repository. It does not store the
   actual data on the local disk, but indicates that if it should be retrieved,
   you need to ask the remote hangar server for it. Once present on the local
   disk, the backend locating info will be updated with one of the `local` data
   backend specifications.

Record Format
=============

Fields Recorded for Each Array
------------------------------

*  Format Code
*  Schema Hash

Separators used
---------------

* ``SEP_KEY: ":"``

Examples
--------

1)  Adding the first piece of data to a file:

    *  Schema Hash: "ae43A21a"

    ``Record Data => '50:ae43A21a'``

2)  Adding a piece of data to the middle of a file:

    *  Schema Hash: "ae43A21a"

    ``Record Data => '50:ae43A21a'``

Technical Notes
===============

*  The schema_hash field is required in order to allow effective placement of
   actual retrieved data into suitable sized collections on a ``fetch-data()``
   operation
"""
from pathlib import Path
from typing import Optional

from .specs import REMOTE_50_DataHashSpec
from ..op_state import writer_checkout_only, reader_checkout_only
from ..typesystem import EmptyDict, checkedmeta


# -------------------------------- Parser Implementation ----------------------

_FmtCode = '50'


def remote_50_encode(schema_hash: str = '') -> bytes:
    """returns an db value saying that this hash exists somewhere on a remote

    Returns
    -------
    bytes
        hash data db value
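
    Examples
    --------
    Mirrors the record example in the module docstring:

    >>> remote_50_encode('ae43A21a')
    b'50:ae43A21a'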
    """
    return f'50:{schema_hash}'.encode()


# ------------------------- Accessor Object -----------------------------------


class REMOTE_50_Options(metaclass=checkedmeta):
    _backend_options = EmptyDict()

    def __init__(self, backend_options, *args, **kwargs):
        if backend_options is None:
            backend_options = self.default_options
        self._backend_options = backend_options

    @property
    def default_options(self):
        return {}

    @property
    def backend_options(self):
        return self._backend_options

    @property
    def init_requires(self):
        return ('repo_path',)


class REMOTE_50_Handler(object):

    def __init__(self, repo_path: Path, *args, **kwargs):
        self.repo_path = repo_path
        self._dflt_backend_opts: Optional[dict] = None
        self._mode: Optional[str] = None

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        return

    @reader_checkout_only
    def __getstate__(self) -> dict:  # pragma: no cover
        """ensure multiprocess operations can pickle relevant data.
        """
        self.close()
        state = self.__dict__.copy()
        return state

    def __setstate__(self, state: dict) -> None:  # pragma: no cover
        """ensure multiprocess operations can pickle relevant data.
        """
        self.__dict__.update(state)
        self.open(mode=self._mode)

    @property
    def backend_opts(self):
        return self._dflt_backend_opts

    @writer_checkout_only
    def _backend_opts_set(self, val):
        """Nonstandard descriptor method. See notes in ``backend_opts.setter``.
        """
        self._dflt_backend_opts = val
        return

    @backend_opts.setter
    def backend_opts(self, value):
        """
        Using a separate setter method (with ``@writer_checkout_only`` decorator
        applied) due to bug in python <3.8.

        From: https://bugs.python.org/issue19072
            > The classmethod decorator when applied to a function of a class,
            > does not honour the descriptor binding protocol for whatever it
            > wraps. This means it will fail when applied around a function which
            > has a decorator already applied to it and where that decorator
            > expects that the descriptor binding protocol is executed in order
            > to properly bind the function to the class.
        """
        return self._backend_opts_set(value)

    def open(self, mode, *args, **kwargs):
        self._mode = mode
        return

    def close(self, *args, **kwargs):
        return

    @staticmethod
    def delete_in_process_data(*args, **kwargs) -> None:
        """mockup of clearing staged directory for upstream calls.
        """
        return

    def read_data(self, hashVal: REMOTE_50_DataHashSpec) -> None:
        raise FileNotFoundError(
            f'data hash spec: {hashVal} does not exist on this machine. '
            f'Perform a `fetch-data` operation to retrieve it from the remote server.')

    def write_data(self, schema_hash: str, *args, **kwargs) -> bytes:
        """Provide a formatted byte representation for storage as a remote reference

        Parameters
        ----------
        schema_hash : str
            schema hash which the referenced data sample should be accessed under

        Returns
        -------
        bytes
            formatted raw value encoding lookup information
        """
        return remote_50_encode(schema_hash=schema_hash)


================================================
FILE: src/hangar/backends/specparse.pyx
================================================
# decoding methods to convert from byte string -> spec struct.

from .specs cimport HDF5_01_DataHashSpec, \
    HDF5_00_DataHashSpec, \
    NUMPY_10_DataHashSpec, \
    LMDB_30_DataHashSpec, \
    LMDB_31_DataHashSpec, \
    REMOTE_50_DataHashSpec


cdef HDF5_01_DataHashSpec HDF5_01_Parser(str inp):
    cdef str fmt, uid, cksum, dset, dset_idx
    cdef tuple shape_tup
    cdef list shape_list = []
    cdef int dataset_idx_int
    cdef unsigned char i, c, cc
    cdef unsigned char n = len(inp)
    cdef HDF5_01_DataHashSpec res

    c = 0
    cc = 0
    for i in range(n):
        if inp[i] == ':':
            if cc == 0:
                fmt = inp[c:i]
            elif cc == 1:
                uid = inp[c:i]
            elif cc == 2:
                cksum = inp[c:i]
            elif cc == 3:
                dset = inp[c:i]
            elif cc == 4:
                dset_idx = inp[c:i]
            c = i + 1
            cc = cc + 1
    shape_vs = inp[c:n]

    c = 0
    n = len(shape_vs)
    for i in range(n):
        if shape_vs[i] == ' ':
            shape_list.append(int(shape_vs[c:i]))
            c = i + 1
    if shape_vs[c:n] != '':
        shape_list.append(int(shape_vs[c:]))

    shape_tup = tuple(shape_list)
    dataset_idx_int = int(dset_idx)
    res = HDF5_01_DataHashSpec(fmt, uid, cksum, dset, dataset_idx_int, shape_tup)
    return res


cdef HDF5_00_DataHashSpec HDF5_00_Parser(str inp):
    cdef str fmt, uid, cksum, dset, dset_idx
    cdef tuple shape_tup
    cdef list shape_list = []
    cdef int dataset_idx_int
    cdef unsigned char i, c, cc
    cdef unsigned char n = len(inp)
    cdef HDF5_00_DataHashSpec res

    c = 0
    cc = 0
    for i in range(n):
        if inp[i] == ':':
            if cc == 0:
                fmt = inp[c:i]
            elif cc == 1:
                uid = inp[c:i]
            elif cc == 2:
                cksum = inp[c:i]
            elif cc == 3:
                dset = inp[c:i]
            elif cc == 4:
                dset_idx = inp[c:i]
            c = i + 1
            cc = cc + 1
    shape_vs = inp[c:n]

    c = 0
    n = len(shape_vs)
    for i in range(n):
        if shape_vs[i] == ' ':
            shape_list.append(int(shape_vs[c:i]))
            c = i + 1
    if shape_vs[c:n] != '':
        shape_list.append(int(shape_vs[c:]))

    shape_tup = tuple(shape_list)
    dataset_idx_int = int(dset_idx)
    res = HDF5_00_DataHashSpec(fmt, uid, cksum, dset, dataset_idx_int, shape_tup)
    return res


cdef NUMPY_10_DataHashSpec NUMPY_10_Parser(str inp):
    cdef str fmt, uid, cksum, collection_idx
    cdef tuple shape_tup
    cdef list shape_list = []
    cdef int collection_idx_int
    cdef unsigned char i, c, cc
    cdef unsigned char n = len(inp)
    cdef NUMPY_10_DataHashSpec res

    c = 0
    cc = 0
    for i in range(n):
        if inp[i] == ':':
            if cc == 0:
                fmt = inp[c:i]
            elif cc == 1:
                uid = inp[c:i]
            elif cc == 2:
                cksum = inp[c:i]
            elif cc == 3:
                collection_idx = inp[c:i]
            c = i + 1
            cc = cc + 1
    shape_vs = inp[c:n]

    c = 0
    n = len(shape_vs)
    for i in range(n):
        if shape_vs[i] == ' ':
            shape_list.append(int(shape_vs[c:i]))
            c = i + 1
    if shape_vs[c:n] != '':
        shape_list.append(int(shape_vs[c:]))

    shape_tup = tuple(shape_list)
    collection_idx_int = int(collection_idx)
    res = NUMPY_10_DataHashSpec(fmt, uid, cksum, collection_idx_int, shape_tup)
    return res


cdef LMDB_30_DataHashSpec LMDB_30_Parser(str inp):
    cdef str fmt, uid, row_idx, checksum
    cdef unsigned char i, c, cc
    cdef unsigned char n = len(inp)
    cdef LMDB_30_DataHashSpec res

    c = 0
    cc = 0
    for i in range(n):
        if inp[i] == ':':
            if cc == 0:
                fmt = inp[c:i]
            elif cc == 1:
                uid = inp[c:i]
            elif cc == 2:
                row_idx = inp[c:i]
            c = i + 1
            cc = cc + 1
    checksum = inp[c:n]

    res = LMDB_30_DataHashSpec(fmt, uid, row_idx, checksum)
    return res


cdef LMDB_31_DataHashSpec LMDB_31_Parser(str inp):
    cdef str fmt, uid, row_idx, checksum
    cdef unsigned char i, c, cc
    cdef unsigned char n = len(inp)
    cdef LMDB_31_DataHashSpec res

    c = 0
    cc = 0
    for i in range(n):
        if inp[i] == ':':
            if cc == 0:
                fmt = inp[c:i]
            elif cc == 1:
                uid = inp[c:i]
            elif cc == 2:
                row_idx = inp[c:i]
            c = i + 1
            cc = cc + 1
    checksum = inp[c:n]

    res = LMDB_31_DataHashSpec(fmt, uid, row_idx, checksum)
    return res


cdef REMOTE_50_DataHashSpec REMOTE_50_Parser(str inp):
    cdef str fmt, schema_hash
    cdef unsigned char i, c
    cdef unsigned char n = len(inp)
    cdef REMOTE_50_DataHashSpec res

    c = 0
    for i in range(n):
        if inp[i] == ':':
            fmt = inp[c:i]
            c = i + 1
    schema_hash = inp[c:]
    res = REMOTE_50_DataHashSpec(fmt, schema_hash)
    return res


cpdef object backend_decoder(bytes inp):
    cdef str backend, inp_str
    inp_str = inp.decode('utf-8')
    backend = inp_str[:2]
    if backend == '00':
        return HDF5_00_Parser(inp_str)
    elif backend == '01':
        return HDF5_01_Parser(inp_str)
    elif backend == '10':
        return NUMPY_10_Parser(inp_str)
    elif backend == '30':
        return LMDB_30_Parser(inp_str)
    elif backend == '31':
        return LMDB_31_Parser(inp_str)
    elif backend == '50':
        return REMOTE_50_Parser(inp_str)
    else:
        raise ValueError(f'unknown backend type for input str {inp_str}')
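
# Illustrative only (hypothetical input): decoding dispatches on the two character
# format code prefix of the stored value, e.g.
#
#   backend_decoder(b'50:ae43A21a')
#       -> REMOTE_50_DataHashSpec(backend="50", schema_hash="ae43A21a")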


================================================
FILE: src/hangar/backends/specs.pxd
================================================
# header files for spec containers

cdef class HDF5_01_DataHashSpec:

    cdef readonly str backend
    cdef readonly str uid
    cdef readonly str checksum
    cdef readonly str dataset
    cdef readonly int dataset_idx
    cdef readonly tuple shape


cdef class HDF5_00_DataHashSpec:

    cdef readonly str backend
    cdef readonly str uid
    cdef readonly str checksum
    cdef readonly str dataset
    cdef readonly int dataset_idx
    cdef readonly tuple shape


cdef class NUMPY_10_DataHashSpec:

    cdef readonly str backend
    cdef readonly str uid
    cdef readonly str checksum
    cdef readonly int collection_idx
    cdef readonly tuple shape


cdef class LMDB_30_DataHashSpec:

    cdef readonly str backend
    cdef readonly str uid
    cdef readonly str row_idx
    cdef readonly str checksum


cdef class LMDB_31_DataHashSpec:

    cdef readonly str backend
    cdef readonly str uid
    cdef readonly str row_idx
    cdef readonly str checksum


cdef class REMOTE_50_DataHashSpec:

    cdef readonly str backend
    cdef readonly str schema_hash


================================================
FILE: src/hangar/backends/specs.pyx
================================================
# memory efficient container classes for data backends specs.
# Allow for attribute access similar to named tuples.
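#
# For illustration (hypothetical values), a spec instance behaves much like a
# named tuple:
#
#   spec = NUMPY_10_DataHashSpec('10', 'K3ktxv', '94701dd9f32626e2', 488, (10, 10))
#   spec.uid      -> 'K3ktxv'
#   tuple(spec)   -> ('10', 'K3ktxv', '94701dd9f32626e2', 488, (10, 10))
#   spec.islocal  -> True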

cdef class HDF5_01_DataHashSpec:

    def __init__(self, str backend, str uid, str checksum, str dataset,
                 int dataset_idx, tuple shape):

        self.backend = backend
        self.uid = uid
        self.checksum = checksum
        self.dataset = dataset
        self.dataset_idx = dataset_idx
        self.shape = shape

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'backend="{self.backend}", '
                f'uid="{self.uid}", '
                f'checksum="{self.checksum}", '
                f'dataset="{self.dataset}", '
                f'dataset_idx={self.dataset_idx}, '
                f'shape={self.shape})')

    def __iter__(self):
        for attr in ['backend', 'uid', 'checksum', 'dataset', 'dataset_idx', 'shape']:
            yield getattr(self, attr)

    @property
    def islocal(self):
        return True


cdef class HDF5_00_DataHashSpec:

    def __init__(self, str backend, str uid, str checksum,
                 str dataset, int dataset_idx, tuple shape):

        self.backend = backend
        self.uid = uid
        self.checksum = checksum
        self.dataset = dataset
        self.dataset_idx = dataset_idx
        self.shape = shape

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'backend="{self.backend}", '
                f'uid="{self.uid}", '
                f'checksum="{self.checksum}", '
                f'dataset="{self.dataset}", '
                f'dataset_idx={self.dataset_idx}, '
                f'shape={self.shape})')

    def __iter__(self):
        for attr in ['backend', 'uid', 'checksum', 'dataset', 'dataset_idx', 'shape']:
            yield getattr(self, attr)

    @property
    def islocal(self):
        return True


cdef class NUMPY_10_DataHashSpec:

    def __init__(self, str backend, str uid, str checksum,
                 int collection_idx, tuple shape):

        self.backend = backend
        self.uid = uid
        self.checksum = checksum
        self.collection_idx = collection_idx
        self.shape = shape

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'backend="{self.backend}", '
                f'uid="{self.uid}", '
                f'checksum="{self.checksum}", '
                f'collection_idx={self.collection_idx}, '
                f'shape={self.shape})')

    def __iter__(self):
        for attr in ['backend', 'uid', 'checksum', 'collection_idx', 'shape']:
            yield getattr(self, attr)

    @property
    def islocal(self):
        return True


cdef class LMDB_30_DataHashSpec:

    def __init__(self, str backend, str uid, str row_idx, str checksum):

        self.backend = backend
        self.uid = uid
        self.row_idx = row_idx
        self.checksum = checksum


    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'backend="{self.backend}", '
                f'uid="{self.uid}", '
                f'row_idx={self.row_idx}, '
                f'checksum="{self.checksum}")')

    def __iter__(self):
        for attr in ['backend', 'uid', 'row_idx', 'checksum']:
            yield getattr(self, attr)

    @property
    def islocal(self):
        return True



cdef class LMDB_31_DataHashSpec:

    def __init__(self, str backend, str uid, str row_idx, str checksum):

        self.backend = backend
        self.uid = uid
        self.row_idx = row_idx
        self.checksum = checksum


    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'backend="{self.backend}", '
                f'uid="{self.uid}", '
                f'row_idx={self.row_idx}, '
                f'checksum="{self.checksum}")')

    def __iter__(self):
        for attr in ['backend', 'uid', 'row_idx', 'checksum']:
            yield getattr(self, attr)

    @property
    def islocal(self):
        return True


cdef class REMOTE_50_DataHashSpec:

    def __init__(self, str backend, str schema_hash):

        self.backend = backend
        self.schema_hash = schema_hash

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'backend="{self.backend}", '
                f'schema_hash="{self.schema_hash}")')

    def __iter__(self):
        for attr in ['backend', 'schema_hash']:
            yield getattr(self, attr)

    @property
    def islocal(self):
        return False


================================================
FILE: src/hangar/bulk_importer.py
================================================
"""Bulk importer methods to ingest large quantities of data into Hangar.

The following module is designed to address challenges inherent to writing
massive amounts of data to a hangar repository via the standard API. Since
write-enabled checkouts are limited to processing in a single thread, the
time required to import hundreds of Gigabytes (or Terabytes) of data into
Hangar (from external sources) can become prohibitively long. This module
implements a multi-processed importer which reduces import time nearly
linearly with the number of CPU cores allocated on a machine.

There are a number of challenges to overcome:

1. How to validate data against a column schema?

    - Does the column exist?

    - Are the key(s) valid?

    - Is the data of a valid type/shape/precision for the selected
      column schema?

2. How to handle duplicated data?

    -  If an identical piece of data is recorded in the repository already,
       only record the sample reference (do not write the data to disk again).

    - If the bulk import method would write identical pieces of data to the
      repository multiple times, and the data does not already exist, then that
      piece of content should only be written to disk once. Only sample
      references should be saved after that.

3. How to handle transactionality?

    - What happens if some column, sample keys, or data piece is invalid and
      cannot be written as desired?

    - How to roll back partial changes if the process is interrupted in
      the middle of a bulk import operation?

4. How to limit memory usage if many processes are trying to load and
   write large tensors?


Rough outline of steps:

    1. Validate UDF & Argument Signature

    2. Read, Validate, and Hash UDF results --> Task Recipe

    3. Prune Recipe

    4. Read, Validate, Write Data to Isolated Backend Storage

    5. Record Sample References in Isolated Environment

    6. If all successful, make isolated data known to repository core,
       otherwise abort to starting state.
"""
__all__ = ('UDF_Return', 'run_bulk_import')

import concurrent.futures
import multiprocessing as mp
import multiprocessing.queues as mpq
import os
import pickle
import queue
import random
import shutil
import warnings
from concurrent.futures import ThreadPoolExecutor
from contextlib import closing, contextmanager
from inspect import signature, isgeneratorfunction
from math import ceil
from operator import attrgetter, methodcaller
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import (
    NamedTuple, Union, Tuple, List, Iterator,
    Callable, Dict, Optional, TYPE_CHECKING
)

import cloudpickle
import numpy as np
from tqdm import tqdm

from .columns.common import open_file_handles
from .constants import DIR_DATA, DIR_DATA_REMOTE, DIR_DATA_STAGE, DIR_DATA_STORE
from .records import hashs
from .records.column_parsers import (
    hash_data_raw_key_from_db_key,
    hash_data_db_key_from_raw_key,
    flat_data_db_key_from_names,
    nested_data_db_key_from_names,
    data_record_db_val_from_digest,
)
from .txnctx import TxnRegister
from .utils import grouper, is_valid_directory_path, bound

if TYPE_CHECKING:
    import lmdb
    from . import Repository
    from .typesystem.base import ColumnBase
    from .columns import ModifierTypes


UDF_T = Callable[..., Iterator['UDF_Return']]
KeyType = Union[str, int]


# ----------------- User Facing Portions of Bulk Data Loader ------------------


# noinspection PyUnresolvedReferences
class UDF_Return(NamedTuple):
    """User-Defined Function return container for bulk importer read functions

    Attributes
    ----------
    column: str
        column name to place data into
    key: Union[KeyType, Tuple[KeyType, KeyType]]
        key to place flat sample into, or 2-tuple of keys for nested samples
    data: Union[np.ndarray, str, bytes]
        piece of data to place in the column with the provided key.
    """
    column: str
    key: Union[KeyType, Tuple[KeyType, KeyType]]
    data: Union[np.ndarray, str, bytes]

    def __eq__(self, other):
        if not self.__class__.__name__ == other.__class__.__name__:
            raise NotImplementedError

        if self.column != other.column:
            return False
        if self.key != other.key:
            return False

        if isinstance(self.data, np.ndarray):
            if not np.array_equal(self.data, other.data):
                return False
        elif self.data != other.data:
            return False
        return True


def run_bulk_import(
        repo: 'Repository',
        branch_name: str,
        column_names: List[str],
        udf: UDF_T,
        udf_kwargs: List[dict],
        *,
        ncpus: int = 0,
        autocommit: bool = True
):
    """Perform a bulk import operation from a given user-defined function.

    In order to provide for arbitrary input data sources, while ensuring that the
    core promises of hangar hold, we require the following from users:

    Define some arbitrary function (i.e. a "user-defined function" / "UDF") which
    accepts some arguments and yields data. The UDF must be a generator function,
    yielding only values which are of :class:`~.UDF_Return` type. The results
    yielded by the UDF must be deterministic for a given set of inputs. This
    includes all values of the :class:`~.UDF_Return` (``columns`` and ``keys``,
    as well as ``data``).

    A list of input arguments to the UDF must be provided; this is formatted as a
    sequence (list / tuple) of keyword-arg dictionaries, each of which must be
    valid when unpacked and bound to the UDF signature. Additionally, all columns
    must be specified up front. If any column named in a :class:`~.UDF_Return`
    was not pre-specified, the entire operation will fail.

    Notes
    -----

    *  This is an all-or-nothing operation: either all data is successfully
       read, validated, and written to the storage backends, or none of it
       is. A single malformed key or data type/shape will cause the entire
       import operation to abort.

    *  The input kwargs should be fairly small (of no consequence to load
       into memory), while the data produced may be large. The results of the UDF
       will only be stored in memory for a very short period (just the time
       it takes to be validated against the column schema and compressed /
       flushed to disk).

    *  Every step of the process is executed as a generator, lazily loading
       data the entire way. If possible, we recommend writing the UDF such that
       data is not allocated in memory before it is ready to be yielded.

    *  If it is possible, the task recipe will be pruned and optimized in such
       a way that iteration over the UDF will be short circuited during the
       second pass (writing data to the backend). As this can greatly reduce
       processing time, we recommend trying to yield data pieces which are likely
       to be unique first from the UDF.

    Warnings
    --------

    *  Please be aware that these methods should not be executed within a
       Jupyter Notebook / Jupyter Lab when running the bulk importer at scale.
       The internal implementation makes significant use of multiprocess Queues
       for work distribution and recording. The heavy loads placed on the system
       have been observed to place strain on Jupyter's ZeroMQ implementation,
       resulting in random failures which may or may not even display a traceback
       to indicate failure mode.

       A small sample set of data can be used within Jupyter to test an
       implementation without problems, but for full scale operations it is best
       run in a script with the operations protected by a ``__main__`` block.

    Examples
    --------

    >>> import os
    >>> import numpy as np
    >>> from PIL import Image
    >>> from hangar.bulk_importer import UDF_Return

    >>> def image_loader(file_path):
    ...     root, sample_file = os.path.split(file_path)
    ...     category = os.path.basename(root)
    ...     sample_name, _ = os.path.splitext(sample_file)
    ...
    ...     im = Image.open(file_path)
    ...     arr = np.array(im.resize((512, 512)))
    ...     im_record = UDF_Return(column='image', key=(category, sample_name), data=arr)
    ...     yield im_record
    ...
    ...     path_record = UDF_Return(column='file_str', key=(category, sample_name), data=file_path)
    ...     yield path_record
    ...
    >>> udf_kwargs = [
    ...     {'file_path': '/foo/cat/image_001.jpeg'},
    ...     {'file_path': '/foo/cat/image_002.jpeg'},
    ...     {'file_path': '/foo/dog/image_001.jpeg'},
    ...     {'file_path': '/foo/bird/image_011.jpeg'},
    ...     {'file_path': '/foo/bird/image_003.jpeg'}
    ... ]
    >>> repo = Repository('foo/path/to/repo')
    >>> from hangar.bulk_importer import run_bulk_import
    >>> run_bulk_import(
    ...     repo, branch_name='master', column_names=['file_str', 'image'],
    ...     udf=image_loader, udf_kwargs=udf_kwargs)

    However, the following will not work, since the output is non-deterministic.

    >>> def nondeterministic(x, y):
    ...     first = str(x * y)
    ...     yield UDF_Return(column='valstr', key=f'{x}_{y}', data=first)
    ...
    ...     second = str(x * y * random())
    ...     yield UDF_Return(column='valstr', key=f'{x}_{y}', data=second)
    ...
    >>> udf_kwargs = [
    ...     {'x': 1, 'y': 2},
    ...     {'x': 1, 'y': 3},
    ...     {'x': 2, 'y': 4},
    ... ]
    >>> run_bulk_import(
    ...     repo, branch_name='master', column_names=['valstr'],
    ...     udf=nondeterministic, udf_kwargs=udf_kwargs)
    Traceback (most recent call last):
      File "", line 1, in 
    TypeError: contents returned in subbsequent calls to UDF with identical
      kwargs yielded different results. UDFs MUST generate deterministic
      results for the given inputs. Input kwargs generating this result:
      {'x': 1, 'y': 2}.

    Not all columns must be returned from every input to the UDF, the number of
    data pieces yielded can also vary arbitrarily (so long as the results are
    deterministic for a particular set of inputs)

    >>> def maybe_load(x_arr, y_arr, sample_name, columns=['default']):
    ...     for column in columns:
    ...         arr = np.multiply(x_arr, y_arr)
    ...         yield UDF_Return(column=column, key=sample_name, data=arr)
    ...     #
    ...     # do some strange processing which only outputs another column sometimes
    ...     if len(columns) == 1:
    ...         other = np.array(x_arr.shape) * np.array(y_arr.shape)
    ...         yield UDF_Return(column='strange_column', key=sample_name, data=other)
    ...
    >>> udf_kwargs = [
    ...     {'x_arr': np.arange(10), 'y_arr': np.arange(10) + 1, 'sample_name': 'sample_1'},
    ...     {'x_arr': np.arange(10), 'y_arr': np.arange(10) + 1, 'sample_name': 'sample_2', 'columns': ['foo', 'bar', 'default']},
    ...     {'x_arr': np.arange(10) * 2, 'y_arr': np.arange(10), 'sample_name': 'sample_3'},
    ... ]
    >>> run_bulk_import(
    ...     repo, branch_name='master',
    ...     column_names=['default', 'foo', 'bar', 'strange_column'],
    ...     udf=maybe_load, udf_kwargs=udf_kwargs)

    Parameters
    ----------
    repo : 'Repository'
        Initialized repository object to import data into.
    branch_name : str
        Name of the branch to checkout and import data into.
    column_names : List[str]
        Names of all columns which data should be saved to.
    udf : UDF_T
        User-Defined Function (generator style; yielding an arbitrary number
        of values when iterated on) which is passed an unpacked kwarg dict as input
        and yields a single :class:`~.UDF_Return` instance at a time when iterated over.
        Cannot contain non-deterministic behavior; results must be repeatable
        for identical inputs.
    udf_kwargs : List[dict]
        A sequence of keyword argument dictionaries which are individually unpacked
        as inputs into the user-defined function (UDF). Each dictionary must be
        valid when bound to the UDF signature.
    ncpus : int, optional, default=0
        Number of parallel processes used to read data files & write to hangar backend
        stores. If <= 0, the default is set to ``num_cpus / 2``. The value of this
        parameter should never exceed the total CPU count of the system. Import time
        scales mostly linearly with ncpus. Optimal performance is achieved by balancing
        memory usage of the ``UDF`` function and backend storage writer processes against
        the total system memory.
    autocommit : bool, optional, default=True
        Control whether a commit should be made after successfully importing the
        specified data to the staging area of the branch.
    """
    _BATCH_SIZE = 10  # TODO: Is this necessary?

    columns: Dict[str, 'ModifierTypes'] = {}
    column_layouts: Dict[str, str] = {}
    schemas: Dict[str, 'ColumnBase'] = {}

    with closing(repo.checkout(write=True, branch=branch_name)) as co:
        for name in column_names:
            _col = co.columns[name]
            _schema = _col._schema
            columns[name] = _col
            column_layouts[name] = _col.column_layout
            schemas[name] = _schema

        print(f'Validating Reader Function and Argument Input')
        _check_user_input_func(columns=columns, udf=udf, udf_kwargs=udf_kwargs)
        serialized_udf = _serialize_udf(udf)

        ncpu = _process_num_cpus(ncpus)
        print(f'Using {ncpu} worker processes')

        recipe = _run_prepare_recipe(
            column_layouts=column_layouts,
            schemas=schemas,
            udf=serialized_udf,
            udf_kwargs=udf_kwargs,
            ncpu=ncpu,
            batch_size=_BATCH_SIZE)
        print('Unifying naive recipe task set.')
        unified_recipe = _unify_recipe_contents(recipe)
        print('Pruning redundant steps & eliminating tasks on data stored in hangar.')
        reduced_recipe = _reduce_recipe_on_required_digests(recipe, co._hashenv)

        nsteps_reduced_recipe = _num_steps_in_task_list(reduced_recipe)
        optim_percent = ((len(unified_recipe) - nsteps_reduced_recipe) / len(unified_recipe)) * 100
        print(f'Reduced recipe workload tasks by: {optim_percent:.2f}%')
        print(f' - Num tasks for naive ingest   : {len(unified_recipe)}')
        print(f' - Num tasks after optimization : {nsteps_reduced_recipe}')

        hangardirpth = repo._repo_path
        if len(reduced_recipe) >= 1:
            print('Starting multiprocessed data importer.')
            with TemporaryDirectory(dir=str(hangardirpth)) as tmpdirname:
                tmpdirpth = _mock_hangar_directory_structure(tmpdirname)
                written_data_steps = _run_write_recipe_data(
                    tmp_dir=tmpdirpth,
                    columns=columns,
                    schemas=schemas,
                    udf=serialized_udf,
                    recipe_tasks=reduced_recipe,
                    ncpu=ncpu,
                    batch_size=_BATCH_SIZE)
                print(f'Finalizing written data pieces in hangar repo directory...')
                _move_tmpdir_data_files_to_repodir(repodir=hangardirpth, tmpdir=tmpdirpth)
            _write_digest_to_bespec_mapping(
                executed_steps=written_data_steps,
                hashenv=co._hashenv,
                stagehashenv=co._stagehashenv)
        else:
            print('No actions requiring the data import remain after optimizations.')

        print(f'Mapping full recipe requested via UDF to optimized task set actually processed.')
        _write_full_recipe_sample_key_to_digest_mapping(sample_steps=unified_recipe, dataenv=co._stageenv)

        if autocommit:
            print(f'autocommitting changes.')
            co.commit(f'Auto commit after bulk import of {len(unified_recipe)} samples to '
                      f'column {column_names} on branch {branch_name}')
        else:
            print(f'skipping autocommit')

        print('Bulk data importer operation completed successfully')
        return


# ---------------- Internal Implementation of Bulk Data Loader ----------------


class _ContentDescriptionPrep(NamedTuple):
    column: str
    layout: str
    key: Union[Tuple[KeyType, KeyType], KeyType]
    digest: str
    udf_iter_idx: int

    def db_record_key(self):
        if self.layout == 'nested':
            db_key = nested_data_db_key_from_names(self.column, self.key[0], self.key[1])
        elif self.layout == 'flat':
            db_key = flat_data_db_key_from_names(self.column, self.key)
        else:
            raise ValueError(f'unknown column layout value {self.layout} encountered while formatting db record key')
        return db_key

    def db_record_val(self):
        return data_record_db_val_from_digest(self.digest)


class _Task(NamedTuple):
    udf_kwargs: dict
    udf_iter_indices: Tuple[int, ...]
    expected_digests: Tuple[str, ...]

    def num_steps(self):
        return len(self.udf_iter_indices)


class _WrittenContentDescription(NamedTuple):
    """Description of data content piece saved in the multprocess content writter

    Attributes
    ----------
    digest: str
        digest of the data piece written.
    bespec: bytes
        backend location spec in db formatted bytes representation.
    """
    digest: str
    bespec: bytes


def _num_steps_in_task_list(task_list: List[_Task]) -> int:
    num_steps_method = methodcaller('num_steps')
    return sum(map(num_steps_method, task_list))


def _serialize_udf(udf: UDF_T) -> bytes:
    raw = cloudpickle.dumps(udf, protocol=pickle.HIGHEST_PROTOCOL)
    return raw


def _deserialize_udf(raw: bytes) -> UDF_T:
    udf = cloudpickle.loads(raw)
    return udf


def _process_num_cpus(ncpus: int) -> int:
    """Determine how many workerprocesses to spin up in bulk importer

    Parameters
    ----------
    ncpus: int
        User specified number of worker processes. If <= 0 set to num CPU cores / 2.

    Returns
    -------
    int
    """
    node_cpus = os.cpu_count()
    if ncpus <= 0:
        cpu_try = ceil(node_cpus / 2)
        ncpus = bound(1, node_cpus, cpu_try)
    elif ncpus > node_cpus:
        warnings.warn(
            f'Input number of CPUs exceeds maximum on node. {ncpus} > {node_cpus}',
            category=UserWarning
        )
    return ncpus


def _check_user_input_func(
        columns,
        udf: UDF_T,
        udf_kwargs: List[dict],
        *,
        prerun_check_percentage: float = 0.02
):
    """Perform a few sanity tests to ensure kwargs and udf produces valid data.

    Parameters
    ----------
    columns
        initialized columns object dict.
    udf : UDF_T
        user provided function which takes some kwargs and generates one data sample.
    udf_kwargs : List[dict]
        kwarg dicts to unpack into UDF via `udf(**kwargs)`
    prerun_check_percentage : float, kwargonly, default=0.02
        value between (0.0, 1.0) representing what percentage of items in the full
        work list should be selected (at random) to be processed by udf &
        verified against the column schema.

        This is meant to serve as a quick sanity check (to test if success is even
        possible) before launching the full pipeline with multiple worker processes.
    """
    if not isgeneratorfunction(udf):
        raise TypeError(f'UDF {udf} is not a user defined generator function.')

    try:
        _raw_udf = _serialize_udf(udf)
        _deserialized = _deserialize_udf(_raw_udf)
    except (pickle.PicklingError, pickle.UnpicklingError) as e:
        my_err = RuntimeError(f'Could not pickle/unpickle UDF {udf} using cloudpickle.')
        raise my_err from e

    sig = signature(udf)
    for idx, kwargs in enumerate(tqdm(udf_kwargs, desc='Validating argument signature')):
        try:
            sig.bind(**kwargs)
        except TypeError as e:
            my_err = TypeError(f'Value {kwargs} at index {idx} of `udf_kwargs` is invalid.')
            raise my_err from e

    num_choices_by_percent = ceil(len(udf_kwargs) * prerun_check_percentage)
    num_choices = bound(2, 100, num_choices_by_percent)
    work_samples = random.choices(udf_kwargs, k=num_choices)
    for kwargs in tqdm(work_samples, desc=f'Performing pre-run sanity check'):
        first_results = []
        for first_res in udf(**kwargs):
            if not first_res.__class__.__name__ == UDF_Return.__name__:
                raise TypeError(
                    f'UDF must yield only values of type {UDF_Return}, received '
                    f'{type(first_res)} from input kwargs: {kwargs}')
            if first_res.column not in columns:
                raise ValueError(
                    f'UDF_Return column value {first_res.column} was not specified in bulk '
                    f'loader input. kwargs triggering this UDF_Return failure: {kwargs}')
            _col = columns[first_res.column]
            if _col.column_layout == 'flat':
                _col._set_arg_validate(first_res.key, first_res.data)
            else:
                _col._set_arg_validate(first_res.key[0], {first_res.key[1]: first_res.data})
            first_results.append(first_res)

        _DeterministicError = ValueError(
            f'contents returned in subsequent calls to UDF with identical kwargs '
            f'yielded different results. UDFs MUST generate deterministic results '
            f'for the given inputs. Input kwargs generating this result: {kwargs}')
        second_len = 0
        for second_idx, second_res in enumerate(udf(**kwargs)):
            if not second_res == first_results[second_idx]:
                raise _DeterministicError
            second_len += 1
        if second_len != len(first_results):
            raise _DeterministicError

    return True


class _MPQueue(mpq.Queue):
    """Interuptable Multiprocess Queue class which does not throw errors.
    """

    def __init__(self, *args, **kwargs):
        ctx = mp.get_context()
        super().__init__(*args, **kwargs, ctx=ctx)

    def safe_get(self, timeout=0.5):
        try:
            if timeout is None:
                return self.get(False)
            else:
                return self.get(True, timeout)
        except queue.Empty:
            return None

    def safe_put(self, item, timeout=0.5) -> bool:
        try:
            self.put(item, False, timeout)
            return True
        except queue.Full:
            return False

    def drain(self):
        item = self.safe_get()
        while item:
            yield item
            item = self.safe_get()

    def safe_close(self) -> int:
        num_left = sum(1 for __ in self.drain())
        self.close()
        self.join_thread()
        return num_left
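
    # Illustrative usage sketch (not part of the importer pipeline): producers call
    # ``safe_put`` and consumers call ``safe_get`` / ``drain`` without ever seeing
    # ``queue.Full`` / ``queue.Empty`` exceptions, e.g.
    #
    #   q = _MPQueue()
    #   q.safe_put({'x': 1})
    #   item = q.safe_get(timeout=0.1)   # -> {'x': 1}, or None if nothing arrived
    #   leftover = q.safe_close()        # drains & closes, returning number of items left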


class _BatchProcessPrepare(mp.Process):

    def __init__(
            self,
            udf: bytes,
            schemas: Dict[str, 'ColumnBase'],
            column_layouts: Dict[str, str],
            in_queue: _MPQueue,
            out_queue: _MPQueue,
            *args, **kwargs
    ):
        """Read all data generated by all UDF(**udf_kwargs) input.

        Validates that the reader function works and yields values of the correct
        UDF_Return type, that keys / column names are compatible, and that the data
        is suitable for the column schema; calculates the digest of each data piece
        and its index location in the UDF iteration.

        Parameters
        ----------
        udf
            user provided function yielding UDF_Return instances when iterated over
        schemas
            dict mapping column names -> initialized schema objects. This is required in
            order to properly calculate the data hash digests.
        column_layouts
            dict mapping column names -> column layout string
        in_queue
            queue containing work pieces (kwargs) to process via UDF `mp.Queue[List[dict]]`
        out_queue
            output queue of type `mp.Queue[List[Tuple[dict, List[_ContentDescriptionPrep]]]]`
            mapping kwargs -> content descriptions read in.
        """
        super().__init__(*args, **kwargs)
        self.column_layouts = column_layouts
        self._udf_raw: bytes = udf
        self.udf: Optional[UDF_T] = None
        self.in_queue = in_queue
        self.out_queue = out_queue
        self.schemas = schemas

    def _setup(self):
        self.udf = _deserialize_udf(self._udf_raw)

    def _input_tasks(self) -> Iterator[List[dict]]:
        udf_kwargs = self.in_queue.safe_get(timeout=2.0)
        while udf_kwargs is not None:
            yield udf_kwargs
            udf_kwargs = self.in_queue.safe_get()

    def run(self):
        self._setup()
        for udf_kwargs in self._input_tasks():
            udf_kwargs_res = (
                (kwargs, self.udf(**kwargs)) for kwargs in udf_kwargs if isinstance(kwargs, dict)
            )
            content_digests = []
            for kwargs, udf_data_generator in udf_kwargs_res:
                if kwargs is None:
                    continue

                udf_kwarg_content_digests = []
                for udf_iter_idx, udf_return in enumerate(udf_data_generator):
                    _column = udf_return.column
                    _key = udf_return.key
                    _data = udf_return.data
                    _schema = self.schemas[_column]
                    _layout = self.column_layouts[_column]

                    iscompat = _schema.verify_data_compatible(_data)
                    if not iscompat.compatible:
                        raise ValueError(f'data for key {_key} incompatible due to {iscompat.reason}')
                    digest = _schema.data_hash_digest(_data)
                    res = _ContentDescriptionPrep(_column, _layout, _key, digest, udf_iter_idx)
                    udf_kwarg_content_digests.append(res)
                content_digests.append((kwargs, udf_kwarg_content_digests))
            self.out_queue.safe_put(content_digests)


def _run_prepare_recipe(
        column_layouts: Dict[str, str],
        schemas: Dict[str, 'ColumnBase'],
        udf: bytes,
        udf_kwargs: List[dict],
        *,
        ncpu: int = 0,
        batch_size: int = 10
) -> List[Tuple[dict, List[_ContentDescriptionPrep]]]:

    # Setup & populate queue with batched arguments
    in_queue = _MPQueue()
    out_queue = _MPQueue()
    n_queue_tasks = ceil(len(udf_kwargs) / batch_size)
    for keys_kwargs in grouper(udf_kwargs, batch_size):
        in_queue.safe_put(keys_kwargs)

    out, jobs = [], []
    try:
        # start worker processes
        for _ in range(ncpu):
            t = _BatchProcessPrepare(
                udf=udf,
                schemas=schemas,
                column_layouts=column_layouts,
                in_queue=in_queue,
                out_queue=out_queue)
            jobs.append(t)
            t.start()

        # collect outputs and fill queue with more work if low
        # terminate if no more work should be done.
        with tqdm(total=len(udf_kwargs), desc='Constructing task recipe') as pbar:
            ngroups_processed = 0
            while ngroups_processed < n_queue_tasks:
                data_key_location_hash_digests = out_queue.safe_get(timeout=30)
                if data_key_location_hash_digests is None:
                    continue
                ngroups_processed += 1
                for saved in data_key_location_hash_digests:
                    pbar.update(1)
                    out.append(saved)

        in_queue.safe_close()
        out_queue.safe_close()
        for j in jobs:
            try:
                j.join(timeout=0.2)
            except mp.TimeoutError:
                j.terminate()
    except (KeyboardInterrupt, InterruptedError):
        in_queue.safe_close()
        out_queue.safe_close()
        while jobs:
            j = jobs.pop()
            if j.is_alive():
                print(f'terminating PID {j.pid}')
                j.terminate()
            else:
                exitcode = j.exitcode
                if exitcode:
                    print(f'PID {j.pid} exitcode: {exitcode}')
        raise
    return out


class _BatchProcessWriter(mp.Process):

    def __init__(
            self,
            udf: bytes,
            backends: Dict[str, str],
            schemas: Dict[str, 'ColumnBase'],
            tmp_pth: Path,
            in_queue: _MPQueue,
            out_queue: _MPQueue,
            *args, **kwargs
    ):
        """

        Parameters
        ----------
        udf
            user provided function yielding UDF_Return instances when iterated over.
        backends
            dict mapping column name -> backend code.
        schemas
            dict mapping column names -> initialized schema objects. This is required in
            order to properly calculate the data hash digests.
        tmp_pth
            tempdir path to write data to
        in_queue
            grouped task lists `mp.Queue[List[_Task]]`
        out_queue
            written content description `mp.Queue[List[_WrittenContentDescription]]`
        args
        kwargs
        """
        super().__init__(*args, **kwargs)
        self._udf_raw: bytes = udf
        self.udf: Optional[UDF_T] = None
        self.backends = backends
        self.backend_instances = {}
        self.in_queue = in_queue
        self.out_queue = out_queue
        self.schemas = schemas
        self.tmp_pth = tmp_pth

    def _setup(self):
        """
        Because backend FileHandle classes have a reader-checkout-only condition
        check set on __getstate__, we open the individual classes (and files) in the
        actual processes they will be used in (rather than trying to pickle them).
        """
        self.udf = _deserialize_udf(self._udf_raw)

        for column_name, column_backend in self.backends.items():
            be_instance_map = open_file_handles(
                backends=[column_backend],
                path=self.tmp_pth,
                mode='a',
                schema=self.schemas[column_name])
            be_instance = be_instance_map[column_backend]
            self.backend_instances[column_name] = be_instance

    def _input_tasks(self) -> Iterator[List[_Task]]:
        tasks_list = self.in_queue.safe_get(timeout=2)
        while tasks_list is not None:
            yield tasks_list
            tasks_list = self.in_queue.safe_get()

    @contextmanager
    def _enter_backends(self):
        try:
            for be in self.backend_instances.keys():
                self.backend_instances[be].__enter__()
            yield
        finally:
            for be in self.backend_instances.keys():
                self.backend_instances[be].__exit__()

    def run(self):
        self._setup()
        with self._enter_backends():
            for tasks_list in self._input_tasks():
                tasks = (
                    (task, self.udf(**task.udf_kwargs)) for task in tasks_list if isinstance(task, _Task)
                )
                written_digests_locations = []
                for task, applied_udf in tasks:
                    relevant_udf_indices = iter(task.udf_iter_indices)
                    desired_udf_idx = next(relevant_udf_indices)
                    for gen_idx, res in enumerate(applied_udf):
                        if gen_idx < desired_udf_idx:
                            continue

                        column = res.column
                        data = res.data
                        digest = self.schemas[column].data_hash_digest(data)
                        location_spec = self.backend_instances[column].write_data(data)
                        res = _WrittenContentDescription(digest, location_spec)
                        written_digests_locations.append(res)
                        try:
                            desired_udf_idx = next(relevant_udf_indices)
                        except StopIteration:
                            break
                self.out_queue.safe_put(written_digests_locations)


def _run_write_recipe_data(
        tmp_dir: Path,
        columns: Dict[str, 'ModifierTypes'],
        schemas: Dict[str, 'ColumnBase'],
        udf: bytes,
        recipe_tasks: List[_Task],
        *,
        ncpu=0,
        batch_size=10
) -> List[_WrittenContentDescription]:

    # Setup & populate queue with batched arguments
    in_queue = _MPQueue()
    out_queue = _MPQueue()
    n_queue_tasks = ceil(len(recipe_tasks) / batch_size)
    for keys_kwargs in grouper(recipe_tasks, batch_size):
        in_queue.put_nowait(keys_kwargs)

    out, jobs = [], []
    try:
        # start worker processes
        backends = {}
        for col_name, column in columns.items():
            backends[col_name] = column.backend
        for _ in range(ncpu):
            t = _BatchProcessWriter(
                udf=udf,
                backends=backends,
                schemas=schemas,
                tmp_pth=tmp_dir,
                in_queue=in_queue,
                out_queue=out_queue)
            jobs.append(t)
            t.start()

        # collect outputs and fill queue with more work if low
        # terminate if no more work should be done.
        nsteps = _num_steps_in_task_list(recipe_tasks)
        with tqdm(total=nsteps, desc='Executing Data Import Recipe') as pbar:
            ngroups_processed = 0
            while ngroups_processed < n_queue_tasks:
                data_key_location_hash_digests = out_queue.safe_get(timeout=30)
                if data_key_location_hash_digests is None:
                    continue
                ngroups_processed += 1
                for saved in data_key_location_hash_digests:
                    pbar.update(1)
                    out.append(saved)
        in_queue.safe_close()
        out_queue.safe_close()
        for j in jobs:
            try:
                j.join(timeout=0.2)
            except mp.TimeoutError:
                j.terminate()
    except (KeyboardInterrupt, InterruptedError):
        in_queue.safe_close()
        out_queue.safe_close()
        while jobs:
            j = jobs.pop()
            if j.is_alive():
                print(f'terminating PID {j.pid}')
                j.terminate()
            else:
                exitcode = j.exitcode
                if exitcode:
                    print(f'PID {j.pid} exitcode: {exitcode}')
        raise
    return out


def _unify_recipe_contents(recipe: List[Tuple[dict, List[_ContentDescriptionPrep]]]) -> List[_ContentDescriptionPrep]:
    """Flatten and isolate all ContentDescriptionPrep in flat recipe list.

    Parameters
    ----------
    recipe: List[Tuple[dict, List[_ContentDescriptionPrep]]]

    Returns
    -------
    List[_ContentDescriptionPrep]
        Flat list where each element records a sample's column name, layout, keys, & digest.
    """
    unified_content = []
    for udf_kwargs, udf_contents in recipe:
        for content in udf_contents:
            unified_content.append(content)
    return unified_content


def _reduce_recipe_on_required_digests(recipe: List[Tuple[dict, List[_ContentDescriptionPrep]]], hashenv):
    """Before writing, eliminate duplicate steps which would write identical
    data and steps which would write data already recorded in the repository.

    Parameters
    ----------
    recipe: List[Tuple[dict, List[_ContentDescriptionPrep]]]

    Returns
    -------
    List[_Task]:
        reduced recipe tasks to serve as input for the mp writer.

    Notes
    -----
    - Any number of samples may be added which have unique keys/kwargs,
      but whose udf returns identical data. To avoid writing
      identical data to disk multiple times, we select just one sample
      (at random) for each unique digest in the recipe. We write the
      data to disk alongside the digest -> backend spec mapping. Once
      all unique data sample steps are written, we use the full sample
      step recipe to record the sample name -> digest mapping without
      needing to actually process the data that a full step execution
      would have produced.

    - A similar exclusion is made for steps which produce data which is
      already recorded in the repository. The only difference is that
      we do not process writing of these steps at all (for any sample).
      Since the digest -> backend spec map already exists, we just need
      to process the key -> digest mapping.
    """
    recipe_contents = _unify_recipe_contents(recipe)
    digest_getter = attrgetter('digest')
    recipe_digests = set(map(digest_getter, recipe_contents))

    hq = hashs.HashQuery(hashenv)
    recipe_digests_db = set(map(hash_data_db_key_from_raw_key, recipe_digests))
    existing_digests_db = hq.intersect_keys_db(recipe_digests_db)
    missing_digests_db = recipe_digests_db.difference(existing_digests_db)
    missing_digests = set(map(hash_data_raw_key_from_db_key, missing_digests_db))

    remaining_digests = set(missing_digests)
    task_list = []
    for udf_kwargs, content_prep_recipes in recipe:
        task_udf_kwargs = None  # Set to value if kwargs should be included
        udf_indices = []
        expected_digests = []
        for content_prep in content_prep_recipes:
            _digest = content_prep.digest
            if _digest in remaining_digests:
                udf_indices.append(content_prep.udf_iter_idx)
                expected_digests.append(_digest)
                task_udf_kwargs = udf_kwargs
                remaining_digests.remove(_digest)
        if task_udf_kwargs:
            _task = _Task(udf_kwargs, tuple(udf_indices), tuple(expected_digests))
            task_list.append(_task)

    return task_list


def _write_digest_to_bespec_mapping(
        executed_steps: List[_WrittenContentDescription],
        hashenv: 'lmdb.Environment',
        stagehashenv: 'lmdb.Environment'
):
    """Write written content digests and bespec to hash and stagehash db.
    """
    digests_bespecs = []
    for spec in executed_steps:
        dbSpec = spec.bespec
        dbDigest = hash_data_db_key_from_raw_key(spec.digest)
        digests_bespecs.append((dbDigest, dbSpec))

    hashtxn = TxnRegister().begin_writer_txn(hashenv)
    stagehashtxn = TxnRegister().begin_writer_txn(stagehashenv)
    try:
        for dbDigest, dbSpec in digests_bespecs:
            stagehashtxn.put(dbDigest, dbSpec)
            hashtxn.put(dbDigest, dbSpec)
    finally:
        TxnRegister().commit_writer_txn(hashenv)
        TxnRegister().commit_writer_txn(stagehashenv)


def _write_full_recipe_sample_key_to_digest_mapping(
        sample_steps: List[_ContentDescriptionPrep],
        dataenv: 'lmdb.Environment'
):
    """Write sample name -> digest key/value pairs in checkout data (stage) db.
    """
    db_kvs = []
    for step in sample_steps:
        staging_key = step.db_record_key()
        staging_val = step.db_record_val()
        db_kvs.append((staging_key, staging_val))

    datatxn = TxnRegister().begin_writer_txn(dataenv)
    try:
        for dbk, dbv in db_kvs:
            datatxn.put(dbk, dbv)
    finally:
        TxnRegister().commit_writer_txn(dataenv)


def _mock_hangar_directory_structure(dir_name: str) -> Path:
    """Setup folder structure of hangar repo within a temporary directory path.

    Parameters
    ----------
    dir_name
        directory path to create the hangar dir structure in.

    Returns
    -------
    mocked hangar directory path.
    """
    dirpth = Path(dir_name)
    is_valid_directory_path(dirpth)

    dirpth.joinpath(DIR_DATA_STORE).mkdir()
    dirpth.joinpath(DIR_DATA_STAGE).mkdir()
    dirpth.joinpath(DIR_DATA_REMOTE).mkdir()
    dirpth.joinpath(DIR_DATA).mkdir()
    return dirpth


def _move_tmpdir_data_files_to_repodir(repodir: Path, tmpdir: Path):
    tmp_stage_dir = tmpdir.joinpath(DIR_DATA_STAGE)
    tmp_data_dir = tmpdir.joinpath(DIR_DATA)
    hangar_stage_dir = repodir.joinpath(DIR_DATA_STAGE)
    hangar_data_dir = repodir.joinpath(DIR_DATA)

    task_list = []
    for be_pth in tmp_stage_dir.iterdir():
        if be_pth.is_dir():
            for fpth in be_pth.iterdir():
                if fpth.is_file() and not fpth.stem.startswith('.'):
                    tmp_stage_fp = tmp_stage_dir.joinpath(be_pth.name, fpth.name)
                    hangar_stage_fp = hangar_stage_dir.joinpath(be_pth.name, fpth.name)
                    task_list.append((tmp_stage_fp, hangar_stage_fp))

                    if hangar_stage_fp.suffix.endswith('dir'):
                        # data directories (ie. lmdb) have a stage_file suffix ending in
                        # 'dir' (for lmdb this is a suffix of `.lmdbdir`). The stage_file
                        # stem is the directory name which needs to be moved.
                        tmp_data_fp = tmp_data_dir.joinpath(be_pth.name, fpth.stem)
                        hangar_data_fp = hangar_data_dir.joinpath(be_pth.name, fpth.stem)
                    else:
                        # files are 1:1 copy of stage_file:data_file
                        tmp_data_fp = tmp_data_dir.joinpath(be_pth.name, fpth.name)
                        hangar_data_fp = hangar_data_dir.joinpath(be_pth.name, fpth.name)
                    task_list.append((tmp_data_fp, hangar_data_fp))

    _MoveException = None
    num_workers = bound(5, 32, os.cpu_count() + 4)
    with ThreadPoolExecutor(max_workers=num_workers, thread_name_prefix='hangar_import_shutil') as e:
        future_result = [e.submit(shutil.move, str(src), str(dst)) for src, dst in task_list]
        for future in concurrent.futures.as_completed(future_result):
            if future.exception() is not None:
                _MoveException = future.exception()

    if _MoveException is not None:
        print('Error encountered while persisting imported data in the hangar repo directory.')
        print('Beginning change set roll back.')
        for _, dest_fp in task_list:
            if dest_fp.is_file():
                os.remove(str(dest_fp))
                print(f'- {dest_fp}')
            elif dest_fp.is_dir():
                shutil.rmtree(str(dest_fp))
                print(f'- {dest_fp}')
        print(f'Roll back completed successfully')
        raise _MoveException
    return True


================================================
FILE: src/hangar/checkout.py
================================================
import atexit
from pathlib import Path
import weakref
from contextlib import suppress, ExitStack
from uuid import uuid4
from typing import Optional, Union

import numpy as np
import lmdb

from .mixins import GetMixin, CheckoutDictIteration
from .columns import (
    ColumnTxn,
    Columns,
    generate_nested_column,
    generate_flat_column,
)
from .diff import ReaderUserDiff, WriterUserDiff
from .merger import select_merge_algorithm
from .records import commiting, hashs, heads, summarize
from .typesystem import (
    NdarrayFixedShape,
    NdarrayVariableShape,
    StringVariableShape,
    BytesVariableShape,
)
from .utils import is_suitable_user_key, is_ascii
from .records import (
    schema_db_key_from_column,
    schema_hash_record_db_val_from_spec,
    schema_hash_db_key_from_digest,
    schema_record_db_val_from_digest,
)


class ReaderCheckout(GetMixin, CheckoutDictIteration):
    """Checkout the repository as it exists at a particular branch.

    This class is instantiated automatically from a repository checkout
    operation. This object will govern all access to data and interaction methods
    the user requests.

        >>> co = repo.checkout()
        >>> isinstance(co, ReaderCheckout)
        True

    If a commit hash is provided, it will take precedence over the branch name
    parameter. If neither a branch nor a commit is specified, the staging
    environment's base branch ``HEAD`` commit hash will be read.

        >>> co = repo.checkout(commit='foocommit')
        >>> co.commit_hash
        'foocommit'
        >>> co.close()
        >>> co = repo.checkout(branch='testbranch')
        >>> co.commit_hash
        'someothercommithashhere'
        >>> co.close()

    Unlike :class:`WriterCheckout`, any number of :class:`ReaderCheckout`
    objects can exist on the repository independently. Like the
    ``write-enabled`` variant, the :meth:`close` method should be called after
    performing the necessary operations on the repo. However, as there is no
    concept of a ``lock`` for ``read-only`` checkouts, this is just to free up
    memory resources, rather than changing recorded access state.

    In order to reduce the chance that the python interpreter is shut down
    without calling :meth:`close` - a common mistake during ipython / jupyter
    sessions - an `atexit <https://docs.python.org/3/library/atexit.html>`_
    hook is registered to :meth:`close`. If properly closed by the user, the
    hook is unregistered after completion with no ill effects. So long as the
    process is NOT terminated via non-python ``SIGKILL``, fatal internal python
    error, or special ``os exit`` methods, cleanup will occur on interpreter
    shutdown and resources will be freed. If a non-handled termination method
    does occur, the implications of holding resources vary on a per-OS basis.
    While no risk to data integrity is observed, repeated misuse may require a
    system reboot in order to achieve expected performance characteristics.
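
    A short sketch of typical read-only usage; entering the checkout as a
    context manager keeps backend file handles open across repeated accesses
    (it does not replace the explicit :meth:`close` call):

        >>> co = repo.checkout(branch='master')
        >>> with co:
        ...     fooCol = co.columns['foo']
        ...     sample = fooCol['someSampleKey']
        >>> co.close()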
    """

    def __init__(self,
                 base_path: Path,
                 dataenv: lmdb.Environment,
                 hashenv: lmdb.Environment,
                 branchenv: lmdb.Environment,
                 refenv: lmdb.Environment,
                 commit: str):
        """Developer documentation of init method.

        Parameters
        ----------
        base_path : Path
            directory path to the Hangar repository on disk
        dataenv : lmdb.Environment
            db where the checkout record data is unpacked and stored.
        hashenv : lmdb.Environment
            db where the hash records are stored.
        branchenv : lmdb.Environment
            db where the branch records are stored.
        refenv : lmdb.Environment
            db where the commit references are stored.
        commit : str
            specific commit hash to checkout
        """
        self._commit_hash = commit
        self._repo_path = base_path
        self._dataenv = dataenv
        self._hashenv = hashenv
        self._branchenv = branchenv
        self._refenv = refenv
        self._enter_count = 0
        self._stack: Optional[ExitStack] = None

        self._columns = Columns._from_commit(
            repo_pth=self._repo_path,
            hashenv=self._hashenv,
            cmtrefenv=self._dataenv)
        self._differ = ReaderUserDiff(
            commit_hash=self._commit_hash,
            branchenv=self._branchenv,
            refenv=self._refenv)
        atexit.register(self.close)

    def _repr_pretty_(self, p, cycle):
        """pretty repr for printing in jupyter notebooks
        """
        self._verify_alive()
        res = f'Hangar {self.__class__.__name__}\
                \n    Writer       : False\
                \n    Commit Hash  : {self._commit_hash}\
                \n    Num Columns  : {len(self)}\n'
        p.text(res)

    def __repr__(self):
        self._verify_alive()
        res = f'{self.__class__}('\
              f'base_path={self._repo_path} '\
              f'dataenv={self._dataenv} '\
              f'hashenv={self._hashenv} '\
              f'commit={self._commit_hash})'
        return res

    def __enter__(self):
        self._verify_alive()
        with ExitStack() as stack:
            if self._enter_count == 0:
                stack.enter_context(self._columns)
            self._enter_count += 1
            self._stack = stack.pop_all()
        return self

    def __exit__(self, *exc):
        self._stack.close()
        self._enter_count -= 1

    def _verify_alive(self):
        """Validates that the checkout object has not been closed

        Raises
        ------
        PermissionError
            if the checkout was previously closed
        """
        if not hasattr(self, '_columns'):
            err = PermissionError(
                f'Unable to operate on past checkout objects which have been '
                f'closed. No operation occurred. Please use a new checkout.')
            raise err from None

    @property
    def _is_conman(self) -> bool:
        self._verify_alive()
        return bool(self._enter_count)

    @property
    def columns(self) -> Columns:
        """Provides access to column interaction object.

        Can be used to either return the columns accessor for all elements or
        a single column instance by using dictionary style indexing.

            >>> co = repo.checkout(write=False)
            >>> len(co.columns)
            1
            >>> print(co.columns.keys())
            ['foo']
            >>> fooCol = co.columns['foo']
            >>> fooCol.dtype
            np.fooDtype
            >>> cols = co.columns
            >>> fooCol = cols['foo']
            >>> fooCol.dtype
            np.fooDtype
            >>> fooCol = cols.get('foo')
            >>> fooCol.dtype
            np.fooDtype

        .. seealso::

            The class :class:`~.columns.column.Columns` contains all methods
            accessible by this property accessor

        Returns
        -------
        :class:`~.columns.column.Columns`
            the columns object which behaves exactly like a
            columns accessor class but which can be invalidated when the writer
            lock is released.
        """
        self._verify_alive()
        return self._columns

    @property
    def diff(self) -> ReaderUserDiff:
        """Access the differ methods for a read-only checkout.

        .. seealso::

            The class :class:`ReaderUserDiff` contains all methods accessible
            by this property accessor

        Returns
        -------
        ReaderUserDiff
            weakref proxy to the differ object (and contained methods) which behaves
            exactly like the differ class but which can be invalidated when the
            writer lock is released.
        """
        self._verify_alive()
        wr = weakref.proxy(self._differ)
        return wr

    @property
    def commit_hash(self) -> str:
        """Commit hash this read-only checkout's data is read from.

            >>> co = repo.checkout()
            >>> co.commit_hash
            foohashdigesthere

        Returns
        -------
        str
            commit hash of the checkout
        """
        self._verify_alive()
        return self._commit_hash

    def log(self,
            branch: str = None,
            commit: str = None,
            *,
            return_contents: bool = False,
            show_time: bool = False,
            show_user: bool = False) -> Optional[dict]:
        """Displays a pretty printed commit log graph to the terminal.

        .. note::

            For programmatic access, the return_contents value can be set to true
            which will retrieve relevant commit specifications as dictionary
            elements.

        If neither the `branch` nor `commit` argument is supplied, the commit
        digest of the current reader checkout will be used as the default.
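
        A hedged sketch (the printed graph and returned structure depend on the
        repository history):

            >>> co.log(show_time=True)                # prints the graph to the terminal
            >>> contents = co.log(return_contents=True)
            >>> isinstance(contents, dict)
            True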

        Parameters
        ----------
        branch : str, optional
            The name of the branch to start the log process from. (Default value
            = None)
        commit : str, optional
            The commit hash to start the log process from. (Default value = None)
        return_contents : bool, optional, kwarg only
            If true, return the commit graph specifications in a dictionary
            suitable for programmatic access/evaluation.
        show_time : bool, optional, kwarg only
            If true and return_contents is False, show the time of each commit
            on the printed log graph
        show_user : bool, optional, kwarg only
            If true and return_contents is False, show the committer of each
            commit on the printed log graph

        Returns
        -------
        Optional[dict]
            Dict containing the commit ancestor graph, and all specifications.
        """
        self._verify_alive()
        if (branch is None) and (commit is None):
            commit = self.commit_hash
        res = summarize.log(branchenv=self._branchenv,
                            refenv=self._refenv,
                            branch=branch,
                            commit=commit,
                            return_contents=return_contents,
                            show_time=show_time,
                            show_user=show_user)
        return res

    def close(self) -> None:
        """Gracefully close the reader checkout object.

        Though not strictly required for reader checkouts (as opposed to
        writers), closing the checkout after reading will free file handles and
        system resources, which may improve performance for repositories with
        multiple simultaneous read checkouts.
        """
        self._verify_alive()
        if isinstance(self._stack, ExitStack):
            self._stack.close()

        self._columns._destruct()
        for attr in list(self.__dict__.keys()):
            delattr(self, attr)
        atexit.unregister(self.close)
        return


# --------------- Write enabled checkout ---------------------------------------


class WriterCheckout(GetMixin, CheckoutDictIteration):
    """Checkout the repository at the head of a given branch for writing.

    This is the entry point for all writing operations to the repository, the
    writer class records all interactions in a special ``"staging"`` area,
    which is based off the state of the repository as it existed at the
    ``HEAD`` commit of a branch.

        >>> co = repo.checkout(write=True)
        >>> co.branch_name
        'master'
        >>> co.commit_hash
        'masterheadcommithash'
        >>> co.close()

    At the moment, only one instance of this class can write data to the
    staging area at a time. After the desired operations have been completed,
    it is crucial to call :meth:`close` to release the writer lock. In
    addition, after any changes have been made to the staging area, the branch
    ``HEAD`` cannot be changed. In order to checkout another branch ``HEAD``
    for writing, you must either :meth:`commit` the changes, or perform a
    hard-reset of the staging area to the last commit via
    :meth:`reset_staging_area`.

    In order to reduce the chance that the python interpreter is shut down
    without calling :meth:`close`, which releases the writer lock - a common
    mistake during ipython / jupyter sessions - an `atexit
    <https://docs.python.org/3/library/atexit.html>`_ hook is registered to
    :meth:`close`. If properly closed by the user, the hook is unregistered
    after completion with no ill effects. So long as the process is NOT
    terminated via non-python SIGKILL, fatal internal python error, or
    special os exit methods, cleanup will occur on interpreter shutdown and the
    writer lock will be released. If a non-handled termination method does
    occur, the :meth:`~.Repository.force_release_writer_lock` method must be
    called manually when a new python process wishes to open the writer
    checkout.
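
    A hedged sketch of a typical write session (the dict-style sample
    assignment and the commit digest shown are illustrative):

        >>> co = repo.checkout(write=True)
        >>> col = co.add_str_column('notes')
        >>> col['sample_0'] = 'first note'
        >>> co.commit('added notes column with one sample')
        'newcommithashdigest'
        >>> co.close()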
    """

    def __init__(self,
                 repo_pth: Path,
                 branch_name: str,
                 hashenv: lmdb.Environment,
                 refenv: lmdb.Environment,
                 stageenv: lmdb.Environment,
                 branchenv: lmdb.Environment,
                 stagehashenv: lmdb.Environment,
                 mode: str = 'a'):
        """Developer documentation of init method.

        Parameters
        ----------
        repo_pth : Path
            local file path of the repository.
        branch_name : str
            name of the branch whose ``HEAD`` commit will form the starting state
            of the staging area.
        hashenv : lmdb.Environment
            db where the hash records are stored.
        refenv : lmdb.Environment
            db where the commit record data is unpacked and stored.
        stageenv : lmdb.Environment
            db where the stage record data is unpacked and stored.
        branchenv : lmdb.Environment
            db where the head record data is unpacked and stored.
        stagehashenv: lmdb.Environment
            db where the staged hash record data is stored.
        mode : str, optional
            open in write or read only mode, default is 'a' which is write-enabled.
        """
        self._enter_count = 0
        self._repo_path: Path = repo_pth
        self._branch_name = branch_name
        self._writer_lock = str(uuid4())
        self._stack: Optional[ExitStack] = None

        self._refenv = refenv
        self._hashenv = hashenv
        self._stageenv = stageenv
        self._branchenv = branchenv
        self._stagehashenv = stagehashenv

        self._columns: Optional[Columns] = None
        self._differ: Optional[WriterUserDiff] = None
        self._setup()
        atexit.register(self.close)

    def _repr_pretty_(self, p, cycle):
        """pretty repr for printing in jupyter notebooks
        """
        self._verify_alive()
        res = f'Hangar {self.__class__.__name__}\
                \n    Writer       : True\
                \n    Base Branch  : {self._branch_name}\
                \n    Num Columns  : {len(self)}\n'
        p.text(res)

    def __repr__(self):
        self._verify_alive()
        res = f'{self.__class__}('\
              f'base_path={self._repo_path} '\
              f'branch_name={self._branch_name} ' \
              f'hashenv={self._hashenv} '\
              f'refenv={self._refenv} '\
              f'stageenv={self._stageenv} '\
              f'branchenv={self._branchenv})\n'
        return res

    def __enter__(self):
        self._verify_alive()
        with ExitStack() as stack:
            if self._enter_count == 0:
                stack.enter_context(self._columns)
            self._enter_count += 1
            self._stack = stack.pop_all()
        return self

    def __exit__(self, *exc):
        self._stack.close()
        self._enter_count -= 1

    @property
    def _is_conman(self):
        self._verify_alive()
        return bool(self._enter_count)

    def _verify_alive(self):
        """Ensures that this class instance holds the writer lock in the database.

        Raises
        ------
        PermissionError
            If the checkout was previously closed (no :attr:``_writer_lock``)
            or if the writer lock value does not match that recorded in the
            branch db
        """
        if not hasattr(self, '_writer_lock'):
            with suppress(AttributeError):
                self._columns._destruct()
                del self._columns
            with suppress(AttributeError):
                del self._differ
            err = f'Unable to operate on past checkout objects which have been '\
                  f'closed. No operation occurred. Please use a new checkout.'
            raise PermissionError(err) from None

        try:
            heads.acquire_writer_lock(self._branchenv, self._writer_lock)
        except Exception as e:
            with suppress(AttributeError):
                self._columns._destruct()
                del self._columns
            with suppress(AttributeError):
                del self._differ
            raise e from None

    def _setup(self):
        """setup the staging area appropriately for a write enabled checkout.

        On setup, we cannot be sure what branch the staging area was previously
        checked out on, and we cannot be sure if there are any 'uncommitted
        changes' in the staging area (ie. the staging area is ``DIRTY``). The
        setup methods here ensure that we can safely make any changes to the
        staging area without overwriting uncommitted changes, and then perform
        the setup steps to checkout staging area state at that point in time.

        Raises
        ------
        ValueError
            if there are changes previously made in the staging area which were
            based on one branch's ``HEAD``, but a different branch was specified to
            be used for the base of this checkout.
        """
        self._verify_alive()
        current_head = heads.get_staging_branch_head(self._branchenv)
        currentDiff = WriterUserDiff(stageenv=self._stageenv,
                                     refenv=self._refenv,
                                     branchenv=self._branchenv,
                                     branch_name=current_head)
        if currentDiff.status() == 'DIRTY':
            if current_head != self._branch_name:
                e = ValueError(
                    f'Unable to check out branch: {self._branch_name} for writing '
                    f'as the staging area has uncommitted changes on branch: '
                    f'{current_head}. Please commit or stash uncommitted changes '
                    f'before checking out a different branch for writing.')
                self.close()
                raise e
        else:
            if current_head != self._branch_name:
                try:
                    cmt = heads.get_branch_head_commit(
                        branchenv=self._branchenv, branch_name=self._branch_name)
                except ValueError as e:
                    self.close()
                    raise e
                commiting.replace_staging_area_with_commit(
                    refenv=self._refenv, stageenv=self._stageenv, commit_hash=cmt)
                heads.set_staging_branch_head(
                    branchenv=self._branchenv, branch_name=self._branch_name)

        self._columns = Columns._from_staging_area(
            repo_pth=self._repo_path,
            hashenv=self._hashenv,
            stageenv=self._stageenv,
            stagehashenv=self._stagehashenv)
        self._differ = WriterUserDiff(
            stageenv=self._stageenv,
            refenv=self._refenv,
            branchenv=self._branchenv,
            branch_name=self._branch_name)

    @property
    def columns(self) -> Columns:
        """Provides access to column interaction object.

        Can be used to either return the columns accessor for all elements or
        a single column instance by using dictionary style indexing.

            >>> co = repo.checkout(write=True)
            >>> cols = co.columns
            >>> len(cols)
            0
            >>> fooCol = co.add_ndarray_column('foo', shape=(10, 10), dtype=np.uint8)
            >>> len(co.columns)
            1
            >>> len(co)
            1
            >>> list(co.columns.keys())
            ['foo']
            >>> list(co.keys())
            ['foo']
            >>> fooCol = co.columns['foo']
            >>> fooCol.dtype
            np.fooDtype
            >>> fooCol = cols.get('foo')
            >>> fooCol.dtype
            np.fooDtype
            >>> 'foo' in co.columns
            True
            >>> 'bar' in co.columns
            False

        .. seealso::

            The class :class:`~.columns.column.Columns` contains all methods
            accessible by this property accessor

        Returns
        -------
        :class:`~.columns.column.Columns`
            the columns object which behaves exactly like a columns accessor
            class but which can be invalidated when the writer lock is
            released.
        """
        self._verify_alive()
        return self._columns

    @property
    def diff(self) -> WriterUserDiff:
        """Access the differ methods which are aware of any staged changes.

        .. seealso::

            The class :class:`hangar.diff.WriterUserDiff` contains all methods
            accessible by this property accessor

        Returns
        -------
        WriterUserDiff
            weakref proxy to the differ object (and contained methods) which
            behaves exactly like the differ class but which can be invalidated
            when the writer lock is released.
        """
        self._verify_alive()
        wr = weakref.proxy(self._differ)
        return wr

    @property
    def branch_name(self) -> str:
        """Branch this write enabled checkout's staging area was based on.

        Returns
        -------
        str
            name of the branch whose commit ``HEAD`` changes are staged from.
        """
        self._verify_alive()
        return self._branch_name

    @property
    def commit_hash(self) -> str:
        """Commit hash which the staging area of `branch_name` is based on.

        Returns
        -------
        str
            commit hash
        """
        self._verify_alive()
        cmt = heads.get_branch_head_commit(branchenv=self._branchenv,
                                           branch_name=self._branch_name)
        return cmt

    def log(self,
            branch: str = None,
            commit: str = None,
            *,
            return_contents: bool = False,
            show_time: bool = False,
            show_user: bool = False) -> Optional[dict]:
        """Displays a pretty printed commit log graph to the terminal.

        .. note::

            For programmatic access, the return_contents value can be set to true
            which will retrieve relevant commit specifications as dictionary
            elements.

        If neither the `branch` nor `commit` argument is supplied, the branch which
        is currently checked out for writing will be used as the default.

        Parameters
        ----------
        branch : str, optional
            The name of the branch to start the log process from. (Default value
            = None)
        commit : str, optional
            The commit hash to start the log process from. (Default value = None)
        return_contents : bool, optional, kwarg only
            If true, return the commit graph specifications in a dictionary
            suitable for programmatic access/evaluation.
        show_time : bool, optional, kwarg only
            If true and return_contents is False, show the time of each commit
            on the printed log graph
        show_user : bool, optional, kwarg only
            If true and return_contents is False, show the committer of each
            commit on the printed log graph

        Returns
        -------
        Optional[dict]
            Dict containing the commit ancestor graph, and all specifications.
        """
        self._verify_alive()
        if (branch is None) and (commit is None):
            branch = self.branch_name
        res = summarize.log(branchenv=self._branchenv,
                            refenv=self._refenv,
                            branch=branch,
                            commit=commit,
                            return_contents=return_contents,
                            show_time=show_time,
                            show_user=show_user)
        return res

    def add_str_column(self,
                       name: str,
                       contains_subsamples: bool = False,
                       *,
                       backend: Optional[str] = None,
                       backend_options: Optional[dict] = None):
        """Initializes a :class:`str` container column

        Columns are created in order to store some arbitrary collection of data
        pieces. In this case, we store :class:`str` data. Items need not be
        related to each-other in any direct capacity; the only criteria hangar
        requires is that all pieces of data stored in the column have a
        compatible schema with each-other (more on this below). Each piece of
        data is indexed by some key (either user defined or automatically
        generated depending on the user's preferences). Both single level
        stores (sample keys mapping to data on disk) and nested stores (where
        some sample key maps to an arbitrary number of subsamples, in turn each
        pointing to some piece of store data on disk) are supported.

        All data pieces within a column have the same data type. For
        :class:`str` columns, there is no distinction between
        ``'variable_shape'`` and ``'fixed_shape'`` schema types. Values are
        allowed to take on a value of any size so long as the datatype and
        contents are valid for the schema definition.
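
        A minimal usage sketch (the dict-style sample assignment shown is
        illustrative of the flat layout column API):

            >>> co = repo.checkout(write=True)
            >>> captions = co.add_str_column('captions')
            >>> captions['image_0'] = 'a short text description'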

        Parameters
        ----------
        name : str
            Name assigned to the column
        contains_subsamples : bool, optional
            True if the column should store data in a nested structure.
            In this scheme, a sample key is used to index an arbitrary number
            of subsamples which map some (sub)key to a piece of data. If False,
            sample keys map directly to a single piece of data; essentially
            acting as a single level key/value store. By default, False.
        backend : Optional[str], optional
            ADVANCED USERS ONLY, backend format code to use for column data. If
            None, automatically inferred and set based on data shape and type.
            by default None
        backend_options : Optional[dict], optional
            ADVANCED USERS ONLY, filter opts to apply to column data. If None,
            automatically inferred and set based on data shape and type.
            by default None

        Returns
        -------
        :class:`~.columns.column.Columns`
            instance object of the initialized column.
        """
        self._verify_alive()
        if self.columns._any_is_conman() or self._is_conman:
            raise PermissionError('Not allowed while context manager is used.')

        # ------------- Checks for argument validity --------------------------

        try:
            if (not is_suitable_user_key(name)) or (not is_ascii(name)):
                raise ValueError(
                    f'Column name provided: `{name}` is invalid. Can only contain '
                    f'alpha-numeric or "." "_" "-" ascii characters (no whitespace). '
                    f'Must be <= 64 characters long')
            if name in self._columns:
                raise LookupError(f'Column already exists with name: {name}.')
            if not isinstance(contains_subsamples, bool):
                raise ValueError(f'contains_subsamples argument must be bool, '
                                 f'not type {type(contains_subsamples)}')
        except (ValueError, LookupError) as e:
            raise e from None

        # ---------- schema validation handled automatically by typesystem ----

        layout = 'nested' if contains_subsamples else 'flat'
        schema = StringVariableShape(
            dtype=str, column_layout=layout, backend=backend, backend_options=backend_options)

        # ------------------ create / return new column -----------------------

        col = self._initialize_new_column(
            column_name=name, column_layout=layout, schema=schema)
        return col

    def add_bytes_column(self,
                         name: str,
                         contains_subsamples: bool = False,
                         *,
                         backend: Optional[str] = None,
                         backend_options: Optional[dict] = None):
        """Initializes a :class:`bytes` container column

        Columns are created in order to store some arbitrary collection of data
        pieces. In this case, we store :class:`bytes` data. Items need not be
        related to each-other in any direct capacity; the only criterion hangar
        requires is that all pieces of data stored in the column have a
        compatible schema with each-other (more on this below). Each piece of
        data is indexed by some key (either user defined or automatically
        generated depending on the user's preferences). Both single level
        stores (sample keys mapping to data on disk) and nested stores (where
        some sample key maps to an arbitrary number of subsamples, in turn each
        pointing to some piece of store data on disk) are supported.

        All data pieces within a column have the same data type. For
        :class:`bytes` columns, there is no distinction between
        ``'variable_shape'`` and ``'fixed_shape'`` schema types. Values are
        allowed to take on a value of any size so long as the datatype and
        contents are valid for the schema definition.
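
        A minimal usage sketch for a nested (subsample) layout; mapping a sample
        key to a dict of subsamples is shown here for illustration:

            >>> co = repo.checkout(write=True)
            >>> blobs = co.add_bytes_column('blobs', contains_subsamples=True)
            >>> blobs['sample_0'] = {'part_a': b'\x00\x01', 'part_b': b'\x02\x03'}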

        Parameters
        ----------
        name : str
            Name assigned to the column
        contains_subsamples : bool, optional
            True if the column should store data in a nested structure.
            In this scheme, a sample key is used to index an arbitrary number
            of subsamples which map some (sub)key to a piece of data. If False,
            sample keys map directly to a single piece of data; essentially
            acting as a single level key/value store. By default, False.
        backend : Optional[str], optional
            ADVANCED USERS ONLY, backend format code to use for column data. If
            None, automatically inferred and set based on data shape and type.
            by default None
        backend_options : Optional[dict], optional
            ADVANCED USERS ONLY, filter opts to apply to column data. If None,
            automatically inferred and set based on data shape and type.
            by default None

        Returns
        -------
        :class:`~.columns.column.Columns`
            instance object of the initialized column.
        """
        self._verify_alive()
        if self.columns._any_is_conman() or self._is_conman:
            raise PermissionError('Not allowed while context manager is used.')

        # ------------- Checks for argument validity --------------------------

        try:
            if (not is_suitable_user_key(name)) or (not is_ascii(name)):
                raise ValueError(
                    f'Column name provided: `{name}` is invalid. Can only contain '
                    f'alpha-numeric or "." "_" "-" ascii characters (no whitespace). '
                    f'Must be <= 64 characters long')
            if name in self._columns:
                raise LookupError(f'Column already exists with name: {name}.')
            if not isinstance(contains_subsamples, bool):
                raise ValueError(f'contains_subsamples argument must be bool, '
                                 f'not type {type(contains_subsamples)}')
        except (ValueError, LookupError) as e:
            raise e from None

        # ---------- schema validation handled automatically by typesystem ----

        layout = 'nested' if contains_subsamples else 'flat'
        schema = BytesVariableShape(
            dtype=bytes, column_layout=layout, backend=backend, backend_options=backend_options)

        # ------------------ create / return new column -----------------------

        col = self._initialize_new_column(
            column_name=name, column_layout=layout, schema=schema)
        return col

    def add_ndarray_column(self,
                           name: str,
                           shape: Optional[Union[int, tuple]] = None,
                           dtype: Optional[np.dtype] = None,
                           prototype: Optional[np.ndarray] = None,
                           variable_shape: bool = False,
                           contains_subsamples: bool = False,
                           *,
                           backend: Optional[str] = None,
                           backend_options: Optional[dict] = None):
        """Initializes a :class:`numpy.ndarray` container column.

        Columns are created in order to store some arbitrary collection of data
        pieces. In this case, we store :class:`numpy.ndarray` data. Items need
        not be related to each-other in any direct capacity; the only criterion
        hangar requires is that all pieces of data stored in the column have a
        compatible schema with each-other (more on this below). Each piece of
        data is indexed by some key (either user defined or automatically
        generated depending on the user's preferences). Both single level
        stores (sample keys mapping to data on disk) and nested stores (where
        some sample key maps to an arbitrary number of subsamples, in turn each
        pointing to some piece of store data on disk) are supported.

        All data pieces within a column have the same data type and number of
        dimensions. The size of each dimension can be either fixed (the default
        behavior) or variable per sample. For fixed dimension sizes, all data
        pieces written to the column must have the same shape & size which was
        specified at the time the column was initialized. Alternatively,
        variable sized columns can write data pieces with dimensions of any
        size (up to a specified maximum).
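
        A minimal usage sketch for a fixed shape column (shape, dtype, and key
        values are illustrative):

            >>> co = repo.checkout(write=True)
            >>> imgs = co.add_ndarray_column('images', shape=(28, 28), dtype=np.uint8)
            >>> imgs['sample_0'] = np.zeros((28, 28), dtype=np.uint8)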

        Parameters
        ----------
        name : str
            The name assigned to this column.
        shape : Optional[Union[int, Tuple[int]]]
            The shape of the data samples which will be written in this column.
            This argument and the `dtype` argument are required if a `prototype`
            is not provided, defaults to None.
        dtype : Optional[:class:`numpy.dtype`]
            The datatype of this column. This argument and the `shape` argument
            are required if a `prototype` is not provided, defaults to None.
        prototype : Optional[:class:`numpy.ndarray`]
            A sample array of correct datatype and shape which will be used to
            initialize the column storage mechanisms. If this is provided, the
            `shape` and `dtype` arguments must not be set, defaults to None.
        variable_shape : bool, optional
            If this is a variable sized column. If true, the maximum shape is
            set from the provided ``shape`` or ``prototype`` argument. Any sample
            added to the column can then have dimension sizes <= to this
            initial specification (so long as they have the same rank as what
            was specified) defaults to False.
        contains_subsamples : bool, optional
            True if the column column should store data in a nested structure.
            In this scheme, a sample key is used to index an arbitrary number of
            subsamples which map some (sub)key to some piece of data. If False,
            sample keys map directly to a single piece of data; essentially
            acting as a single level key/value store. By default, False.
        backend : Optional[str], optional
            ADVANCED USERS ONLY, backend format code to use for column data. If
            None, automatically inferred and set based on data shape and type.
            by default None
        backend_options : Optional[dict], optional
            ADVANCED USERS ONLY, filter opts to apply to column data. If None,
            automatically inferred and set based on data shape and type.
            by default None

        Returns
        -------
        :class:`~.columns.column.Columns`
            instance object of the initialized column.
        """
        self._verify_alive()
        if self.columns._any_is_conman() or self._is_conman:
            raise PermissionError('Not allowed while context manager is used.')

        # ------------- Checks for argument validity --------------------------

        try:
            if (not is_suitable_user_key(name)) or (not is_ascii(name)):
                raise ValueError(
                    f'Column name provided: `{name}` is invalid. Can only contain '
                    f'alpha-numeric or "." "_" "-" ascii characters (no whitespace). '
                    f'Must be <= 64 characters long')
            if name in self.columns:
                raise LookupError(f'Column already exists with name: {name}.')
            if not isinstance(contains_subsamples, bool):
                raise ValueError(f'contains_subsamples is not bool type')

            # If shape/dtype is passed instead of a prototype arg, we use those values
            # to initialize a numpy array prototype. Using a :class:`numpy.ndarray`
            # for specification of dtype / shape params lets us offload much of the
            # required type checking / sanitization of userspace input to libnumpy,
            # rather than attempting to cover all possible cases here.
            if prototype is not None:
                if (shape is not None) or (dtype is not None):
                    raise ValueError(f'cannot set both prototype and shape/dtype args.')
            else:
                prototype = np.zeros(shape, dtype=dtype)
            dtype = prototype.dtype
            shape = prototype.shape
            if not all([x > 0 for x in shape]):
                raise ValueError(f'all dimensions must be sized greater than zero')
        except (ValueError, LookupError) as e:
            raise e from None

        # ---------- schema validation handled automatically by typesystem ----

        column_layout = 'nested' if contains_subsamples else 'flat'
        if variable_shape:
            schema = NdarrayVariableShape(dtype=dtype, shape=shape, column_layout=column_layout,
                                          backend=backend, backend_options=backend_options)
        else:
            schema = NdarrayFixedShape(dtype=dtype, shape=shape, column_layout=column_layout,
                                       backend=backend, backend_options=backend_options)

        # ------------------ create / return new column -----------------------

        col = self._initialize_new_column(
            column_name=name, column_layout=column_layout, schema=schema)
        return col

    def _initialize_new_column(self,
                               column_name: str,
                               column_layout: str,
                               schema) -> Columns:
        """Initialize a column and write spec to record db.

        Parameters
        ----------
        column_name: str
            name of the column
        column_layout: str
            One of ['flat', 'nested'] indicating column layout class to use
            during generation.
        schema: ColumnBase
            schema class instance providing column data spec, schema/column digest,
            data validator / hashing methods, and backend ID / options; all of which
            are needed to successfully create & save the column instance

        Returns
        -------
        Columns
            initialized column class instance.
        """
        # -------- set vals in lmdb only after schema is sure to exist --------

        schema_digest = schema.schema_hash_digest()
        columnSchemaKey = schema_db_key_from_column(column_name, layout=column_layout)
        columnSchemaVal = schema_record_db_val_from_digest(schema_digest)
        hashSchemaKey = schema_hash_db_key_from_digest(schema_digest)
        hashSchemaVal = schema_hash_record_db_val_from_spec(schema.schema)

        txnctx = ColumnTxn(self._stageenv, self._hashenv, self._stagehashenv)
        with txnctx.write() as ctx:
            ctx.dataTxn.put(columnSchemaKey, columnSchemaVal)
            ctx.hashTxn.put(hashSchemaKey, hashSchemaVal, overwrite=False)

        # ------------- create column instance and return to user -------------

        if column_layout == 'nested':
            setup_args = generate_nested_column(
                txnctx=txnctx, column_name=column_name,
                path=self._repo_path, schema=schema, mode='a')
        else:
            setup_args = generate_flat_column(
                txnctx=txnctx, column_name=column_name,
                path=self._repo_path, schema=schema, mode='a')

        self.columns._columns[column_name] = setup_args
        return self.columns[column_name]

    def merge(self, message: str, dev_branch: str) -> str:
        """Merge the currently checked out commit with the provided branch name.

        If a fast-forward merge is possible, it will be performed, and the
        commit message argument to this function will be ignored.

        Parameters
        ----------
        message : str
            commit message to attach to a three-way merge
        dev_branch : str
            name of the branch which should be merged into this branch
            (i.e. `master`)

        Returns
        -------
        str
            commit hash of the new commit for the `master` branch this checkout
            was started from.
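
        Example
        -------
        A minimal sketch, assuming ``co`` is this write-enabled checkout and a
        branch named ``'dev'`` exists with history to merge in::

            >>> new_digest = co.merge('merge dev changes', 'dev')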
        """
        self._verify_alive()
        commit_hash = select_merge_algorithm(
            message=message,
            branchenv=self._branchenv,
            stageenv=self._stageenv,
            refenv=self._refenv,
            stagehashenv=self._stagehashenv,
            master_branch=self._branch_name,
            dev_branch=dev_branch,
            repo_path=self._repo_path,
            writer_uuid=self._writer_lock)

        for asetHandle in self._columns.values():
            with suppress(KeyError):
                asetHandle._close()

        self._columns = Columns._from_staging_area(
            repo_pth=self._repo_path,
            hashenv=self._hashenv,
            stageenv=self._stageenv,
            stagehashenv=self._stagehashenv)
        self._differ = WriterUserDiff(
            stageenv=self._stageenv,
            refenv=self._refenv,
            branchenv=self._branchenv,
            branch_name=self._branch_name)

        return commit_hash

    def commit(self, commit_message: str) -> str:
        """Commit the changes made in the staging area on the checkout branch.

        Parameters
        ----------
        commit_message : str
            user provided message logging what was changed in this commit.
            Should a fast-forward commit be possible, this will NOT be added to
            the fast-forward ``HEAD``.

        Returns
        -------
        str
            The commit hash of the new commit.

        Raises
        ------
        RuntimeError
            If no changes have been made in the staging area, no commit occurs.
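
        Example
        -------
        A minimal sketch, assuming ``co`` is this write-enabled checkout with
        changes staged::

            >>> digest = co.commit('add initial training samples')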
        """
        self._verify_alive()

        open_columns = []
        for column in self._columns.values():
            if column._is_conman:
                open_columns.append(column.column)

        try:
            for column_name in open_columns:
                self._columns[column_name].__exit__()

            if self._differ.status() == 'CLEAN':
                e = RuntimeError('No changes made in staging area. Cannot commit.')
                raise e from None

            self._columns._close()
            commit_hash = commiting.commit_records(message=commit_message,
                                                   branchenv=self._branchenv,
                                                   stageenv=self._stageenv,
                                                   refenv=self._refenv,
                                                   repo_path=self._repo_path)
            # purge recs then reopen file handles so that we don't have to invalidate
            # previous weakproxy references like if we just called :meth:``_setup```
            hashs.clear_stage_hash_records(self._stagehashenv)
            self._columns._open()

        finally:
            for column_name in open_columns:
                self._columns[column_name].__enter__()

        return commit_hash

    def reset_staging_area(self, *, force=False) -> str:
        """Perform a hard reset of the staging area to the last commit head.

        After this operation completes, the writer checkout will automatically
        close in the typical fashion (any held references to :attr:`column`
        or :attr:`metadata` objects will finalize and destruct as normal). In
        order to perform any further operations, a new checkout needs to be
        opened.

        .. warning::

            This operation is IRREVERSIBLE. All records and data which are not
            stored in a previous commit will be permanently deleted.

        Returns
        -------
        str
            Commit hash of the head which the staging area is reset to.

        Raises
        ------
        RuntimeError
            If no changes have been made in the staging area and ``force`` is not
            set; no reset is performed.
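
        Example
        -------
        A minimal sketch, assuming ``co`` is this write-enabled checkout::

            >>> head_digest = co.reset_staging_area(force=True)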
        """
        self._verify_alive()
        print(f'Hard reset requested with writer_lock: {self._writer_lock}')

        if self._differ.status() == 'CLEAN':
            if not force:
                e = RuntimeError(f'No changes made in staging area. No reset necessary.')
                raise e from None

        if isinstance(self._stack, ExitStack):
            self._stack.close()
        if hasattr(self._columns, '_destruct'):
            self._columns._destruct()

        hashs.remove_stage_hash_records_from_hashenv(self._hashenv, self._stagehashenv)
        hashs.clear_stage_hash_records(self._stagehashenv)
        hashs.backends_remove_in_process_data(self._repo_path)

        branch_head = heads.get_staging_branch_head(self._branchenv)
        head_commit = heads.get_branch_head_commit(self._branchenv, branch_head)
        if head_commit == '':
            with suppress(ValueError):
                commiting.replace_staging_area_with_commit(refenv=self._refenv,
                                                           stageenv=self._stageenv,
                                                           commit_hash=head_commit)
        else:
            commiting.replace_staging_area_with_commit(refenv=self._refenv,
                                                       stageenv=self._stageenv,
                                                       commit_hash=head_commit)

        self._columns = Columns._from_staging_area(
            repo_pth=self._repo_path,
            hashenv=self._hashenv,
            stageenv=self._stageenv,
            stagehashenv=self._stagehashenv)
        self._differ = WriterUserDiff(
            stageenv=self._stageenv,
            refenv=self._refenv,
            branchenv=self._branchenv,
            branch_name=self._branch_name)
        return head_commit

    def close(self) -> None:
        """Close all handles to the writer checkout and release the writer lock.

        Failure to call this method after the writer checkout has been used
        will result in a lock being placed on the repository which will not
        allow any writes until it has been manually cleared.
        """
        with suppress(lmdb.Error):
            self._verify_alive()

        if isinstance(self._stack, ExitStack):
            self._stack.close()

        if hasattr(self, '_columns'):
            if hasattr(self._columns, '_destruct'):
                self._columns._destruct()

        with suppress(lmdb.Error):
            heads.release_writer_lock(self._branchenv, self._writer_lock)

        for attr in list(self.__dict__.keys()):
            delattr(self, attr)
        atexit.unregister(self.close)
        return


================================================
FILE: src/hangar/cli/__init__.py
================================================
from .cli import main

__all__ = ['main']


================================================
FILE: src/hangar/cli/cli.py
================================================
"""Module that contains the command line app.

Why does this file exist, and why not put this in __main__?

   You might be tempted to import things from __main__ later, but that will cause
   problems: the code will get executed twice:

      - When you run `python -m hangar` python will execute
        ``__main__.py`` as a script. That means there won't be any
        ``hangar.__main__`` in ``sys.modules``.
      - When you import __main__ it will get executed again (as a module) because
        there's no ``hangar.__main__`` in ``sys.modules``.

Also see (1) from http://click.pocoo.org/7/setuptools/#setuptools-integration
"""
import os
import time
from pathlib import Path

import click
import numpy as np

from hangar import Repository, __version__

from .utils import parse_custom_arguments, StrOrIntType


pass_repo = click.make_pass_decorator(Repository, ensure=True)


@click.group(no_args_is_help=True, add_help_option=True, invoke_without_command=True)
@click.version_option(version=__version__, help='display current Hangar Version')
@click.pass_context
def main(ctx):  # pragma: no cover
    P = os.getcwd()
    ctx.obj = Repository(path=P, exists=False)


# -------------------------------- Init ---------------------------------------


@main.command()
@click.option('--name', prompt='User Name', help='first and last name of user')
@click.option('--email', prompt='User Email', help='email address of the user')
@click.option('--overwrite', is_flag=True, default=False,
              help='overwrite a repository if it exists at the current path')
@pass_repo
def init(repo: Repository, name, email, overwrite):
    """Initialize an empty repository at the current path.
    """
    if repo.initialized and (not overwrite):
        click.echo(f'Repo already exists at: {repo.path}')
    else:
        repo.init(user_name=name, user_email=email, remove_old=overwrite)


# -------------------------- Writer Lock -------------------------------------


@main.command(name='writer-lock')
@click.option('--force-release', 'force_release_', is_flag=True, default=False,
              help='force release writer lock from the CLI.')
@pass_repo
def writer_lock_held(repo: Repository, force_release_):
    """Determine if the writer lock is held for a repository.

    Passing the ``--force-release`` flag will instantly release the writer lock,
    invalidating any process which currently holds it.
    """
    if force_release_:
        repo.force_release_writer_lock()
        click.echo('Successfully force released the writer lock.')
    else:
        if repo.writer_lock_held:
            click.echo(f'Writer lock is held.')
        else:
            click.echo(f'Writer lock is available.')



# -------------------------- Checkout Writer ----------------------------------


@main.command()
@click.argument('branchname', nargs=1, required=True)
@pass_repo
def checkout(repo: Repository, branchname):
    """Checkout writer head branch at BRANCHNAME.

    This method requires that no process currently holds the writer lock.
    In addition, it requires that the contents of the staging area are
    'CLEAN' (no changes have been staged).
    """
    try:
        co = repo.checkout(write=True, branch=branchname)
        co.close()
        click.echo(f'Writer checkout head set to branch: {branchname}')
    except (ValueError, PermissionError) as e:
        raise click.ClickException(e)


@main.command()
@click.option('--message', '-m', multiple=True,
              help=('The commit message. If provided multiple times '
                    'each argument gets converted into a new line.'))
@pass_repo
def commit(repo: Repository, message):
    """Commits outstanding changes.

    Commit changes to the given files into the repository. You will need to
    'push' to push up your changes to other repositories.
    """
    from hangar.records.summarize import status

    co = repo.checkout(write=True)
    try:
        if not message:
            diff = co.diff.staged()
            status_txt = status(co._hashenv, co.branch_name, diff.diff)
            status_txt.seek(0)
            marker = '# Changes To Be committed: \n'
            hint = ['\n', '\n', marker, '# \n']
            for line in status_txt.readlines():
                hint.append(f'# {line}')
            # open default system editor
            message = click.edit(''.join(hint))
            if message is None:
                click.echo('Aborted!')
                return
            msg = message.split(marker)[0].rstrip()
            if not msg:
                click.echo('Aborted! Empty commit message')
                return
        else:
            msg = '\n'.join(message)

        click.echo('Commit message:\n' + msg)
        try:
            digest = co.commit(msg)
            click.echo(f'Commit Successful. Digest: {digest}')
        except RuntimeError as e:
            raise click.ClickException(e)
    finally:
        co.close()


# -------------------------- Column Interactor ------------------------------


@main.group(no_args_is_help=True, add_help_option=True)
@click.pass_context
def column(ctx):  # pragma: no cover
    """Operations for working with columns in the writer checkout.
    """
    pass


@column.command(name='create')
@click.option('--variable-shape', 'variable_', is_flag=True, default=False,
              help='flag indicating sample dimensions can be any size up to max shape.')
@click.option('--contains-subsamples', 'subsamples_', is_flag=True, default=False,
              help=('flag indicating if this is a column which nests multiple '
                    'subsamples under a common sample key.'))
@click.argument('name', nargs=1, type=click.STRING, required=True)
@click.argument('dtype', nargs=1, type=click.Choice([
    'UINT8', 'INT8', 'UINT16', 'INT16', 'UINT32', 'INT32',
    'UINT64', 'INT64', 'FLOAT16', 'FLOAT32', 'FLOAT64', 'STR']), required=True)
@click.argument('shape', nargs=-1, type=click.INT, required=False)
@pass_repo
def create_column(repo: Repository, name, dtype, shape, variable_, subsamples_):
    """Create an column with NAME and DTYPE of SHAPE.

    The column will be created in the staging area / branch last used by a
    writer-checkout. Valid NAMEs contain only ascii letters and [``'.'``,
    ``'_'``, ``'-'``] (no whitespace). The DTYPE must be one of [``'UINT8'``,
    ``'INT8'``, ``'UINT16'``, ``'INT16'``, ``'UINT32'``, ``'INT32'``,
    ``'UINT64'``, ``'INT64'``, ``'FLOAT16'``, ``'FLOAT32'``, ``'FLOAT64'``,
    ``'STR'``].

    If an ndarray dtype is specified (not 'STR'), then the SHAPE must be the
    last argument(s) specified, where each dimension size is identified by
    a (space separated) list of numbers.

    Examples:

    To specify a column for some training images of dtype uint8 and shape
    (256, 256, 3) we would say:

       .. code-block:: console

          $ hangar column create train_images UINT8 256 256 3

    To specify that the samples can be variably shaped (have any dimension size
    up to the maximum SHAPE specified) we would say:

       .. code-block:: console

          $ hangar column create train_images UINT8 256 256 3 --variable-shape

    or equivalently:

       .. code-block:: console

          $ hangar column create --variable-shape train_images UINT8 256 256 3

    To specify that the column contains a nested set of subsample data under a
    common sample key, the ``--contains-subsamples`` flag can be used.

       .. code-block:: console

          $ hangar column create --contains-subsamples train_images UINT8 256 256 3

    """
    try:
        co = repo.checkout(write=True)
        if dtype == 'STR':
            col = co.add_str_column(name=name, contains_subsamples=subsamples_)
        else:
            col = co.add_ndarray_column(name=name,
                                        shape=shape,
                                        dtype=np.typeDict[dtype.lower()],
                                        variable_shape=variable_,
                                        contains_subsamples=subsamples_)
        click.echo(f'Initialized Column: {col.column}')
    except (ValueError, LookupError, PermissionError) as e:
        raise click.ClickException(e)
    finally:
        try:
            co.close()
        except NameError:
            pass


@column.command(name='remove')
@click.argument('name', nargs=1, type=click.STRING, required=True)
@pass_repo
def remove_column(repo: Repository, name):
    """Delete the column NAME (and all samples) from staging area.

    The column will be removed from the staging area / branch last used by a
    writer-checkout.
    """
    try:
        co = repo.checkout(write=True)
        removed = co.columns.delete(name)
        click.echo(f'Successfully removed column: {removed}')
    except (ValueError, KeyError, PermissionError) as e:
        raise click.ClickException(e)
    finally:
        try:
            co.close()
        except NameError:
            pass


# ---------------------------- Remote Interaction -----------------------------


@main.command()
@click.argument('remote', nargs=1, required=True)
@click.option('--name', prompt='User Name', help='first and last name of user')
@click.option('--email', prompt='User Email', help='email address of the user')
@click.option('--overwrite', is_flag=True, default=False,
              help='overwrite a repository if it exists at the current path')
@pass_repo
def clone(repo: Repository, remote, name, email, overwrite):
    """Initialize a repository at the current path and fetch updated records from REMOTE.

    Note: This method does not actually download the data to disk. Please look
    into the ``fetch-data`` command.
    """
    if repo.initialized and (not overwrite):
        click.echo(f'Repo already exists at: {repo.path}')
    else:
        repo.clone(name, email, remote, remove_old=overwrite)


@main.command(name='fetch')
@click.argument('remote', nargs=1, required=True)
@click.argument('branch', nargs=1, required=True)
@pass_repo
def fetch_records(repo: Repository, remote, branch):
    """Retrieve the commit history from REMOTE for BRANCH.

    This method does not fetch the data associated with the commits. See
    ``fetch-data`` to download the tensor data corresponding to a commit.
    """
    bName = repo.remote.fetch(remote=remote, branch=branch)
    click.echo(f'Fetched branch Name: {bName}')


@main.command(name='fetch-data')
@click.argument('remote', nargs=1, required=True)
@click.argument('startpoint', nargs=1, required=True)
@click.option('--column', '-d', multiple=True, required=False, default=None,
              help='specify any number of column keys to fetch data for.')
@click.option('--all-history', '-a', 'all_', is_flag=True, default=False, required=False,
              help='Retrieve data referenced in every parent commit accessible to the STARTPOINT')
@pass_repo
def fetch_data(repo: Repository, remote, startpoint, column, all_):
    """Get data from REMOTE referenced by STARTPOINT (short-commit or branch).

    The default behavior is to only download a single commit's data or the HEAD
    commit of a branch. Please review optional arguments for other behaviors.
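
    Example (the remote name ``origin`` and column name are illustrative only):

       .. code-block:: console

          $ hangar fetch-data origin master --column train_images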
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit
    from hangar.records.heads import get_staging_branch_head

    if startpoint is None:
        branch = get_staging_branch_head(repo._env.branchenv)
        commit = get_branch_head_commit(repo._env.branchenv, branch)
    elif startpoint in repo.list_branches():
        commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    else:
        commit = expand_short_commit_digest(repo._env.refenv, startpoint)
    click.echo(f'Fetching data for commit: {commit}')

    if len(column) == 0:
        column = None

    commits = repo.remote.fetch_data(remote=remote,
                                     commit=commit,
                                     column_names=column,
                                     retrieve_all_history=all_)
    click.echo(f'Completed data fetch for commits: {commits}')


@main.command()
@click.argument('remote', nargs=1, required=True)
@click.argument('branch', nargs=1, required=True)
@pass_repo
def push(repo: Repository, remote, branch):
    """Upload local BRANCH commit history / data to REMOTE server.
    """
    commit_hash = repo.remote.push(remote=remote, branch=branch)
    click.echo(f'Push data for commit hash: {commit_hash}')


# ----------------------- Remote Server References ----------------------------


@main.group(no_args_is_help=True, add_help_option=True)
@click.pass_context
def remote(ctx):  # pragma: no cover
    """Operations for working with remote server references
    """
    pass


@remote.command(name='list')
@pass_repo
def list_remotes(repo: Repository):
    """List all remote repository records.
    """
    click.echo(repo.remote.list_all())


@remote.command(name='add')
@click.argument('name', nargs=1, required=True)
@click.argument('address', nargs=1, required=True)
@pass_repo
def add_remote(repo: Repository, name, address):
    """Add a new remote server NAME with url ADDRESS to the local client.

    This name must be unique. In order to update an old remote, please remove it
    and re-add the remote NAME / ADDRESS combination.
    """
    click.echo(repo.remote.add(name=name, address=address))


@remote.command(name='remove')
@click.argument('name', nargs=1, required=True)
@pass_repo
def remove_remote(repo: Repository, name):
    """Remove the remote server NAME from the local client.

    This will not remove any tracked remote reference branches.
    """
    click.echo(repo.remote.remove(name=name))


# ---------------------------- User Visualizations ----------------------------


@main.command()
@click.argument('dev', nargs=1, required=True)
@click.argument('master', nargs=1, required=False, default=None)
@pass_repo
def diff(repo: Repository, dev, master):
    """Display diff of DEV commit/branch to MASTER commit/branch.

    If no MASTER is specified, then the staging area branch HEAD
    will be used as the commit digest for MASTER. This operation will
    return a diff which could be interpreted as if you were merging
    the changes in DEV into MASTER.

    TODO: VERIFY ORDER OF OUTPUT IS CORRECT.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.commiting import get_staging_branch_head
    from hangar.records.summarize import status

    if dev not in repo.list_branches():
        dev = expand_short_commit_digest(repo._env.refenv, dev)

    if master is None:
        master = get_staging_branch_head(repo._env.branchenv)
    elif master not in repo.list_branches():
        master = expand_short_commit_digest(repo._env.refenv, master)

    diff_spec = repo.diff(master, dev)
    buf = status(hashenv=repo._env.hashenv, branch_name=dev, diff=diff_spec.diff)
    click.echo(buf.getvalue())


@main.command()
@click.argument('startpoint', nargs=1, required=False)
@pass_repo
def summary(repo: Repository, startpoint):
    """Display content summary at STARTPOINT (short-digest or branch).

    If no argument is passed in, the staging area branch HEAD will be used as the
    starting point. In order to receive a machine readable, and more complete
    version of this information, please see the ``Repository.summary()`` method
    of the API.
    """
    from hangar.records.commiting import expand_short_commit_digest

    if startpoint is None:
        click.echo(repo.summary())
    elif startpoint in repo.list_branches():
        click.echo(repo.summary(branch=startpoint))
    else:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
        click.echo(repo.summary(commit=base_commit))


@main.command()
@click.argument('startpoint', required=False, default=None)
@pass_repo
def log(repo: Repository, startpoint):
    """Display commit graph starting at STARTPOINT (short-digest or name)

    If no argument is passed in, the staging area branch HEAD will be used as the
    starting point.
    """
    from hangar.records.commiting import expand_short_commit_digest

    if startpoint is None:
        click.echo(repo.log())
    elif startpoint in repo.list_branches():
        click.echo(repo.log(branch=startpoint))
    else:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
        click.echo(repo.log(commit=base_commit))


@main.command()
@pass_repo
def status(repo: Repository):
    """Display changes made in the staging area compared to its base commit.
    """
    from hangar.records.summarize import status
    co = repo.checkout(write=True)
    try:
        diff = co.diff.staged()
        click.echo(status(co._hashenv, co.branch_name, diff.diff).getvalue(), nl=False)
    finally:
        co.close()


# ------------------------------- Branching -----------------------------------


@main.group(no_args_is_help=True, add_help_option=True)
@click.pass_context
def branch(ctx):  # pragma: no cover
    """Operate on and list branch pointers.
    """
    pass


@branch.command(name='list')
@pass_repo
def branch_list(repo: Repository):
    """List all branch names.

    Includes both remote branches as well as local branches.
    """
    click.echo(repo.list_branches())


@branch.command(name='create')
@click.argument('name', nargs=1, required=True)
@click.argument('startpoint', nargs=1, default=None, required=False)
@pass_repo
def branch_create(repo: Repository, name, startpoint):
    """Create a branch with NAME at STARTPOINT (short-digest or branch)

    If no STARTPOINT is provided, the new branch is automatically positioned at
    the HEAD of the staging area branch.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit
    from hangar.records.heads import get_staging_branch_head

    branch_names = repo.list_branches()
    if name in branch_names:
        e = ValueError(f'branch name: {name} already exists')
        raise click.ClickException(e)

    try:
        if startpoint is None:
            branch = get_staging_branch_head(repo._env.branchenv)
            base_commit = get_branch_head_commit(repo._env.branchenv, branch)
        elif startpoint in branch_names:
            base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
        else:
            base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)

        res = repo.create_branch(name, base_commit=base_commit)
    except (KeyError, ValueError, RuntimeError) as e:
        raise click.ClickException(e)

    click.echo(f'Created BRANCH: {res.name} HEAD: {res.digest}')


@branch.command(name='delete')
@click.argument('name', nargs=1, required=True)
@click.option('--force', '-f', is_flag=True, default=False,
              help='flag to force delete branch which has un-merged history.')
@pass_repo
def branch_remove(repo: Repository, name, force):
    """Remove a branch pointer with the provided NAME.

    The NAME must be a branch present on the local machine.
    """
    try:
        res = repo.remove_branch(name, force_delete=force)
    except (ValueError, PermissionError, RuntimeError) as e:
        raise click.ClickException(e)

    click.echo(f'Deleted BRANCH: {res.name} HEAD: {res.digest}')


# ---------------------------- Server Commands --------------------------------


@main.command()
@click.option('--overwrite', is_flag=True, default=False,
              help='overwrite the hangar server instance if it exists at the current path.')
@click.option('--ip', default='localhost', show_default=True,
              help='the ip to start the server on. default is `localhost`')
@click.option('--port', default='50051', show_default=True,
              help='port to start the server on. default is `50051`')
@click.option('--timeout', default=60 * 60 * 24, required=False, show_default=True,
              help='time (in seconds) before server is stopped automatically')
def server(overwrite, ip, port, timeout):
    """Start a hangar server, initializing one if does not exist.

    The server is configured to top working in 24 Hours from the time it was
    initially started. To modify this value, please see the ``--timeout``
    parameter.

    The hangar server directory layout, contents, and access conventions are
    similar to, though different enough from, the regular user "client"
    implementation that it is not possible to fully access all information via
    regular API methods. These changes occur as a result of the uniformity of
    operations promised by both the RPC structure and negotiations between the
    client/server upon connection.

    More simply put, we know more, so we can optimize access more; similar, but
    not identical.
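
    Example (address, port, and timeout values are illustrative only):

       .. code-block:: console

          $ hangar server --ip localhost --port 50051 --timeout 3600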
    """
    from hangar.remote.server import serve

    P = os.getcwd()
    ip_port = f'{ip}:{port}'
    server, hangserver, channel_address = serve(P, overwrite, channel_address=ip_port)
    server.start()
    click.echo(f'Hangar Server Started')
    click.echo(f'* Start Time: {time.asctime()}')
    click.echo(f'* Base Directory Path: {P}')
    click.echo(f'* Operating on `IP_ADDRESS:PORT`: {channel_address}')
    try:
        startTime = time.time()
        while True:
            time.sleep(0.1)
            if time.time() - startTime > timeout:
                raise SystemExit
    except (KeyboardInterrupt, SystemExit):
        click.echo(f'Server Stopped at Time: {time.asctime()}')
        hangserver.close()
        server.stop(0)


# ---------------------------- Import Exporters -------------------------------


@main.command(name='import',
              context_settings=dict(allow_extra_args=True, ignore_unknown_options=True, ))
@click.argument('column', required=True)
@click.argument('path',
                required=True,
                type=click.Path(exists=True, dir_okay=True, file_okay=True, readable=True,
                                resolve_path=True))
@click.option('--branch', default=None, help='branch to import data')
@click.option('--plugin', default=None, help='override auto-inferred plugin')
@click.option('--overwrite', is_flag=True,
              help='overwrite data samples with the same name as the imported data file')
@pass_repo
@click.pass_context
def import_data(ctx, repo: Repository, column, path, branch, plugin, overwrite):
    """Import file or directory of files at PATH to COLUMN in the staging area.

    If passing in a directory, all files in the directory will be imported; if
    passing in a file, just the file specified will be imported.
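
    Example (column name, path, and branch name are illustrative only):

       .. code-block:: console

          $ hangar import train_images ./data/images --branch master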
    """
    # TODO: ignore warning through env variable
    from types import GeneratorType
    from hangar import external
    from hangar.records.heads import get_staging_branch_head

    kwargs = parse_custom_arguments(ctx.args)
    if branch is None:
        branch = get_staging_branch_head(repo._env.branchenv)
    elif branch not in repo.list_branches():
        raise click.ClickException(f'Branch name: {branch} does not exist, Exiting.')
    click.echo(f'Writing to branch: {branch}')

    co = repo.checkout(write=True, branch=branch)
    try:
        active_aset = co.columns.get(column)
        p = Path(path)
        files = [f.resolve() for f in p.iterdir()] if p.is_dir() else [p.resolve()]
        with active_aset as aset, click.progressbar(files) as filesBar:
            for f in filesBar:
                ext = ''.join(f.suffixes).strip('.')  # multi-suffix files (tar.bz2)
                loaded = external.load(f, plugin=plugin, extension=ext, **kwargs)
                if not isinstance(loaded, GeneratorType):
                    loaded = [loaded]
                for arr, fname in loaded:
                    if (not overwrite) and (fname in aset):
                        continue
                    try:
                        aset[fname] = arr
                    except ValueError as e:
                        click.echo(e)
    except (ValueError, KeyError) as e:
        raise click.ClickException(e)
    finally:
        co.close()


@main.command(name='export',
              context_settings=dict(allow_extra_args=True, ignore_unknown_options=True, ))
@click.argument('column', nargs=1, required=True)
@click.argument('startpoint', nargs=1, default=None, required=False)
@click.option('-o', '--out', 'outdir',
              nargs=1,
              required=False,
              default=os.getcwd(),
              type=click.Path(exists=True, dir_okay=True, file_okay=False, readable=True,
                              resolve_path=True),
              help="Directory to export data")
@click.option('-s', '--sample',
              nargs=1,
              default=None,
              type=StrOrIntType(),
              help=('Sample name to export. Default implementation is to interpret all input '
                    'names as string type. As a column can contain samples with both ``str`` '
                    'and ``int`` types, we allow you to specify ``name type`` of the sample. To '
                    'identify a potentially ambiguous name, we allow you to prepend the type of '
                    'sample name followed by a colon and then the sample name (ex. ``str:54`` '
                    'or ``int:54``). This can be done for any sample key.'))
@click.option('-f', '--format', 'format_',
              nargs=1,
              required=False,
              help='File format of output file')
@click.option('--plugin', required=False, help='override auto-inferred plugin')
@pass_repo
@click.pass_context
def export_data(ctx, repo: Repository, column, outdir, startpoint, sample, format_, plugin):
    """Export COLUMN sample data as it existed a STARTPOINT to some format and path.

    Specifying which sample should be exported is possible by using the switch
    ``--sample`` (without this, all the samples in the given column will be
    exported). Since hangar supports both int and str datatypes for the sample
    name, it may sometimes be necessary to specify the type along with the
    name. This can be done by separating the type and name with a colon.

    Example:

       1. if the sample name is the string ``'10'`` - ``str:10`` or ``10``

       2. if the sample name is ``sample1`` - ``str:sample1`` or ``sample1``

       3. if the sample name is an int, say 10 - ``int:10``
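
    Example (column, startpoint, and output options are illustrative only):

       .. code-block:: console

          $ hangar export train_images master --out ./exported --format png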
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head
    from hangar import external
    kwargs = parse_custom_arguments(ctx.args)

    if startpoint in repo.list_branches():
        base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    elif startpoint:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
    else:
        branch_name = get_staging_branch_head(repo._env.branchenv)
        base_commit = get_branch_head_commit(repo._env.branchenv, branch_name)

    co = repo.checkout(commit=base_commit)
    try:
        aset = co.columns.get(column)
        sampleNames = [sample] if sample is not None else list(aset.keys())
        extension = format_.lstrip('.') if format_ else None
        with aset, click.progressbar(sampleNames) as sNamesBar:
            for sampleN in sNamesBar:
                data = aset[sampleN]
                formatted_sampleN = f'{type(sampleN).__name__}:{sampleN}'
                try:
                    external.save(data, outdir, formatted_sampleN, extension, plugin, **kwargs)
                except Exception as e:
                    raise click.ClickException(e)
    except KeyError as e:
        raise click.ClickException(e)
    finally:
        co.close()


@main.command(name='view',
              context_settings=dict(allow_extra_args=True, ignore_unknown_options=True, ))
@click.argument('column', nargs=1, type=str, required=True)
@click.argument('sample', nargs=1, type=StrOrIntType(), required=True)
@click.argument('startpoint', nargs=1, default=None, required=False)
@click.option('-f', '--format', 'format_', required=False, help='File format of output file')
@click.option('--plugin', default=None, help='Plugin name to use instead of auto-inferred plugin')
@pass_repo
@click.pass_context
def view_data(ctx, repo: Repository, column, sample, startpoint, format_, plugin):
    """Use a plugin to view the data of some SAMPLE in COLUMN at STARTPOINT.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head
    from hangar import external

    kwargs = parse_custom_arguments(ctx.args)
    if startpoint in repo.list_branches():
        base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    elif startpoint:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
    else:
        branch_name = get_staging_branch_head(repo._env.branchenv)
        base_commit = get_branch_head_commit(repo._env.branchenv, branch_name)

    co = repo.checkout(commit=base_commit)
    try:
        aset = co.columns.get(column)
        extension = format_.lstrip('.') if format_ else None
        data = aset[sample]
        try:
            external.show(data, plugin=plugin, extension=extension, **kwargs)
        except Exception as e:
            raise click.ClickException(e)
    except KeyError as e:
        raise click.ClickException(e)
    finally:
        co.close()


# ---------------------------- Developer Utils --------------------------------


@main.command(name='db-view', hidden=True)
@click.option('-a', is_flag=True, help='display all dbs in the repository')
@click.option('-b', is_flag=True, help='display the branch/heads db')
@click.option('-r', is_flag=True, help='display the references db')
@click.option('-d', is_flag=True, help='display the data hash db')
@click.option('-s', is_flag=True, help='display the stage record db')
@click.option('-z', is_flag=True, help='display the staged hash record db')
@click.option('--limit', default=30, help='limit the amount of records displayed before truncation')
@pass_repo
def lmdb_record_details(repo: Repository, a, b, r, d, s, z, limit):
    """DEVELOPER TOOL ONLY

    Display key/value pairs making up the dbs.
    """
    from hangar.context import Environments
    from hangar.records.summarize import details
    from hangar import constants as c

    if repo._repo_path.is_dir():
        repo_path = repo._repo_path
    elif repo._repo_path.parent.joinpath(c.DIR_HANGAR_SERVER).is_dir():
        repo_path = repo._repo_path.parent.joinpath(c.DIR_HANGAR_SERVER)
    else:
        click.echo(f'NO HANGAR INSTALLATION AT PATH: {repo._repo_path.parent}')
        return

    envs = Environments(pth=repo_path)
    try:
        if a:
            b, r, d, s, z = True, True, True, True, True
        if b:
            click.echo(details(envs.branchenv, line_limit=limit).getvalue())
        if r:
            click.echo(details(envs.refenv, line_limit=limit).getvalue())
        if d:
            click.echo(details(envs.hashenv, line_limit=limit).getvalue())
        if s:
            click.echo(details(envs.stageenv, line_limit=limit).getvalue())
        if z:
            click.echo(details(envs.stagehashenv, line_limit=limit).getvalue())
    finally:
        envs._close_environments()


================================================
FILE: src/hangar/cli/utils.py
================================================
import click


class StrOrIntType(click.ParamType):
    """Custom type for click to parse the sample name
    argument to integer or string
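
    For example (purely illustrative): the value ``"int:54"`` is converted to
    the integer ``54``, while ``"str:54"`` or plain ``"54"`` is converted to
    the string ``'54'``.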
    """

    def convert(self, value, param, ctx):
        if not value:
            return None

        try:
            stype, sample = value.split(':') if ':' in value else ('str', value)
        except ValueError:
            self.fail(f"Sample name {value} not formatted properly", param, ctx)
        try:
            if stype not in ('str', 'int'):
                self.fail(f"type {stype} is not allowed", param, ctx)
            return int(sample) if stype == 'int' else str(sample)
        except (ValueError, TypeError):
            self.fail(f"{sample} is not a valid {stype}", param, ctx)


def parse_custom_arguments(click_args: list) -> dict:
    """
    Parse all the unknown arguments from click for downstream tasks. Used in
    user plugins for custom command line arguments.

    Parameters
    ----------
    click_args : list
        Unknown arguments from click

    Returns
    -------
    parsed : dict
        Parsed arguments stored as key value pair

    Note
    -----
    Unknown arguments must be long arguments i.e. they should start with ``--``
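
    Example
    -------
    A minimal sketch; the argument names are illustrative and all values remain
    strings:

    >>> parse_custom_arguments(['--height', '256', '--width', '128'])
    {'height': '256', 'width': '128'}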
    """
    parsed = {}
    for i in range(0, len(click_args), 2):
        key = click_args[i]
        val = click_args[i + 1]
        if not key.startswith('--'):
            raise RuntimeError(f"Could not parse argument {key}. It should be prefixed with `--`")
        parsed[key[2:]] = val
    return parsed


================================================
FILE: src/hangar/columns/__init__.py
================================================
from .column import Columns, ModifierTypes
from .common import ColumnTxn
from .constructors import (
    generate_flat_column,
    generate_nested_column,
    column_type_object_from_schema
)
from .introspection import is_column, is_writer_column

__all__ = (
    'Columns',
    'ModifierTypes',
    'generate_flat_column',
    'generate_nested_column',
    'column_type_object_from_schema',
    'ColumnTxn',
    'is_column',
    'is_writer_column'
)


================================================
FILE: src/hangar/columns/column.py
================================================
"""Constructor and Interaction Class for Columns
"""
from contextlib import ExitStack
from pathlib import Path
from typing import Iterable, List, Mapping, Optional, Tuple, Union, Dict, TYPE_CHECKING

import lmdb

from .common import ColumnTxn
from .constructors import (
    generate_flat_column, generate_nested_column, column_type_object_from_schema
)
from ..records import (
    schema_db_key_from_column,
    schema_hash_db_key_from_digest,
    schema_column_record_from_db_key,
    schema_spec_from_db_val,
    dynamic_layout_data_record_db_start_range_key,
)
from ..records.queries import RecordQuery
from ..op_state import writer_checkout_only
from ..txnctx import TxnRegister

if TYPE_CHECKING:
    from .layout_flat import FlatSampleWriter
    from .layout_nested import NestedSampleWriter, FlatSubsampleWriter


ModifierTypes = Union['NestedSampleWriter', 'FlatSubsampleWriter', 'FlatSampleWriter']
KeyType = Union[str, int]


class Columns:
    """Common access patterns and initialization/removal of columns in a checkout.

    This object is the entry point to all data stored in their
    individual columns. Each column contains a common schema which dictates
    the general shape, dtype, and access patterns which the backends optimize
    access for. The methods contained within allow us to create, remove, query,
    and access these collections of common data pieces.
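
    Example
    -------
    A usage sketch, assuming ``co`` is an open checkout containing a column
    named ``'train_images'`` (the name is purely illustrative)::

        >>> 'train_images' in co.columns
        True
        >>> col = co.columns['train_images']      # dict-style access
        >>> col = co.columns.get('train_images')  # equivalent accessor method
        >>> co.columns.keys()
        ['train_images']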
    """

    def __init__(self,
                 mode: str,
                 repo_pth: Path,
                 columns: Dict[str, ModifierTypes],
                 hashenv: Optional[lmdb.Environment] = None,
                 dataenv: Optional[lmdb.Environment] = None,
                 stagehashenv: Optional[lmdb.Environment] = None,
                 txnctx: Optional[ColumnTxn] = None):
        """Developer documentation for init method.

        .. warning::

            This class should not be instantiated directly. Instead use the factory
            functions :py:meth:`_from_commit` or :py:meth:`_from_staging_area` to return
            a pre-initialized class instance appropriately constructed for either a
            read-only or write-enabled checkout.

        Parameters
        ----------
        mode : str
            one of 'r' or 'a' to indicate read or write mode
        repo_pth : Path
            path to the repository on disk
        columns : Dict[str, ModifierTypes]
            dictionary of initialized column accessor objects
        hashenv : Optional[lmdb.Environment]
            environment handle for hash records
        dataenv : Optional[lmdb.Environment]
            environment handle for the unpacked records. `data` is meant to refer to
            the fact that the stageenv is passed in for write-enabled, and a
            cmtrefenv for read-only checkouts.
        stagehashenv : Optional[lmdb.Environment]
            environment handle for newly added staged data hash records.
        txnctx: Optional[ColumnTxn]
            class implementing context managers to handle lmdb transactions
        """
        self._stack: Optional[ExitStack] = None
        self._is_conman_counter = 0
        self._mode = mode
        self._repo_pth = repo_pth
        self._columns = columns

        self._hashenv = hashenv
        self._dataenv = dataenv
        self._stagehashenv = stagehashenv
        self._txnctx = txnctx

    def _open(self):
        for v in self._columns.values():
            v._open()

    def _close(self):
        for v in self._columns.values():
            v._close()

    def _destruct(self):
        if isinstance(self._stack, ExitStack):
            self._stack.close()
        self._close()
        for column in self._columns.values():
            column._destruct()
        for attr in list(self.__dict__.keys()):
            delattr(self, attr)

    def __getattr__(self, name):
        """Raise permission error after checkout is closed.

         Only runs after a call to :meth:`_destruct`, which is responsible
         for deleting all attributes from the object instance.
        """
        try:
            self.__getattribute__('_mode')  # once checkout is closed, this won't exist.
        except AttributeError:
            err = (f'Unable to operate on past checkout objects which have been '
                   f'closed. No operation occurred. Please use a new checkout.')
            raise PermissionError(err) from None
        return self.__getattribute__(name)

# ------------- Methods Available To Both Read & Write Checkouts ------------------

    def _repr_pretty_(self, p, cycle):
        res = f'Hangar {self.__class__.__qualname__}\
                \n    Writeable         : {False if self._mode == "r" else True}\
                \n    Number of Columns : {len(self)}\
                \n    Column Names / Partial Remote References:\
                \n      - ' + '\n      - '.join(
            f'{asetn} / {aset.contains_remote_references}'
            for asetn, aset in self._columns.items())
        p.text(res)

    def __repr__(self):
        res = f'{self.__class__}('\
              f'repo_pth={self._repo_pth}, '\
              f'columns={self._columns}, '\
              f'mode={self._mode})'
        return res

    def _ipython_key_completions_(self):
        """Let ipython know that any key based access can use the column keys

        Since we don't want to inherit from dict, nor mess with `__dir__` for
        the sanity of developers, this is the best way to ensure users can
        autocomplete keys.

        Returns
        -------
        list
            list of strings, each being one of the column keys for access.
        """
        return self.keys()

    def __getitem__(self, key: str) -> ModifierTypes:
        """Dict style access to return the column object with specified key/name.

        Parameters
        ----------
        key : string
            name of the column object to get.

        Returns
        -------
        ModifierTypes
            The object which is returned depends on the mode of checkout
            specified. If the column was checked out with write-enabled,
            return writer object, otherwise return read only object.
        """
        try:
            return self._columns[key]
        except KeyError:
            raise KeyError(f'No column exists with name: {key}')

    def __contains__(self, key: str) -> bool:
        """Determine if a column with a particular name is stored in the checkout

        Parameters
        ----------
        key : str
            name of the column to check for

        Returns
        -------
        bool
            True if a column with the provided name exists in the checkout,
            otherwise False.
        """
        return True if key in self._columns else False

    def __len__(self) -> int:
        """Get the number of column columns contained in the checkout.
        """
        return len(self._columns)

    def __iter__(self) -> Iterable[str]:
        return iter(self._columns)

    @property
    def _is_conman(self):
        return bool(self._is_conman_counter)

    def _any_is_conman(self) -> bool:
        """Determine if self or any contains column class is conman.

        Returns
        -------
        bool
            [description]
        """
        res = any([self._is_conman, *[x._is_conman for x in self._columns.values()]])
        return res

    def __enter__(self):
        with ExitStack() as stack:
            for asetN in list(self._columns.keys()):
                stack.enter_context(self._columns[asetN])
            self._is_conman_counter += 1
            self._stack = stack.pop_all()
        return self

    def __exit__(self, *exc):
        self._is_conman_counter -= 1
        self._stack.close()

    @property
    def iswriteable(self) -> bool:
        """Bool indicating if this column object is write-enabled. Read-only attribute.
        """
        return False if self._mode == 'r' else True

    @property
    def contains_remote_references(self) -> Mapping[str, bool]:
        """Dict of bool indicating data reference locality in each column.

        Returns
        -------
        Mapping[str, bool]
            For each column name key, boolean value where False indicates all
            samples in column exist locally, True if some reference remote
            sources.
        """
        res = {}
        for asetn, aset in self._columns.items():
            res[asetn] = aset.contains_remote_references
        return res

    @property
    def remote_sample_keys(self) -> Mapping[str, Iterable[Union[int, str]]]:
        """Determine columns samples names which reference remote sources.

        Returns
        -------
        Mapping[str, Iterable[Union[int, str]]]
            dict where keys are column names and values are iterables of
            samples in the column containing remote references
        """
        res = {}
        for asetn, aset in self._columns.items():
            res[asetn] = aset.remote_reference_keys
        return res

    def keys(self) -> List[str]:
        """list all column keys (names) in the checkout

        Returns
        -------
        List[str]
            list of column names
        """
        return list(self._columns.keys())

    def values(self) -> Iterable[ModifierTypes]:
        """Yield all column object instances in the checkout.

        Yields
        -------
        Iterable[ModifierTypes]
            Generator of ColumnData accessor objects (set to read or write mode
            as appropriate)
        """
        for asetN in list(self._columns.keys()):
            asetObj = self._columns[asetN]
            yield asetObj

    def items(self) -> Iterable[Tuple[str, ModifierTypes]]:
        """Generator providing access to column_name, :class:`Columns`

        Yields
        ------
        Iterable[Tuple[str, ModifierTypes]]
            two-tuples of all column name / accessor object pairs in the checkout.
        """
        for asetN in list(self._columns.keys()):
            asetObj = self._columns[asetN]
            yield (asetN, asetObj)

    def get(self, name: str) -> ModifierTypes:
        """Returns a column access object.

        This can be used in lieu of the dictionary style access.

        Parameters
        ----------
        name : str
            name of the column to return

        Returns
        -------
        ModifierTypes
            ColumnData accessor (set to read or write mode as appropriate) which
            governs interaction with the data
        """
        return self[name]

    # -------------------- Writer-Enabled Methods Only ------------------------

    @writer_checkout_only
    def __delitem__(self, key: str) -> str:
        """Remove a column and all data records if write-enabled process.

        Parameters
        ----------
        key : str
            Name of the column to remove from the repository. This will remove
            all records from the staging area (though the actual data and all
            records are still accessible) if they were previously committed.

        Returns
        -------
        str
            If successful, the name of the removed column.

        Raises
        ------
        PermissionError
            If any enclosed column is opened in a context manager.
        """
        if self._any_is_conman():
            raise PermissionError(
                'Not allowed while any columns class is opened in a context manager')
        return self.delete(key)

    @writer_checkout_only
    def delete(self, column: str) -> str:
        """Remove the column and all data contained within it.

        Parameters
        ----------
        column : str
            name of the column to remove

        Returns
        -------
        str
            name of the removed column

        Raises
        ------
        PermissionError
            If any enclosed column is opened in a context manager.
        KeyError
            If a column does not exist with the provided name
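
        Example
        -------
        A minimal sketch, assuming a write-enabled checkout ``co`` containing a
        column named ``'train_images'`` (the name is illustrative)::

            >>> co.columns.delete('train_images')
            'train_images'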
        """
        if self._any_is_conman():
            raise PermissionError(
                'Not allowed while any columns class is opened in a context manager')

        with ExitStack() as stack:
            datatxn = TxnRegister().begin_writer_txn(self._dataenv)
            stack.callback(TxnRegister().commit_writer_txn, self._dataenv)

            if column not in self._columns:
                e = KeyError(f'Cannot remove: {column}. Key does not exist.')
                raise e from None

            column_layout = self._columns[column].column_layout
            columnSchemaKey = schema_db_key_from_column(column, layout=column_layout)
            column_record = schema_column_record_from_db_key(columnSchemaKey)
            startRangeKey = dynamic_layout_data_record_db_start_range_key(column_record)

            self._columns[column]._close()
            self._columns.__delitem__(column)
            with datatxn.cursor() as cursor:
                cursor.first()
                recordsExist = cursor.set_range(startRangeKey)
                while recordsExist:
                    k = cursor.key()
                    if k.startswith(startRangeKey):
                        recordsExist = cursor.delete()
                    else:
                        recordsExist = False
            datatxn.delete(columnSchemaKey)

        return column

    @classmethod
    def _from_staging_area(cls, repo_pth, hashenv, stageenv, stagehashenv):
        """INTERNAL USE ONLY

        Class method factory to checkout :class:`Columns` in write mode

        Once you get here, we assume the write lock verification has
        passed, and that write operations are safe to perform.

        Parameters
        ----------
        repo_pth : Path
            directory path to the hangar repository on disk
        hashenv : lmdb.Environment
            environment where tensor data hash records are open in write mode.
        stageenv : lmdb.Environment
            environment where staging records (dataenv) are opened in write mode.
        stagehashenv : lmdb.Environment
            environment where the staged hash records are stored in write mode

        Returns
        -------
        :class:`~column.Columns`
            Interface class with write-enabled attributes activated, which contains
            live column data accessors in `write` mode.
        """
        columns = {}
        txnctx = ColumnTxn(stageenv, hashenv, stagehashenv)
        query = RecordQuery(stageenv)
        stagedSchemaSpecs = query.schema_specs()

        staged_col_schemas = {}
        with txnctx.read() as r_txn:
            # need to do some conversions here...
            # ref record digest -> hash db key -> schema spec dict -> schema obj
            for column_record, schema_digest_rec in stagedSchemaSpecs.items():
                hashSchemaKey = schema_hash_db_key_from_digest(schema_digest_rec.digest)
                hashSchemaVal = r_txn.hashTxn.get(hashSchemaKey)
                schema_dict = schema_spec_from_db_val(hashSchemaVal)
                schema = column_type_object_from_schema(schema_dict)
                staged_col_schemas[column_record] = schema

        for column_record, schema in staged_col_schemas.items():
            if column_record.layout == 'nested':
                column = generate_nested_column(
                    txnctx=txnctx, column_name=column_record.column,
                    path=repo_pth, schema=schema, mode='a')
            else:
                column = generate_flat_column(
                    txnctx=txnctx, column_name=column_record.column,
                    path=repo_pth, schema=schema, mode='a')
            columns[column_record.column] = column

        return cls(mode='a',
                   repo_pth=repo_pth,
                   columns=columns,
                   hashenv=hashenv,
                   dataenv=stageenv,
                   stagehashenv=stagehashenv,
                   txnctx=txnctx)

    @classmethod
    def _from_commit(cls, repo_pth, hashenv, cmtrefenv):
        """INTERNAL USE ONLY

        Class method factory to checkout :class:`.Columns` in read-only mode

        For read mode, no locks need to be verified, but construction should
        occur through this interface only.

        Parameters
        ----------
        repo_pth : Path
            directory path to the hangar repository on disk
        hashenv : lmdb.Environment
            environment where tensor data hash records are open in read-only mode.
        cmtrefenv : lmdb.Environment
            environment where the commit ref records are opened in read-only mode.

        Returns
        -------
        :class:`~column.Columns`
            Interface class with write-enabled attributes deactivated, which
            contains live column data accessors in `read-only` mode.
        """
        columns = {}
        txnctx = ColumnTxn(cmtrefenv, hashenv, None)
        query = RecordQuery(cmtrefenv)
        cmtSchemaSpecs = query.schema_specs()

        cmt_col_schemas = {}
        with txnctx.read() as r_txn:
            # need to do some conversions here...
            # ref record digest -> hash db key -> schema spec dict -> schema obj
            for column_record, schema_digest_rec in cmtSchemaSpecs.items():
                hashSchemaKey = schema_hash_db_key_from_digest(schema_digest_rec.digest)
                hashSchemaVal = r_txn.hashTxn.get(hashSchemaKey)
                schema_dict = schema_spec_from_db_val(hashSchemaVal)
                schema = column_type_object_from_schema(schema_dict)
                cmt_col_schemas[column_record] = schema

        for column_record, schema in cmt_col_schemas.items():
            if column_record.layout == 'nested':
                column = generate_nested_column(
                    txnctx=txnctx, column_name=column_record.column,
                    path=repo_pth, schema=schema, mode='r')
            else:
                column = generate_flat_column(
                    txnctx=txnctx, column_name=column_record.column,
                    path=repo_pth, schema=schema, mode='r')
            columns[column_record.column] = column

        return cls(mode='r',
                   repo_pth=repo_pth,
                   columns=columns,
                   hashenv=None,
                   dataenv=None,
                   stagehashenv=None,
                   txnctx=None)
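

# Illustrative sketch (not part of the original module): the lmdb prefix
# range-scan deletion pattern used by ``Columns.delete()`` above, shown against
# a throwaway environment. The keys and temporary path are made up for the demo.
if __name__ == '__main__':  # pragma: no cover
    import tempfile
    import lmdb

    env = lmdb.open(tempfile.mkdtemp())
    with env.begin(write=True) as txn:
        txn.put(b'col:images:0', b'spec-a')
        txn.put(b'col:images:1', b'spec-b')
        txn.put(b'col:labels:0', b'spec-c')

    prefix = b'col:images:'
    txn = env.begin(write=True)
    with txn.cursor() as cursor:
        remaining = cursor.set_range(prefix)       # seek to first key >= prefix
        while remaining:
            if cursor.key().startswith(prefix):    # still inside the prefix range
                remaining = cursor.delete()        # delete current, advance cursor
            else:
                remaining = False
    txn.commit()

    with env.begin() as read_txn:
        assert read_txn.get(b'col:images:0') is None
        assert read_txn.get(b'col:labels:0') == b'spec-c'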


================================================
FILE: src/hangar/columns/common.py
================================================
from contextlib import contextmanager
from typing import Optional

import lmdb

from ..txnctx import TxnRegister


class ColumnTxn(object):
    """Provides context manager ready methods to handle lmdb transactions.

    In order to prevent passing around lmdb.Environment objects, we instantiate
    this class once for each column and pass weakref proxy handles around to
    reference this object. Calling the open / close methods (or using the
    ``with`` style methods) initializes transactions for the appropriate
    environments, which are stored in instance attributes for access by the
    caller.
    """

    __slots__ = ('stagehashenv', 'dataenv', 'hashenv', 'hashTxn',
                 'dataTxn', 'stageHashTxn', '_TxnRegister', '__weakref__')

    def __init__(self, dataenv, hashenv, stagehashenv):

        self._TxnRegister = TxnRegister()
        self.stagehashenv = stagehashenv
        self.dataenv = dataenv
        self.hashenv = hashenv

        self.hashTxn: Optional[lmdb.Transaction] = None
        self.dataTxn: Optional[lmdb.Transaction] = None
        self.stageHashTxn: Optional[lmdb.Transaction] = None

    @property
    def _debug_(self):  # pragma: no cover
        return {
            f'__class__': self.__class__,
            f'_TxnRegister': self._TxnRegister._debug_,
            f'dataenv': self.dataenv,
            f'hashenv': self.hashenv,
            f'hashTxn': self.hashTxn,
            f'dataTxn': self.dataTxn,
            f'stageHashTxn': self.stageHashTxn,
        }

    def open_read(self):
        """Manually open read-only transactions, caller responsible for closing.
        """
        self.hashTxn = self._TxnRegister.begin_reader_txn(self.hashenv)
        self.dataTxn = self._TxnRegister.begin_reader_txn(self.dataenv)
        return self

    def close_read(self):
        """Manually close read-only transactions, must be called after manual open.
        """
        self.hashTxn = self._TxnRegister.abort_reader_txn(self.hashenv)
        self.dataTxn = self._TxnRegister.abort_reader_txn(self.dataenv)

    def open_write(self):
        """Manually open write-enabled transactions, caller responsible for closing.
        """
        self.hashTxn = self._TxnRegister.begin_writer_txn(self.hashenv)
        self.dataTxn = self._TxnRegister.begin_writer_txn(self.dataenv)
        self.stageHashTxn = self._TxnRegister.begin_writer_txn(self.stagehashenv)
        return self

    def close_write(self):
        """Manually close write-enabled transactions, must be called after manual open.
        """
        self.hashTxn = self._TxnRegister.commit_writer_txn(self.hashenv)
        self.dataTxn = self._TxnRegister.commit_writer_txn(self.dataenv)
        self.stageHashTxn = self._TxnRegister.commit_writer_txn(self.stagehashenv)

    @contextmanager
    def read(self):
        """Use ``with`` style context manager to open read-only transaction.

        The transaction is automatically closed for the caller regardless of
        any application exceptions.
        """
        try:
            yield self.open_read()
        finally:
            self.close_read()

    @contextmanager
    def write(self):
        """Use ``with`` style context manager to open write-enabled transaction.

        The transaction is automatically closed for the caller regardless of
        any application exceptions.
        """
        try:
            yield self.open_write()
        finally:
            self.close_write()


def open_file_handles(backends, path, mode, schema, *, remote_operation=False):
    """Open backend accessor file handles for reading

    Parameters
    ----------
    backends : Set[str]
        If ``mode == 'r'``, this should be the set of backend format codes
        actually used in the column. If ``mode == 'a'``, this should be the
        collection of allowed backend format codes this schema can feasibly
        write to.
    path : Path
        path to the hangar repository on disk
    mode : str
        one of ['r', 'a'] indicating read or write mode to open backends in.
    schema : ColumnDefinitionTypes
        schema spec so required values can be filled in to backend openers.

    Returns
    -------
    AccessorMapType
        dict mapping backend format codes to initialized backend accessor
        instances opened in the requested mode.
    """
    from ..backends import BACKEND_ACCESSOR_MAP

    fhandles = {}
    for be, accessor in BACKEND_ACCESSOR_MAP.items():
        if be in backends:
            if accessor is None:
                continue

            init_requires = schema._beopts.init_requires
            # TODO rework names for this hack
            kwargs = {}
            for arg in init_requires:
                if arg == 'repo_path':
                    kwargs[arg] = path
                elif arg == 'schema_shape':
                    kwargs[arg] = schema.shape
                elif arg == 'schema_dtype':
                    kwargs[arg] = schema.dtype

            fhandles[be] = accessor(**kwargs)
            fhandles[be].open(mode=mode, remote_operation=remote_operation)

    if mode == 'a':
        if schema.backend in fhandles:
            fhandles[schema.backend].backend_opts = schema.backend_options
    return fhandles
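

# Illustrative sketch (not part of the original module): typical use of the
# ``ColumnTxn`` read / write context managers defined above. The lmdb
# environments here are throwaway stand-ins; in hangar they are provided by the
# repository's environment setup, and ``stagehashenv`` is only required for
# write transactions.
if __name__ == '__main__':  # pragma: no cover
    import tempfile
    import lmdb

    dataenv = lmdb.open(tempfile.mkdtemp())
    hashenv = lmdb.open(tempfile.mkdtemp())
    stagehashenv = lmdb.open(tempfile.mkdtemp())

    txnctx = ColumnTxn(dataenv, hashenv, stagehashenv)

    # write-enabled transactions opened on all three environments
    with txnctx.write() as ctx:
        ctx.hashTxn.put(b'hash=abc', b'backend-spec')
        ctx.stageHashTxn.put(b'hash=abc', b'backend-spec')
        ctx.dataTxn.put(b'record=xyz', b'hash=abc')

    # read-only transactions opened on the data / hash environments
    with txnctx.read() as ctx:
        assert bytes(ctx.hashTxn.get(b'hash=abc')) == b'backend-spec'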


================================================
FILE: src/hangar/columns/constructors.py
================================================
"""Constructors for initializing FlatSampleReader and NestedSampleReader columns
"""
import warnings
from _weakref import proxy
from collections import defaultdict
from typing import Union

from wrapt import ObjectProxy

from .common import open_file_handles
from .layout_flat import FlatSampleReader, FlatSampleWriter
from .layout_nested import (
    FlatSubsampleReader, FlatSubsampleWriter,
    NestedSampleReader, NestedSampleWriter,
)
from ..records.queries import RecordQuery
from ..records import hash_data_db_key_from_raw_key
from ..typesystem import (
    NdarrayFixedShape,
    NdarrayVariableShape,
    StringVariableShape,
    BytesVariableShape
)
from ..backends import BACKEND_IS_LOCAL_MAP, backend_decoder


# --------------- methods common to all column layout types -------------------


KeyType = Union[str, int]

_column_definitions = (
    NdarrayVariableShape,
    NdarrayFixedShape,
    StringVariableShape,
    BytesVariableShape
)


def column_type_object_from_schema(schema: dict):
    for c in _column_definitions:
        try:
            instance = c(**schema)
            return instance
        except (TypeError, ValueError) as e:
            pass
    else:  # N.B. for-else loop (ie. "no-break")
        raise ValueError(f'Could not instantiate column schema object for {schema}')
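

# Illustrative sketch (not part of the original module): the ``for``/``else``
# dispatch used by ``column_type_object_from_schema`` above, reduced to plain
# Python. The candidate constructors below are stand-ins; the point is that the
# ``else`` clause only runs when no candidate accepted the input.
if __name__ == '__main__':  # pragma: no cover
    def _first_accepting(value, candidates=(int, float)):
        for ctor in candidates:
            try:
                return ctor(value)
            except (TypeError, ValueError):
                pass
        else:  # no candidate succeeded (loop completed without returning)
            raise ValueError(f'no candidate constructor accepted {value!r}')

    assert _first_accepting('3.5') == 3.5   # int() raises, float() succeeds
    try:
        _first_accepting('not-a-number')
    except ValueError:
        pass                                # fell through to the for/else error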


def _warn_remote(aset_name):
    warnings.warn(
        f'Column: {aset_name} contains `reference-only` samples, with '
        f'actual data residing on a remote server. A `fetch-data` '
        f'operation is required to access these samples.', UserWarning)


# --------- FlatSampleReader constructor metaclass / setup methods ------------------


def _flat_load_sample_keys_and_specs(column_name, txnctx):
    """Load flat sample key / backend location mapping info memory.

    Parameters
    ----------
    column_name: str
        name of the column to load.
    txnctx: ColumnTxn
        transaction context object used to access commit ref info on disk

    Returns
    -------
    Tuple[FlatSampleMapType, Set[str]]
        First element is a single level dictionary mapping sample keys to
        backend locations. Second element is the set of all unique backends
        encountered for every data piece in the column.
    """
    seen_bes = set()
    sspecs = {}
    with txnctx.read() as ctx:
        hashTxn = ctx.hashTxn
        asetNamesSpec = RecordQuery(ctx.dataenv).column_data_records(column_name)
        for asetNames, dataSpec in asetNamesSpec:
            hashKey = hash_data_db_key_from_raw_key(dataSpec.digest)
            hash_ref = hashTxn.get(hashKey)
            be_loc = backend_decoder(hash_ref)
            sspecs[asetNames.sample] = be_loc
    seen_bes.update((spc.backend for spc in sspecs.values()))
    return (sspecs, seen_bes)


def generate_flat_column(txnctx, column_name, path, schema, mode):
    """Generate instance ready structures for read-only checkouts

    Parameters
    ----------
    txnctx : ColumnTxn
        transaction context object used to access commit ref info on disk
    column_name : str
        name of the column that the reader constructors are being
        generated for
    path : Path
        path to the repository on disk
    schema : ColumnDefinitionTypes
        schema definition of the column.
    mode: str
        read-only or write-enabled mode. one of ['a', 'r'].

    Returns
    -------
    :class:`~.flat.FlatSampleReader`
        Top level column accessor class fully initialized for the requested
        state, with initialized structures defining and providing access to
        the sample data on disk.
    """
    sspecs, bes = _flat_load_sample_keys_and_specs(column_name, txnctx)
    if not all([BACKEND_IS_LOCAL_MAP[be] for be in bes]):
        _warn_remote(column_name)
    if mode == 'a':
        bes.add(schema.backend)
    file_handles = open_file_handles(backends=bes, path=path, mode=mode, schema=schema)

    if mode == 'r':
        res = FlatSampleReader(columnname=column_name,
                               samples=sspecs,
                               backend_handles=file_handles,
                               schema=schema,
                               repo_path=path,
                               mode=mode)
    elif mode == 'a':
        res = FlatSampleWriter(aset_ctx=txnctx,
                               columnname=column_name,
                               samples=sspecs,
                               backend_handles=file_handles,
                               schema=schema,
                               repo_path=path,
                               mode=mode)
    else:
        raise ValueError(f'mode {mode} is not valid.')

    return res


# --------- NestedSampleReader constructor metaclass / setup methods ----------------


def _nested_load_sample_keys_and_specs(column_name, txnctx):
    """Load nested sample/subsample keys and backend location into memory from disk.

    Parameters
    ----------
    column_name : str
        name of the column to load.
    txnctx : ColumnTxn
        transaction context object used to access commit ref info on disk

    Returns
    -------
    Tuple[NestedSampleMapType, Set[str]]
        First element is nested dictionary where each sample name maps to
        subsample contents dict (associating subsample names with backend
        locations). Second element is the set of all unique backends encountered
        for every data piece in the column.
    """
    seen_bes = set()
    sspecs = defaultdict(dict)
    with txnctx.read() as ctx:
        hashTxn = ctx.hashTxn
        asetNamesSpec = RecordQuery(ctx.dataenv).column_data_records(column_name)
        for asetNames, dataSpec in asetNamesSpec:
            hashKey = hash_data_db_key_from_raw_key(dataSpec.digest)
            hash_ref = hashTxn.get(hashKey)
            be_loc = backend_decoder(hash_ref)
            sspecs[asetNames.sample].update({asetNames.subsample: be_loc})
            seen_bes.add(be_loc.backend)
    return (sspecs, seen_bes)


def generate_nested_column(txnctx, column_name, path, schema, mode):
    """Generate instance ready structures for read-only checkouts

    Parameters
    ----------
    txnctx : ColumnTxn
        transaction context object used to access commit ref info on disk
    column_name : str
        name of the column that the reader constructors are being
        generated for
    path : Path
        path to the repository on disk
    schema : ColumnDefinitionTypes
        schema definition of the column.
    mode: str
        read-only or write-enabled mode. one of ['a', 'r'].

    Returns
    -------
    :class:`~.nested.NestedSampleReader`
        Top level column accessor class fully initialized for the requested
        state, with initialized structures defining and providing access to
        the subsample data on disk.
    """
    specs, bes = _nested_load_sample_keys_and_specs(column_name, txnctx)
    if not all([BACKEND_IS_LOCAL_MAP[be] for be in bes]):
        _warn_remote(column_name)
    if mode == 'a':
        bes.add(schema.backend)
    fhand = open_file_handles(backends=bes, path=path, mode=mode, schema=schema)
    samples = {}
    schema_proxy = proxy(schema)
    fhand['enter_count'] = 0

    if mode == 'r':
        for samp, subspecs in specs.items():
            samples[samp] = FlatSubsampleReader(
                columnname=column_name,
                samplen=samp,
                be_handles=fhand,
                specs=subspecs,
                mode='r')
        res = NestedSampleReader(
            columnname=column_name,
            samples=samples,
            backend_handles=fhand,
            repo_path=path,
            mode='r',
            schema=schema)
    elif mode == 'a':
        fhand = ObjectProxy(fhand)
        fhand_proxy = proxy(fhand)
        for samp, subspecs in specs.items():
            samples[samp] = FlatSubsampleWriter(
                schema=schema_proxy,
                aset_ctx=proxy(txnctx),
                repo_path=path,
                columnname=column_name,
                samplen=samp,
                be_handles=fhand_proxy,
                specs=subspecs,
                mode='a')
        res = NestedSampleWriter(
            aset_ctx=txnctx,
            columnname=column_name,
            samples=samples,
            backend_handles=fhand,
            schema=schema,
            repo_path=path,
            mode='a')
    else:
        raise ValueError(f'mode {mode} is not valid.')

    return res


================================================
FILE: src/hangar/columns/introspection.py
================================================
from .layout_flat import FlatSampleReader, FlatSampleWriter
from .layout_nested import (
    FlatSubsampleReader,
    FlatSubsampleWriter,
    NestedSampleReader,
    NestedSampleWriter
)


def is_column(obj) -> bool:
    """Determine if arbitrary input is an instance of a column layout.

    Returns
    -------
    bool: True if input is a column, otherwise False.
    """
    return isinstance(obj, (FlatSampleReader, FlatSubsampleReader, NestedSampleReader))


def is_writer_column(obj) -> bool:
    """Determine if arbitrary input is an instance of a write-enabled column layout.

    Returns
    -------
    bool: True if input is a write-enabled column, otherwise False.
    """
    return isinstance(obj, (FlatSampleWriter, FlatSubsampleWriter, NestedSampleWriter))
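

# Illustrative sketch (not part of the original module): writer layouts
# subclass the reader layouts (e.g. ``FlatSampleWriter(FlatSampleReader)``), so
# ``is_column`` is also True for write-enabled accessors, while
# ``is_writer_column`` is the stricter check. Arbitrary objects fail both.
if __name__ == '__main__':  # pragma: no cover
    assert issubclass(FlatSampleWriter, FlatSampleReader)
    assert is_column(object()) is False
    assert is_writer_column(object()) is False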


================================================
FILE: src/hangar/columns/layout_flat.py
================================================
"""Accessor class for columns containing single-level key/value mappings

The FlatSampleReader container is used to store data (in any backend) in a column
containing a single level key/value mapping from names/ids to data.

All backends are supported.
"""
from contextlib import ExitStack
from pathlib import Path
from operator import attrgetter as op_attrgetter
from typing import Tuple, Union, Iterable, Optional, Any

from .common import open_file_handles
from ..records import (
    data_record_db_val_from_digest,
    data_record_digest_val_from_db_val,
    flat_data_db_key_from_names,
    hash_data_db_key_from_raw_key,
    schema_db_key_from_column,
    schema_hash_db_key_from_digest,
    schema_hash_record_db_val_from_spec,
    schema_record_db_val_from_digest
)
from ..records.parsing import generate_sample_name
from ..backends import backend_decoder
from ..op_state import reader_checkout_only
from ..utils import is_suitable_user_key
from ..optimized_utils import valfilter, valfilterfalse


KeyType = Union[str, int]


class FlatSampleReader:
    """Class implementing get access to data in a column.

    This class exposes the standard API to access data stored in a single level
    key / value mapping column. Usage is modeled after the python :class:`dict`
    style syntax -- with a few additional utility and inspection methods and
    properties added. Methods named after those of a python :class:`dict` have
    syntactically identical arguments and behavior to that of the standard
    library.

    If not opened in a ``write-enabled`` checkout, then attempts to add or
    delete data or container properties will raise an exception (in the form of
    a :class:`PermissionError`). No changes will be propagated unless a
    ``write-enabled`` checkout is used.

    This object can be serialized -- pickled -- for parallel processing /
    reading if opened in a ``read-only`` checkout. Parallel operations are both
    thread and process safe, though performance may significantly differ
    between multithreaded vs multiprocessed code (depending on the backend data
    is stored in). Attempts to serialize objects opened in ``write-enabled``
    checkouts are not supported and will raise a :class:`PermissionError` if
    attempted. This behavior is enforced in order to ensure data and record
    integrity while writing to the repository.
    """

    __slots__ = ('_mode', '_column_name', '_samples', '_be_fs',
                 '_path', '_stack', '_enter_count', '_schema')
    _attrs = __slots__

    def __init__(self,
                 columnname: str,
                 samples,
                 backend_handles,
                 schema,
                 repo_path: Path,
                 mode: str,
                 *args, **kwargs):

        self._stack: Optional[ExitStack] = None
        self._mode = mode
        self._column_name = columnname
        self._samples = samples
        self._be_fs = backend_handles
        self._path = repo_path
        self._schema = schema
        self._enter_count = 0

    @property
    def _debug_(self):  # pragma: no cover
        return {
            '__class__': self.__class__,
            '_mode': self._mode,
            '_column_name': self._column_name,
            '_be_fs': self._be_fs,
            '_path': self._path,
            '_contains_subsamples': self.contains_subsamples,
            '_stack': self._stack._exit_callbacks if self._stack else self._stack,
            '_enter_count': self._enter_count,
        }

    def __repr__(self):
        res = (
            f'{self.__class__.__qualname__}('
            f'repo_pth={self._path}, '
            f'aset_name={self._column_name}, '
            f"{[f'{key}={val}, ' for key, val in self._schema.schema.items()]}, "
            f'mode={self._mode})')
        return res

    def _repr_pretty_(self, p, cycle):
        res = f'Hangar {self.__class__.__qualname__} \
                \n    Column Name              : {self._column_name}\
                \n    Writeable                : {self.iswriteable}\
                \n    Column Type              : {self.column_type}\
                \n    Column Layout            : {self.column_layout}\
                \n    Schema Type              : {self.schema_type}\
                \n    DType                    : {self.dtype}\
                \n    Shape                    : {self.shape}\
                \n    Number of Samples        : {self.__len__()}\
                \n    Partial Remote Data Refs : {bool(self.contains_remote_references)}\n'
        p.text(res)

    def _ipython_key_completions_(self):  # pragma: no cover
        """Let ipython know that any key based access can use the column keys

        Since we don't want to inherit from dict, nor mess with `__dir__` for
        the sanity of developers, this is the best way to ensure users can
        autocomplete keys.

        Returns
        -------
        list
            list of strings, each being one of the column keys for access.
        """
        return list(self.keys())

    @reader_checkout_only
    def __getstate__(self) -> dict:
        """ensure multiprocess operations can pickle relevant data.
        """
        return {slot: getattr(self, slot) for slot in self.__slots__}

    def __setstate__(self, state: dict) -> None:
        """ensure multiprocess operations can pickle relevant data.

        Technically should be decorated with @reader_checkout_only, but since
        at instance creation that is not an attribute, the decorator won't
        know. Since only readers can be pickled, This isn't much of an issue.
        """
        for slot, value in state.items():
            setattr(self, slot, value)

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        return

    def _destruct(self):
        if isinstance(self._stack, ExitStack):
            self._stack.close()
        self._close()
        for attr in self._attrs:
            delattr(self, attr)

    def __getattr__(self, name):
        """Raise permission error after checkout is closed.

         Only runs after a call to :meth:`_destruct`, which is responsible for
         deleting all attributes from the object instance.
        """
        try:
            self.__getattribute__('_mode')  # once checkout is closed, this won't exist.
        except AttributeError:
            err = (f'Unable to operate on past checkout objects which have been '
                   f'closed. No operation occurred. Please use a new checkout.')
            raise PermissionError(err) from None
        return self.__getattribute__(name)

    @property
    def _is_conman(self) -> bool:
        return bool(self._enter_count)

    def __iter__(self) -> Iterable[KeyType]:
        """Create iterator yielding an column sample keys.

        Yields
        -------
        Iterable[KeyType]
            Sample key contained in the column.
        """
        yield from self.keys()

    def __len__(self) -> int:
        """Check how many samples are present in a given column.
        """
        return len(self._samples)

    def __contains__(self, key: KeyType) -> bool:
        """Determine if a key is a valid sample name in the column.
        """
        return key in self._samples

    def _open(self):
        for val in self._be_fs.values():
            val.open(mode=self._mode)

    def _close(self):
        for val in self._be_fs.values():
            val.close()

    def __getitem__(self, key: KeyType):
        """Retrieve data for some sample key via dict style access conventions.

        .. seealso:: :meth:`get`

        Parameters
        ----------
        key : KeyType
            Sample key to retrieve from the column.

        Returns
        -------
        value
            Data corresponding to the provided sample key.

        Raises
        ------
        KeyError
            if no sample with the requested key exists.
        """
        spec = self._samples[key]
        return self._be_fs[spec.backend].read_data(spec)

    def get(self, key: KeyType, default=None):
        """Retrieve the data associated with some sample key

        Parameters
        ----------
        key : KeyType
            The name of the sample to retrieve. Passing a single sample key
            will return the stored data value.
        default : Any
            if a `key` parameter is not found, then return this value instead.
            By default, None.

        Returns
        -------
        value
            data stored under the sample key if the key exists, else the
            default value if not found.
        """
        try:
            return self[key]
        except KeyError:
            return default

    @property
    def column(self) -> str:
        """Name of the column.
        """
        return self._column_name

    @property
    def column_type(self):
        """Data container type of the column ('ndarray', 'str', etc).
        """
        return self._schema.column_type

    @property
    def column_layout(self):
        """Column layout type ('nested', 'flat', etc).
        """
        return self._schema.column_layout

    @property
    def schema_type(self):
        """Schema type of the contained data ('variable_shape', 'fixed_shape', etc).
        """
        return self._schema.schema_type

    @property
    def dtype(self):
        """Dtype of the columns data (np.float, str, etc).
        """
        return self._schema.dtype

    @property
    def shape(self):
        """(Max) shape of data that can (is) written in the column.
        """
        try:
            return self._schema.shape
        except AttributeError:
            return None

    @property
    def backend(self) -> str:
        """Code indicating which backing store is used when writing data.
        """
        return self._schema.backend

    @property
    def backend_options(self):
        """Filter / Compression options applied to backend when writing data.
        """
        return self._schema.backend_options

    @property
    def iswriteable(self) -> bool:
        """Bool indicating if this column object is write-enabled.
        """
        return False if self._mode == 'r' else True

    @property
    def contains_subsamples(self) -> bool:
        """Bool indicating if sub-samples are contained in this column container.
        """
        return False

    @property
    def contains_remote_references(self) -> bool:
        """Bool indicating if all samples in column exist on local disk.

        The data associated with samples referencing some remote server will
        need to be downloaded (``fetched`` in the hangar vocabulary) before
        they can be read into memory.

        Returns
        -------
        bool
            False if at least one sample in the column references data stored
            on some remote server. True if all sample data is available on the
            machine's local disk.
        """
        _islocal_func = op_attrgetter('islocal')
        return not all(map(_islocal_func, self._samples.values()))

    @property
    def remote_reference_keys(self) -> Tuple[KeyType]:
        """Compute sample names whose data is stored in a remote server reference.

        Returns
        -------
        Tuple[KeyType]
            list of sample keys in the column whose data references indicate
            they are stored on a remote server.
        """
        _islocal_func = op_attrgetter('islocal')
        return tuple(valfilterfalse(_islocal_func, self._samples).keys())

    def _mode_local_aware_key_looper(self, local: bool) -> Iterable[KeyType]:
        """Generate keys for iteration with dict update safety ensured.

        Parameters
        ----------
        local : bool
            True if only keys which exist on the local machine should be returned.
            False if all keys (including remote references) should be returned.

        Returns
        -------
        Iterable[KeyType]
            Sample keys conforming to the `local` argument spec.
        """
        _islocal_func = op_attrgetter('islocal')
        if local:
            if self._mode == 'r':
                yield from valfilter(_islocal_func, self._samples).keys()
            else:
                yield from tuple(valfilter(_islocal_func, self._samples).keys())
        else:
            if self._mode == 'r':
                yield from self._samples.keys()
            else:
                yield from tuple(self._samples.keys())

    def keys(self, local: bool = False) -> Iterable[KeyType]:
        """Generator yielding the name (key) of every subsample.

        Parameters
        ----------
        local : bool, optional
            If True, returned keys will only correspond to data which is
            available for reading on the local disk, by default False.

        Yields
        ------
        Iterable[KeyType]
            Keys of one sample at a time inside the column.
        """
        yield from self._mode_local_aware_key_looper(local)

    def values(self, local: bool = False) -> Iterable[Any]:
        """Generator yielding the data for every subsample.

        Parameters
        ----------
        local : bool, optional
            If True, returned values will only correspond to data which is
            available for reading on the local disk. No attempt will be made to
            read data existing on a remote server, by default False.

        Yields
        ------
        Iterable[Any]
            Values of one sample at a time inside the column.
        """
        for key in self._mode_local_aware_key_looper(local):
            yield self[key]

    def items(self, local: bool = False) -> Iterable[Tuple[KeyType, Any]]:
        """Generator yielding (name, data) tuple for every subsample.

        Parameters
        ----------
        local : bool, optional
            If True, returned keys/values will only correspond to data which is
            available for reading on the local disk. No attempt will be made to
            read data existing on a remote server, by default False.

        Yields
        ------
        Iterable[Tuple[KeyType, Any]]
            Name and stored value for every sample inside the column.
        """
        for key in self._mode_local_aware_key_looper(local):
            yield (key, self[key])
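

# Illustrative sketch (not part of the original module): dict-style read access
# as described in the ``FlatSampleReader`` docstring above. It assumes an
# existing repository containing a flat column; the repository path and the
# column name are placeholders, and the checkout API (``Repository.checkout``)
# is documented elsewhere in this project.
if __name__ == '__main__':  # pragma: no cover
    from hangar import Repository

    repo = Repository('/path/to/existing/repo')   # placeholder path
    co = repo.checkout()                          # read-only checkout
    col = co.columns['images']                    # placeholder column name

    first_key = next(iter(col.keys()))            # keys() is a generator
    sample = col[first_key]                       # __getitem__ style read
    missing = col.get('does-not-exist')           # returns None, never raises
    print(len(col), sample is not None, missing)
    co.close()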

# ---------------- writer methods only after this point -------------------


class FlatSampleWriter(FlatSampleReader):

    __slots__ = ('_txnctx',)
    _attrs = __slots__ + FlatSampleReader.__slots__

    def __init__(self, aset_ctx, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._txnctx = aset_ctx

    def __enter__(self):
        with ExitStack() as stack:
            self._txnctx.open_write()
            stack.callback(self._txnctx.close_write)
            if self._enter_count == 0:
                for k in self._be_fs.keys():
                    stack.enter_context(self._be_fs[k])
            self._enter_count += 1
            self._stack = stack.pop_all()
        return self

    def __exit__(self, *exc):
        self._stack.close()
        self._enter_count -= 1

    def _set_arg_validate(self, key, value):
        """Verify if key / value pair is valid to be written in this column

        Parameters
        ----------
        key
            name to associate with this data piece
        value
            piece of data to store in the column

        Raises
        ------
        ValueError
            If key is not valid type/contents or if value is not correct object
            type / if it does not conform to column schema
        """
        if not is_suitable_user_key(key):
            raise ValueError(f'Sample name `{key}` is not suitable.')

        isCompat = self._schema.verify_data_compatible(value)
        if not isCompat.compatible:
            raise ValueError(isCompat.reason)

    def _perform_set(self, key, value):
        """Internal write method. Assumes all arguments validated and context is open

        Parameters
        ----------
        key
            sample key to store
        value
            data to store
        """
        full_hash = self._schema.data_hash_digest(value)

        hashKey = hash_data_db_key_from_raw_key(full_hash)
        # check if data record already exists with given key
        dataRecKey = flat_data_db_key_from_names(self._column_name, key)
        existingDataRecVal = self._txnctx.dataTxn.get(dataRecKey, default=False)
        if existingDataRecVal:
            # check if data record already with same key & hash value
            existingDataRec = data_record_digest_val_from_db_val(existingDataRecVal)
            if full_hash == existingDataRec.digest:
                return

        # write new data if data hash does not exist
        existingHashVal = self._txnctx.hashTxn.get(hashKey, default=False)
        if existingHashVal is False:
            hashVal = self._be_fs[self._schema.backend].write_data(value)
            self._txnctx.hashTxn.put(hashKey, hashVal)
            self._txnctx.stageHashTxn.put(hashKey, hashVal)
            hash_spec = backend_decoder(hashVal)
        else:
            hash_spec = backend_decoder(existingHashVal)
            if hash_spec.backend not in self._be_fs:
                # when adding data which is already stored in the repository, the
                # backing store for the existing data location spec may not be the
                # same as the backend which the data piece would have been saved in here.
                #
                # As only the backends actually referenced by a columns samples are
                # initialized (accessible by the column), there is no guarantee that
                # an accessor exists for such a sample. In order to prevent internal
                # errors from occurring due to an uninitialized backend if a previously
                # existing data piece is "saved" here and subsequently read back from
                # the same writer checkout, we perform an existence check and backend
                # initialization, if appropriate.
                fh = open_file_handles(backends=(hash_spec.backend,),
                                       path=self._path,
                                       mode='a',
                                       schema=self._schema)
                self._be_fs[hash_spec.backend] = fh[hash_spec.backend]

        # add the record to the db
        dataRecVal = data_record_db_val_from_digest(full_hash)
        self._txnctx.dataTxn.put(dataRecKey, dataRecVal)
        self._samples[key] = hash_spec

    def __setitem__(self, key, value):
        """Store a piece of data in a column.

        .. seealso::

            :meth:`update` for an implementation analogous to python's built in
            :meth:`dict.update` method which accepts a dict or iterable of
            key/value pairs to add in the same operation.

        Parameters
        ----------
        key
            name to assign to the sample. If str, it can only contain
            alpha-numeric ascii characters (in addition to '-', '.', '_').
            An integer key must be >= 0.
        value
            data to store as a sample in the column.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)
            self._set_arg_validate(key, value)
            self._perform_set(key, value)

    def append(self, value) -> KeyType:
        """Store some data in a sample with an automatically generated key.

        This method should only be used if the key a piece of data is stored
        under is independent from its value (i.e. when reading data back, no
        useful information needs to be conveyed between the data source's
        name/id and the value of that piece of information). Think carefully
        before going this route, as this assumption does not apply to many
        common use cases.

        To store the data with a user defined key, use :meth:`update` or
        :meth:`__setitem__`

        Parameters
        ----------
        value
            Piece of data to store in the column.

        Returns
        -------
        KeyType
            Name of the generated key this data is stored with.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)
            key = generate_sample_name()
            while key in self._samples:
                key = generate_sample_name()
            self._set_arg_validate(key, value)
            self._perform_set(key, value)
            return key

    def update(self, other=None, **kwargs):
        """Store some data with the key/value pairs from other, overwriting existing keys.

        :meth:`update` implements functionality similar to python's builtin
        :meth:`dict.update` method, accepting either a dictionary or other
        iterable (of length two) listing key / value pairs.

        Parameters
        ----------
        other
            Accepts either another dictionary object or an iterable of
            key/value pairs (as tuples or other iterables of length two)
            mapping sample names to data value instances. If a sample name is
            of string type, it can only contain alpha-numeric ascii characters
            (in addition to '-', '.', '_'). An int key must be >= 0. By
            default, None.
        **kwargs
            keyword arguments provided will be saved with keywords as sample keys
            (string type only) and values as np.array instances.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)

            if other:
                if not isinstance(other, dict):
                    other = dict(other)
                else:
                    other = other.copy()
            elif other is None:
                other = {}
            if kwargs:
                # we have to merge kwargs dict with `other` before operating on
                # either so all validation and writing occur atomically
                other.update(kwargs)

            for key, val in other.items():
                self._set_arg_validate(key, val)
            for key, val in other.items():
                self._perform_set(key, val)

    def __delitem__(self, key: KeyType) -> None:
        """Remove a sample from the column. Convenience method to :meth:`delete`.

        .. seealso::

            :meth:`pop` to return a value and then delete it in the same operation

        Parameters
        ----------
        key : KeyType
            Name of the sample to remove from the column.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)

            if key not in self._samples:
                raise KeyError(key)

            dataKey = flat_data_db_key_from_names(self._column_name, key)
            isRecordDeleted = self._txnctx.dataTxn.delete(dataKey)
            if isRecordDeleted is False:
                raise RuntimeError(
                    f'Internal error. Not able to delete key {key} from staging '
                    f'db even though existence passed in-memory verification. '
                    f'Please report this message in full to the hangar development team.',
                    f'Specified key: <{type(key)} {key}>', f'Calculated dataKey: <{dataKey}>',
                    f'isRecordDeleted: <{isRecordDeleted}>', f'DEBUG STRING: {self._debug_}')
            del self._samples[key]

    def pop(self, key: KeyType):
        """Retrieve some value for some key(s) and delete it in the same operation.

        Parameters
        ----------
        key : KeyType
            Sample key to remove

        Returns
        -------
        value
            Upon success, the value of the removed key.

        Raises
        ------
        KeyError
            If there is no sample with some key in the column.
        """
        value = self[key]
        del self[key]
        return value

    def change_backend(self, backend: str, backend_options: Optional[dict] = None):
        """Change the default backend and filters applied to future data writes.

        .. warning::

           This method is meant for advanced users only. Please refer to the
           hangar backend codebase for information on accepted parameters and
           options.

        Parameters
        ----------
        backend : str
            Backend format code to switch to.
        backend_options : Optional[dict]
            Backend option specification to use (if specified). If left to
            default value of None, then default options for backend are
            automatically used.

        Raises
        ------
        RuntimeError
            If this method was called while this column is invoked in a
            context manager
        ValueError
            If the backend format code is not valid.
        """
        if self._is_conman:
            raise RuntimeError('Cannot call method inside column context manager.')

        self._schema.change_backend(backend, backend_options=backend_options)

        new_schema_digest = self._schema.schema_hash_digest()
        columnSchemaKey = schema_db_key_from_column(self._column_name, layout=self.column_layout)
        columnSchemaVal = schema_record_db_val_from_digest(new_schema_digest)
        hashSchemaKey = schema_hash_db_key_from_digest(new_schema_digest)
        hashSchemaVal = schema_hash_record_db_val_from_spec(self._schema.schema)

        # -------- set vals in lmdb only after schema is sure to exist --------

        with self._txnctx.write() as ctx:
            ctx.dataTxn.put(columnSchemaKey, columnSchemaVal)
            ctx.hashTxn.put(hashSchemaKey, hashSchemaVal, overwrite=False)

        new_backend = self._schema.backend
        if new_backend not in self._be_fs:
            fhands = open_file_handles(
                backends=[new_backend],
                path=self._path,
                mode='a',
                schema=self._schema)
            self._be_fs[new_backend] = fhands[new_backend]
        else:
            self._be_fs[new_backend].close()
        self._be_fs[new_backend].open(mode='a')
        self._be_fs[new_backend].backend_opts = self._schema.backend_options
        return
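

# Illustrative sketch (not part of the original module): the write-enabled API
# above, driven from a write checkout. It assumes an existing repository; the
# repository path, column name and data are placeholders, and the
# ``add_ndarray_column`` helper follows the usage shown in the project's
# quickstart documentation (adjust if the signature differs).
if __name__ == '__main__':  # pragma: no cover
    import numpy as np
    from hangar import Repository

    repo = Repository('/path/to/existing/repo')   # placeholder path
    co = repo.checkout(write=True)
    col = co.add_ndarray_column('scratch', shape=(10,), dtype=np.float32)

    with col:                                     # batch writes in one context
        col['first'] = np.zeros(10, dtype=np.float32)
        col.update({'second': np.ones(10, dtype=np.float32)})
        auto_key = col.append(np.full(10, 2, dtype=np.float32))

    removed = col.pop('first')                    # read the value, then delete it
    co.commit('add a few samples to the scratch column')
    co.close()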


================================================
FILE: src/hangar/columns/layout_nested.py
================================================
"""Accessor column containing nested mapping of data under top level keys.
"""
from contextlib import ExitStack
from pathlib import Path
from typing import (
    Tuple, Union, Dict, Iterable, Any, Optional
)
from operator import attrgetter as op_attrgetter
from operator import getitem as op_getitem
from weakref import proxy
from functools import reduce

from .common import open_file_handles
from ..records import (
    data_record_db_val_from_digest,
    data_record_digest_val_from_db_val,
    nested_data_db_key_from_names,
    hash_data_db_key_from_raw_key,
    schema_db_key_from_column,
    schema_hash_db_key_from_digest,
    schema_hash_record_db_val_from_spec,
    schema_record_db_val_from_digest,
)
from ..records.parsing import generate_sample_name
from ..backends import backend_decoder, BACKEND_ACCESSOR_MAP
from ..op_state import reader_checkout_only
from ..utils import is_suitable_user_key
from ..optimized_utils import valfilter, valfilterfalse


KeyType = Union[str, int]
EllipsisType = type(Ellipsis)
SubsampleGetKeysType = Union[KeyType, EllipsisType, slice]
SampleGetKeysType = Union[KeyType, Tuple[KeyType, SubsampleGetKeysType]]


class FlatSubsampleReader(object):

    __slots__ = ('_column_name', '_stack', '_be_fs',
                 '_mode', '_subsamples', '_samplen')
    _attrs = __slots__

    def __init__(self,
                 columnname: str,
                 samplen: str,
                 be_handles: BACKEND_ACCESSOR_MAP,
                 specs,
                 mode: str,
                 *args, **kwargs):

        self._column_name = columnname
        self._samplen = samplen
        self._be_fs = be_handles
        self._subsamples = specs
        self._mode = mode
        self._stack: Optional[ExitStack] = None

    @property
    def _debug_(self):  # pragma: no cover
        return {
            '__class__': self.__class__,
            '_column_name': self._column_name,
            '_samplen': self._samplen,
            '_be_fs': self._be_fs,
            '_subsamples': self._subsamples,
            '_mode': self._mode,
            '_stack': self._stack._exit_callbacks if self._stack else self._stack,
        }

    def __repr__(self):
        res = f'{self.__class__}('\
              f'column_name={self._column_name}, '\
              f'sample_name={self._samplen})'
        return res

    def _repr_pretty_(self, p, cycle):
        res = f'Hangar {self.__class__.__name__} \
                \n    Column Name          : {self._column_name}\
                \n    Sample Name          : {self._samplen}\
                \n    Writeable            : "{self.iswriteable}"\
                \n    Number of Subsamples : {len(self)}\n'
        p.text(res)

    def _ipython_key_completions_(self):
        """Let ipython know that any key based access can use the column keys

        Since we don't want to inherit from dict, nor mess with `__dir__` for
        the sanity of developers, this is the best way to ensure users can
        autocomplete keys.

        Returns
        -------
        list
            list of strings, each being one of the column keys for access.
        """
        return list(self.keys())

    def __enter__(self):
        self._enter_count += 1
        return self

    def __exit__(self, *exc):
        self._enter_count -= 1

    def _destruct(self):
        if isinstance(self._stack, ExitStack):
            self._stack.close()
        for attr in self._attrs:
            delattr(self, attr)

    def __getattr__(self, name):
        """Raise permission error after checkout is closed.

         Only runs after a call to :meth:`_destruct`, which is responsible for
         deleting all attributes from the object instance.
        """
        try:
            self.__getattribute__('_mode')  # once checkout is closed, this won't exist.
        except AttributeError:
            err = (f'Unable to operate on past checkout objects which have been '
                   f'closed. No operation occurred. Please use a new checkout.')
            raise PermissionError(err) from None
        return self.__getattribute__(name)

    @reader_checkout_only
    def __getstate__(self) -> dict:
        """ensure multiprocess operations can pickle relevant data.
        """
        return {slot: getattr(self, slot) for slot in self.__slots__}

    def __setstate__(self, state: dict) -> None:
        """ensure multiprocess operations can pickle relevant data.

        Technically should be decorated with @reader_checkout_only, but since
        at instance creation that is not an attribute, the decorator won't
        know. Since only readers can be pickled, This isn't much of an issue.
        """
        for slot, value in state.items():
            setattr(self, slot, value)

    def __len__(self) -> int:
        return len(self._subsamples)

    def __contains__(self, key: KeyType) -> bool:
        return key in self._subsamples

    def __iter__(self) -> Iterable[KeyType]:
        yield from self.keys()

    def __getitem__(self, key: SubsampleGetKeysType) -> Union[Any, Dict[KeyType, Any]]:
        """Retrieve data for some subsample key via dict style access conventions.

        .. seealso:: :meth:`get`

        Parameters
        ----------
        key : SubsampleGetKeysType
            Subsample key to retrieve from the sample. Alternatively, ``slice``
            syntax can be used to retrieve a selection of subsample
            keys/values. An empty slice (``: == slice(None)``) or ``Ellipsis``
            (``...``) will return all subsample keys/values. Passing a
            non-empty slice (``[1:5] == slice(1, 5)``) will select keys to
            retrieve by enumerating all subsamples and retrieving the element
            (key) for each step across the range. Note: order of enumeration is
            not guaranteed; do not rely on any ordering observed when using
            this method.

        Returns
        -------
        Union[Any, Dict[KeyType, Any]]
            Subsample data corresponding to the provided key, or a dictionary
            of subsample keys/data if Ellipsis or a slice is passed as the key.

        Raises
        ------
        KeyError
            if no subsample with the requested key exists.
        """
        # select subsample(s) with regular keys
        if isinstance(key, (str, int)):
            spec = self._subsamples[key]
            return self._be_fs[spec.backend].read_data(spec)
        # select all subsamples
        elif key is Ellipsis:
            res = {}
            for subsample, spec in self._subsamples.items():
                res[subsample] = self._be_fs[spec.backend].read_data(spec)
            return res
        # slice subsamples by sorted order of keys
        elif isinstance(key, slice):
            res = {}
            subsample_spec_slice = tuple(self._subsamples.items())[key]
            for subsample, spec in subsample_spec_slice:
                spec = self._subsamples[subsample]
                res[subsample] = self._be_fs[spec.backend].read_data(spec)
            return res
        else:
            raise TypeError(f'key {key} type {type(key)} not valid.')

    @property
    def _enter_count(self):
        return self._be_fs['enter_count']

    @_enter_count.setter
    def _enter_count(self, value):
        self._be_fs['enter_count'] = value

    @property
    def _is_conman(self):
        return bool(self._enter_count)

    @property
    def sample(self) -> KeyType:
        """Name of the sample this column subsamples are stured under.
        """
        return self._samplen

    @property
    def column(self) -> str:
        """Name of the column.
        """
        return self._column_name

    @property
    def iswriteable(self) -> bool:
        """Bool indicating if this column object is write-enabled.
        """
        return False if self._mode == 'r' else True

    @property
    def data(self) -> Dict[KeyType, Any]:
        """Return dict mapping every subsample key / data value stored in the sample.

        Returns
        -------
        Dict[KeyType, Any]
            Dictionary mapping subsample name(s) (keys) to their stored values
            as :class:`numpy.ndarray` instances.
        """
        return self[...]

    def _mode_local_aware_key_looper(self, local: bool) -> Iterable[KeyType]:
        """Generate keys for iteration with dict update safety ensured.

        Parameters
        ----------
        local : bool
            True if only keys which exist on the local machine should be
            returned. False if all keys (including remote references) should
            be returned.

        Returns
        -------
        Iterable[KeyType]
            Sample keys conforming to the `local` argument spec.
        """
        _islocal_func = op_attrgetter('islocal')
        if local:
            if self._mode == 'r':
                yield from valfilter(_islocal_func, self._subsamples).keys()
            else:
                yield from tuple(valfilter(_islocal_func, self._subsamples).keys())
        else:
            if self._mode == 'r':
                yield from self._subsamples.keys()
            else:
                yield from tuple(self._subsamples.keys())

    @property
    def contains_remote_references(self) -> bool:
        """Bool indicating all subsamples in sample column exist on local disk.

        The data associated with subsamples referencing some remote server will
        need to be downloaded (``fetched`` in the hangar vocabulary) before
        they can be read into memory.

        Returns
        -------
        bool
            False if at least one subsample in the column references data
            stored on some remote server. True if all sample data is available
            on the machine's local disk.
        """
        _islocal_func = op_attrgetter('islocal')
        return not all(map(_islocal_func, self._subsamples.values()))

    @property
    def remote_reference_keys(self) -> Tuple[KeyType]:
        """Compute subsample names whose data is stored in a remote server reference.

        Returns
        -------
        Tuple[KeyType]
            Tuple of subsample keys in the sample whose data references
            indicate they are stored on a remote server.
        """
        _islocal_func = op_attrgetter('islocal')
        return tuple(valfilterfalse(_islocal_func, self._subsamples).keys())

    def keys(self, local: bool = False) -> Iterable[KeyType]:
        """Generator yielding the name (key) of every subsample.

        Parameters
        ----------
        local : bool, optional
            If True, returned keys will only correspond to data which is
            available for reading on the local disk, by default False.

        Yields
        ------
        Iterable[KeyType]
            Keys of one subsample at a time inside the sample.
        """
        yield from self._mode_local_aware_key_looper(local)

    def values(self, local: bool = False) -> Iterable[Any]:
        """Generator yielding the data for every subsample.

        Parameters
        ----------
        local : bool, optional
            If True, returned values will only correspond to data which is
            available for reading on the local disk. No attempt will be made to
            read data existing on a remote server, by default False.

        Yields
        ------
        Iterable[Any]
            Values of one subsample at a time inside the sample.
        """
        for key in self._mode_local_aware_key_looper(local):
            yield self[key]

    def items(self, local: bool = False) -> Iterable[Tuple[KeyType, Any]]:
        """Generator yielding (name, data) tuple for every subsample.

        Parameters
        ----------
        local : bool, optional
            If True, returned keys/values will only correspond to data which is
            available for reading on the local disk. No attempt will be made to
            read data existing on a remote server, by default False.

        Yields
        ------
        Iterable[Tuple[KeyType, Any]]
            Name and stored value for every subsample inside the sample.
        """
        for key in self._mode_local_aware_key_looper(local):
            yield (key, self[key])

    def get(self, key: KeyType, default=None):
        """Retrieve the data associated with some subsample key

        Parameters
        ----------
        key : SubsampleGetKeysType
            The name of the subsample(s) to retrieve. Passing a single
            subsample key will return the stored :class:`numpy.ndarray`
        default
            if a `key` parameter is not found, then return this value instead.
            By default, None.

        Returns
        -------
        value
            data stored under subsample key if key exists, else default
            value if not found.
        """
        try:
            return self[key]
        except KeyError:
            return default
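

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): how a read-only
# subsample accessor like FlatSubsampleReader above is typically used. The
# repository path and the column / sample / subsample names ('scans', 0,
# 'axial') are hypothetical placeholders; only the access patterns
# (single key, Ellipsis, slice, ``get``) mirror the methods defined above.

def _example_flat_subsample_read(repo_root='.'):
    from hangar import Repository

    repo = Repository(repo_root)
    co = repo.checkout()                       # read-only checkout
    sample = co.columns['scans'][0]            # subsample accessor for sample 0
    one = sample['axial']                      # single subsample by key
    everything = sample[...]                   # dict of every subsample
    first_two = sample[0:2]                    # slice over sorted key order
    maybe = sample.get('missing', default=None)
    co.close()
    return one, everything, first_two, maybe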


# ---------------- writer methods only after this point -------------------


class FlatSubsampleWriter(FlatSubsampleReader):

    __slots__ = ('_schema', '_txnctx', '_path')
    _attrs = __slots__ + FlatSubsampleReader.__slots__

    def __init__(self,
                 schema,
                 repo_path: Path,
                 aset_ctx=None,
                 *args, **kwargs):

        super().__init__(*args, **kwargs)
        self._path = repo_path
        self._schema = schema
        self._txnctx = aset_ctx

    def __enter__(self):
        with ExitStack() as stack:
            self._txnctx.open_write()
            stack.callback(self._txnctx.close_write)
            if self._enter_count == 0:
                for k in self._be_fs.keys():
                    if k in ('enter_count', 'schema_spec'):
                        continue
                    stack.enter_context(self._be_fs[k])
            self._enter_count += 1
            self._stack = stack.pop_all()
        return self

    def __exit__(self, *exc):
        self._stack.close()
        self._enter_count -= 1
        if self._enter_count == 0:
            self._stack = None

    def _set_arg_validate(self, key, value):
        if not is_suitable_user_key(key):
            raise ValueError(f'Sample name `{key}` is not suitable.')
        isCompat = self._schema.verify_data_compatible(value)
        if not isCompat.compatible:
            raise ValueError(isCompat.reason)

    def _perform_set(self, key, value):
        """Internal write method. Assumes all arguments validated and cm open.

        Parameters
        ----------
        key
            subsample key to store
        value
            data to store
        """
        # full_hash = ndarray_hasher_tcode_0(value)
        full_hash = self._schema.data_hash_digest(value)
        hashKey = hash_data_db_key_from_raw_key(full_hash)

        # check if data record already exists with given key
        dataRecKey = nested_data_db_key_from_names(self._column_name, self._samplen, key)
        existingDataRecVal = self._txnctx.dataTxn.get(dataRecKey, default=False)
        if existingDataRecVal:
            # check if data record already with same key & hash value
            existingDataRec = data_record_digest_val_from_db_val(existingDataRecVal)
            if full_hash == existingDataRec.digest:
                return

        # write new data if data hash does not exist
        existingHashVal = self._txnctx.hashTxn.get(hashKey, default=False)
        if existingHashVal is False:
            backendCode = self._schema.backend
            hashVal = self._be_fs[backendCode].write_data(value)
            self._txnctx.hashTxn.put(hashKey, hashVal)
            self._txnctx.stageHashTxn.put(hashKey, hashVal)
            hash_spec = backend_decoder(hashVal)
        else:
            hash_spec = backend_decoder(existingHashVal)
            if hash_spec.backend not in self._be_fs:
                # when adding data which is already stored in the repository, the
                # backing store for the existing data location spec may not be the
                # same as the backend which the data piece would have been saved in here.
                #
                # As only the backends actually referenced by a columns samples are
                # initialized (accessible by the column), there is no guarantee that
                # an accessor exists for such a sample. In order to prevent internal
                # errors from occurring due to an uninitialized backend if a previously
                # existing data piece is "saved" here and subsequently read back from
                # the same writer checkout, we perform an existence check and backend
                # initialization, if appropriate.
                fh = open_file_handles(backends=(hash_spec.backend,),
                                       path=self._path,
                                       mode='a',
                                       schema=self._schema)
                self._be_fs[hash_spec.backend] = fh[hash_spec.backend]

        # add the record to the db
        dataRecVal = data_record_db_val_from_digest(full_hash)
        self._txnctx.dataTxn.put(dataRecKey, dataRecVal)
        self._subsamples[key] = hash_spec

    def __setitem__(self, key, value):
        """Store data as a subsample. Convenience method to :meth:`add`.

        .. seealso::

            :meth:`update` for an implementation analogous to python's built
            in :meth:`dict.update` method which accepts a dict or iterable of
            key/value pairs to add in the same operation.

        Parameters
        ----------
        key
            Key (name) of the subsample to add to the column.
        value
            Data to add as the subsample.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)
            self._set_arg_validate(key, value)
            self._perform_set(key, value)

    def append(self, value) -> KeyType:
        """Store some data in a subsample with an automatically generated key.

        This method should only be used if the context in which a piece of
        data is used is independent from its value (i.e. when reading data
        back, no useful information needs to be conveyed between the data
        source's name/id and the value of that piece of information.) Think
        carefully before going this route, as this assumption does not apply
        to many common use cases.

        .. seealso::

            In order to store the data with a user defined key, use
            :meth:`update` or :meth:`__setitem__`

        Parameters
        ----------
        value
            Piece of data to store in the column.

        Returns
        -------
        KeyType
            Name of the generated key this data is stored with.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)
            key = generate_sample_name()
            while key in self._subsamples:
                key = generate_sample_name()
            self._set_arg_validate(key, value)
            self._perform_set(key, value)
            return key

    def update(self, other=None, **kwargs):
        """Store data with the key/value pairs, overwriting existing keys.

        :meth:`update` implements functionality similar to python's builtin
        :meth:`dict.update` method, accepting either a dictionary or other
        iterable (of length two) listing key / value pairs.

        Parameters
        ----------
        other
            Accepts either another dictionary object or an iterable of
            key/value pairs (as tuples or other iterables of length two)
            mapping subsample names to data values. If a subsample name is of
            string type, it can only contain alpha-numeric ascii characters
            (in addition to '-', '.', '_'). Int keys must be >= 0. By default,
            None.
        **kwargs
            keyword arguments provided will be saved with keywords as subsample
            keys (string type only) and values as np.array instances.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)

            if other:
                if not isinstance(other, dict):
                    other = dict(other)
                else:
                    other = other.copy()
            elif other is None:
                other = {}
            if kwargs:
                # we have to merge kwargs dict with `other` before operating on
                # either so all validation and writing occur atomically
                other.update(kwargs)

            for key, val in other.items():
                self._set_arg_validate(key, val)
            for key, val in other.items():
                self._perform_set(key, val)

    def __delitem__(self, key: KeyType):
        """Remove a subsample from the column.`.

        .. seealso::

            :meth:`pop` to simultaneously get a keys value and delete it.

        Parameters
        ----------
        key : KeyType
            Name of the sample to remove from the column.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)

            if key not in self._subsamples:
                raise KeyError(key)

            dbKey = nested_data_db_key_from_names(self._column_name, self._samplen, key)
            isRecordDeleted = self._txnctx.dataTxn.delete(dbKey)
            if isRecordDeleted is False:
                raise RuntimeError(
                    f'Internal error. Not able to delete key {key} from staging '
                    f'db even though existence passed in memory verification. '
                    f'Please report this message in full to the hangar development team.',
                    f'Specified key: <{type(key)} {key}>', f'Calculated dbKey: <{dbKey}>',
                    f'isRecordDeleted: <{isRecordDeleted}>', f'DEBUG STRING: {self._debug_}')
            del self._subsamples[key]

    def pop(self, key: KeyType):
        """Retrieve some value for some key(s) and delete it in the same operation.

        Parameters
        ----------
        key : KeyType
            Subsample key to remove

        Returns
        -------
        value
            Upon success, the value of the removed key.
        """
        value = self[key]
        del self[key]
        return value
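

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): typical write access
# through a subsample writer like FlatSubsampleWriter above. The repository
# path, column name ('scans') and the assumption that the column is a nested
# float32 column of shape (64, 64) are hypothetical; the calls themselves
# (__setitem__, update, append, pop) are the methods defined on this class.

def _example_flat_subsample_write(repo_root='.'):
    import numpy as np
    from hangar import Repository

    repo = Repository(repo_root)
    co = repo.checkout(write=True)
    col = co.columns['scans']                  # assumed nested ndarray column
    with col:                                  # batch all writes in one context
        col[0] = {'axial': np.zeros((64, 64), dtype=np.float32)}
        sample = col[0]                        # subsample writer accessor
        sample['coronal'] = np.ones((64, 64), dtype=np.float32)
        sample.update({'sagittal': np.full((64, 64), 2, dtype=np.float32)})
        auto_key = sample.append(np.zeros((64, 64), dtype=np.float32))
        removed = sample.pop(auto_key)         # read value and delete it
    co.commit('add example scan subsamples')
    co.close()
    return removed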


class NestedSampleReader:

    __slots__ = ('_mode', '_column_name', '_samples',
                 '_be_fs', '_path', '_stack', '_schema')
    _attrs = __slots__

    def __init__(self,
                 columnname: str,
                 samples: Dict[KeyType, FlatSubsampleReader],
                 backend_handles: Dict[str, Any],
                 repo_path: Path,
                 mode: str,
                 schema=None,
                 *args, **kwargs):

        self._mode = mode
        self._column_name = columnname
        self._samples = samples
        self._be_fs = backend_handles
        self._path = repo_path
        self._stack: Optional[ExitStack] = None
        self._schema = schema

    def __repr__(self):
        res = (
            f'{self.__class__.__qualname__}('
            f'repo_pth={self._path}, '
            f'columnname={self._column_name}, '
            f"{[f'{key}={val}, ' for key, val in self._schema.schema.items()]}, "
            f'mode={self._mode})')
        return res

    def _repr_pretty_(self, p, cycle):
        res = f'Hangar {self.__class__.__qualname__} \
                \n    Column Name              : {self.column}\
                \n    Writeable                : {self.iswriteable}\
                \n    Column Type              : {self.column_type}\
                \n    Column Layout            : {self.column_layout}\
                \n    Schema Type              : {self.schema_type}\
                \n    DType                    : {self.dtype}\
                \n    Shape                    : {self.shape}\
                \n    Number of Samples        : {len(self)}\
                \n    Number of Subsamples     : {self.num_subsamples}\
                \n    Partial Remote Data Refs : {bool(self.contains_remote_references)}\n'
        p.text(res)

    def _ipython_key_completions_(self):
        """Let ipython know that any key based access can use the column keys

        Since we don't want to inherit from dict, nor mess with `__dir__` for
        the sanity of developers, this is the best way to ensure users can
        autocomplete keys.

        Returns
        -------
        list
            list of strings, each being one of the column keys for access.
        """
        return list(self.keys())

    def __enter__(self):
        self._enter_count += 1
        return self

    def __exit__(self, *exc):
        self._enter_count -= 1

    def _destruct(self):
        if isinstance(self._stack, ExitStack):
            self._stack.close()
        self._close()
        for sample in self._samples.values():
            sample._destruct()
        for attr in self._attrs:
            delattr(self, attr)

    def __getattr__(self, name):
        """Raise permission error after checkout is closed.

         Only runs after a call to :meth:`_destruct`, which is responsible
         for deleting all attributes from the object instance.
        """
        try:
            self.__getattribute__('_mode')  # once checkout is closed, this won't exist.
        except AttributeError:
            err = (f'Unable to operate on past checkout objects which have been '
                   f'closed. No operation occurred. Please use a new checkout.')
            raise PermissionError(err) from None
        return self.__getattribute__(name)

    @reader_checkout_only
    def __getstate__(self) -> dict:
        """ensure multiprocess operations can pickle relevant data.
        """
        return {slot: getattr(self, slot) for slot in self.__slots__}

    def __setstate__(self, state: dict) -> None:
        """ensure multiprocess operations can pickle relevant data.

        Technically should be decorated with @reader_checkout_only, but since
        at instance creation the '_mode' is not a set attribute, the decorator
        won't know how to process. Since only readers can be pickled, This
        isn't much of an issue.
        """
        for slot, value in state.items():
            setattr(self, slot, value)

    def __getitem__(
            self, key: SampleGetKeysType
    ) -> Union[FlatSubsampleReader, Union[Any, Dict[KeyType, Any]]]:
        """Get the sample access class for some sample key.

        Parameters
        ----------
        key
            Name of sample to retrieve

        Returns
        -------
        Union[FlatSubsampleReader, Union[Any, Dict[KeyType, Any]]]
            Sample accessor corresponding to the given key

        Raises
        ------
        KeyError
            If no sample with the provided key exists.
        """
        if isinstance(key, (list, tuple)):
            return reduce(op_getitem, key, self._samples)
        else:
            res = self._samples[key]
        return res

    def __iter__(self) -> Iterable[KeyType]:
        """Create iterator yielding an column sample keys.

        Yields
        -------
        Iterable[KeyType]
            Sample key contained in the column.
        """
        yield from self.keys()

    def __len__(self) -> int:
        """Find number of samples in the column
        """
        return len(self._samples)

    def __contains__(self, key: KeyType) -> bool:
        """Determine if some sample key exists in the column.
        """
        return key in self._samples

    def _open(self):
        for val in self._be_fs.values():
            try:
                # since we are storing non backend accessor information in the
                # be_fs weakref proxy for the purpose of memory savings, not
                # all elements have a `open` method
                val.open(mode=self._mode)
            except AttributeError:
                pass

    def _close(self):
        for val in self._be_fs.values():
            # since we are storing non backend accessor information in the
            # be_fs weakref proxy for the purpose of memory savings, not all
            # elements have a `close` method
            try:
                val.close()
            except AttributeError:
                pass

    @property
    def _enter_count(self):
        return self._be_fs['enter_count']

    @_enter_count.setter
    def _enter_count(self, value):
        self._be_fs['enter_count'] = value

    @property
    def _is_conman(self):
        return bool(self._enter_count)

    @property
    def column(self) -> str:
        """Name of the column.
        """
        return self._column_name

    @property
    def column_type(self):
        """Data container type of the column ('ndarray', 'str', etc).
        """
        return self._schema.column_type

    @property
    def column_layout(self):
        """Column layout type ('nested', 'flat', etc).
        """
        return self._schema.column_layout

    @property
    def schema_type(self):
        """Schema type of the contained data ('variable_shape', 'fixed_shape', etc).
        """
        return self._schema.schema_type

    @property
    def dtype(self):
        """Dtype of the columns data (np.float, str, etc).
        """
        return self._schema.dtype

    @property
    def shape(self):
        """(Max) shape of data that can (is) written in the column.
        """
        try:
            return self._schema.shape
        except AttributeError:
            return None

    @property
    def backend(self) -> str:
        """Code indicating which backing store is used when writing data.
        """
        return self._schema.backend

    @property
    def backend_options(self):
        """Filter / Compression options applied to backend when writing data.
        """
        return self._schema.backend_options

    @property
    def iswriteable(self) -> bool:
        """Bool indicating if this column object is write-enabled.
        """
        return False if self._mode == 'r' else True

    def _mode_local_aware_key_looper(self, local: bool) -> Iterable[KeyType]:
        """Generate keys for iteration with dict update safety ensured.

        Parameters
        ----------
        local
            True if only keys corresponding to data available on the local
            machine should be returned. False if keys referencing remote data
            should be included as well.

        Returns
        -------
        Iterable[KeyType]
            Sample keys conforming to the `local` argument spec.
        """
        _contains_remote_func = op_attrgetter('contains_remote_references')
        if local:
            if self._mode == 'r':
                yield from valfilterfalse(_contains_remote_func, self._samples).keys()
            else:
                yield from tuple(valfilterfalse(_contains_remote_func, self._samples).keys())
        else:
            if self._mode == 'r':
                yield from self._samples.keys()
            else:
                yield from tuple(self._samples.keys())

    @property
    def contains_remote_references(self) -> bool:
        """Bool indicating all subsamples in sample column exist on local disk.

        The data associated with subsamples referencing some remote server will
        need to be downloaded (``fetched`` in the hangar vocabulary) before
        they can be read into memory.

        Returns
        -------
        bool
            False if at least one subsample in the column references data
            stored on some remote server. True if all sample data is available
            on the machine's local disk.
        """
        _contains_remote_func = op_attrgetter('contains_remote_references')
        return any(map(_contains_remote_func, self._samples.values()))

    @property
    def remote_reference_keys(self) -> Tuple[KeyType]:
        """Compute subsample names whose data is stored in a remote server reference.

        Returns
        -------
        Tuple[KeyType]
            Tuple of sample keys in the column containing at least one
            subsample whose data references indicate remote server storage.
        """
        _remote_keys_func = op_attrgetter('remote_reference_keys')
        return tuple(valfilter(_remote_keys_func, self._samples).keys())

    @property
    def contains_subsamples(self) -> bool:
        """Bool indicating if sub-samples are contained in this column container.
        """
        return True

    @property
    def num_subsamples(self) -> int:
        """Calculate total number of subsamples existing in all samples in column
        """
        total = 0
        for sample in self._samples.values():
            total += len(sample)
        return total

    def keys(self, local: bool = False) -> Iterable[KeyType]:
        """Generator yielding the name (key) of every subsample.

        Parameters
        ----------
        local : bool, optional
            If True, returned keys will only correspond to data which is
            available for reading on the local disk, by default False.

        Yields
        ------
        Iterable[KeyType]
            Keys of one sample at a time inside the column.
        """
        yield from self._mode_local_aware_key_looper(local)

    def values(self, local: bool = False) -> Iterable[Any]:
        """Generator yielding the tensor data for every subsample.

        Parameters
        ----------
        local : bool, optional
            If True, returned values will only correspond to data which is
            available for reading on the local disk. No attempt will be made to
            read data existing on a remote server, by default False.

        Yields
        ------
        Iterable[Any]
            Subsample accessor object of one sample at a time in the column.
        """
        for key in self._mode_local_aware_key_looper(local):
            yield self[key]

    def items(self, local: bool = False) -> Iterable[Tuple[KeyType, Any]]:
        """Generator yielding (name, data) tuple for every subsample.

        Parameters
        ----------
        local : bool, optional
            If True, returned keys/values will only correspond to data which is
            available for reading on the local disk. No attempt will be made to
            read data existing on a remote server, by default False.

        Yields
        ------
        Iterable[Tuple[KeyType, Any]]
            Name and subsample accessor object for every sample in the column.
        """
        for key in self._mode_local_aware_key_looper(local):
            yield (key, self[key])

    def get(
            self, key: SampleGetKeysType, default: Any = None
    ) -> Union[FlatSubsampleReader, Union[Any, Dict[KeyType, Any]]]:
        """Retrieve data for some sample key(s) in the column.

        Parameters
        ----------
        key
            The name of the sample to retrieve
        default
            if a `key` parameter is not found, then return this value instead.
            By default, None.

        Returns
        -------
        Union[FlatSubsampleReader, Union[Any, Dict[KeyType, Any]]]
            Sample accessor class given by name ``key`` which can be used to
            access subsample data.
        """
        try:
            return self[key]
        except KeyError:
            return default
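

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): read patterns on a
# nested column accessor like NestedSampleReader above. The column and key
# names are hypothetical placeholders.

def _example_nested_column_read(repo_root='.'):
    from hangar import Repository

    repo = Repository(repo_root)
    co = repo.checkout()
    col = co.columns['scans']                  # assumed nested column
    accessor = col[0]                          # sample accessor object
    one_piece = col[(0, 'axial')]              # chained (sample, subsample) lookup
    local_only = list(col.keys(local=True))    # skip samples holding remote refs
    needs_fetch = col.remote_reference_keys    # sample keys referencing remote data
    co.close()
    return accessor, one_piece, local_only, needs_fetch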


# ---------------- writer methods only after this point -------------------


class NestedSampleWriter(NestedSampleReader):

    __slots__ = ('_txnctx',)
    _attrs = __slots__ + NestedSampleReader.__slots__

    def __init__(self, aset_ctx=None, *args, **kwargs):

        super().__init__(*args, **kwargs)
        self._txnctx = aset_ctx

    def __enter__(self):
        with ExitStack() as stack:
            self._txnctx.open_write()
            stack.callback(self._txnctx.close_write)
            if self._enter_count == 0:
                for k in tuple(self._be_fs.keys()):
                    if k in ('enter_count', 'schema_spec'):
                        continue
                    stack.enter_context(self._be_fs[k])
            self._enter_count += 1
            self._stack = stack.pop_all()
        return self

    def __exit__(self, *exc):
        self._stack.close()
        self._enter_count -= 1

    def _set_arg_validate(self, sample_key, subsample_map):
        if not is_suitable_user_key(sample_key):
            raise ValueError(f'Sample name `{sample_key}` is not suitable.')

        for subsample_key, subsample_val in subsample_map.items():
            if not is_suitable_user_key(subsample_key):
                raise ValueError(f'Subsample name `{subsample_key}` is not suitable.')
            isCompat = self._schema.verify_data_compatible(subsample_val)
            if not isCompat.compatible:
                raise ValueError(isCompat.reason)

    def _perform_set(self, key, value) -> None:
        if key in self._samples:
            self._samples[key].update(value)
        else:
            self._samples[key] = FlatSubsampleWriter(
                schema=proxy(self._schema),
                aset_ctx=proxy(self._txnctx),
                repo_path=self._path,
                columnname=self._column_name,
                samplen=key,
                be_handles=proxy(self._be_fs),
                specs={},
                mode='a')
            try:
                self._samples[key].update(value)
            except Exception as e:
                del self._samples[key]
                raise e

    def __setitem__(self, key, value) -> None:
        """Store some subsample key / subsample data map, overwriting existing keys.

        .. seealso::

            :meth:`update` for alternative syntax for setting values.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)
            value = dict(value)
            self._set_arg_validate(key, value)
            self._perform_set(key, value)

    def update(self, other=None, **kwargs) -> None:
        """Store some data with the key/value pairs, overwriting existing keys.

        :meth:`update` implements functionality similar to python's builtin
        :meth:`dict.update` method, accepting either a dictionary or other
        iterable (of length two) listing key / value pairs.

        Parameters
        ----------
        other
            Dictionary mapping sample names to subsample data maps. Or Sequence
            (list or tuple) where element one is the sample name and element
            two is a subsample data map.
        **kwargs
            keyword arguments provided will be saved with keywords as sample
            keys (string type only) and values as a mapping of subarray keys
            to data values.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)

            if isinstance(other, dict):
                other = other.copy()
            elif other:
                other = dict(other)
            else:
                other = {}
            if kwargs:
                # we merge kwargs dict with `other` before operating on either
                # so all necessary validation and writing occur atomically
                other.update(kwargs)
            for sample in tuple(other.keys()):
                other[sample] = dict(other[sample])

            for key, val in other.items():
                self._set_arg_validate(key, val)
            for key, val in other.items():
                self._perform_set(key, val)

    def __delitem__(self, key: KeyType):
        """Remove a sample (including all contained subsamples) from the column.

        .. seealso::

            :meth:`pop` for alternative implementing a simultaneous get value
            and delete operation.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)

            sample = self._samples[key]
            subsample_keys = list(sample.keys())
            for subkey in subsample_keys:
                del sample[subkey]

            self._samples[key]._destruct()
            del self._samples[key]

    def pop(self, key: KeyType) -> Dict[KeyType, Any]:
        """Retrieve some value for some key(s) and delete it in the same operation.

        Parameters
        ----------
        key : KeyType
            sample key to remove

        Returns
        -------
        Dict[KeyType, Any]
            Upon success, a dictionary mapping the removed sample's subsample
            names to their stored values.
        """
        res = self._samples[key].data
        del self[key]
        return res

    def change_backend(self, backend: str, backend_options: Optional[dict] = None):
        """Change the default backend and filters applied to future data writes.

        .. warning::

           This method is meant for advanced users only. Please refer to the
           hangar backend codebase for information on accepted parameters and
           options.

        Parameters
        ----------
        backend : str
            Backend format code to switch to.
        backend_options
            Backend option specification to use (if specified). If left to
            default value of None, then default options for backend are
            automatically used.

        Raises
        ------
        RuntimeError
            If this method was called while this column is invoked in a
            context manager
        ValueError
            If the backend format code is not valid.
        """
        if self._is_conman:
            raise RuntimeError('Cannot call method inside column context manager.')

        self._schema.change_backend(backend, backend_options=backend_options)

        new_schema_digest = self._schema.schema_hash_digest()
        columnSchemaKey = schema_db_key_from_column(self._column_name, layout=self.column_layout)
        columnSchemaVal = schema_record_db_val_from_digest(new_schema_digest)
        hashSchemaKey = schema_hash_db_key_from_digest(new_schema_digest)
        hashSchemaVal = schema_hash_record_db_val_from_spec(self._schema.schema)

        # -------- set vals in lmdb only after schema is sure to exist --------

        with self._txnctx.write() as ctx:
            ctx.dataTxn.put(columnSchemaKey, columnSchemaVal)
            ctx.hashTxn.put(hashSchemaKey, hashSchemaVal, overwrite=False)

        new_backend = self._schema.backend
        if new_backend not in self._be_fs:
            fhands = open_file_handles(
                backends=[new_backend],
                path=self._path,
                mode='a',
                schema=self._schema)
            self._be_fs[new_backend] = fhands[new_backend]
        else:
            self._be_fs[new_backend].close()
        self._be_fs[new_backend].open(mode='a')
        self._be_fs[new_backend].backend_opts = self._schema.backend_options
        return
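

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): bulk writes on a
# nested column followed by a backend change, using the NestedSampleWriter
# methods above. The repository path, column name, schema assumptions and
# the '00' (hdf5_00) backend format code are hypothetical placeholders.

def _example_nested_column_write(repo_root='.'):
    import numpy as np
    from hangar import Repository

    repo = Repository(repo_root)
    co = repo.checkout(write=True)
    col = co.columns['scans']                  # assumed nested float32 column
    col.update({
        1: {'axial': np.zeros((64, 64), dtype=np.float32)},
        2: {'axial': np.ones((64, 64), dtype=np.float32),
            'coronal': np.ones((64, 64), dtype=np.float32)},
    })
    # must be called outside of the column context manager
    col.change_backend('00')
    col[3] = {'axial': np.full((64, 64), 3, dtype=np.float32)}
    co.commit('bulk add scans and switch backend')
    co.close()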


================================================
FILE: src/hangar/constants.py
================================================
from .utils import is_64bits, parse_bytes

# parsing constants

SEP_KEY = ':'
SEP_LST = ' '
SEP_CMT = ' << '
SEP_SLC = "*"
SEP_HSH = '$'

CMT_KV_JOIN_KEY = SEP_LST.encode()
CMT_DIGEST_JOIN_KEY = ''
CMT_REC_JOIN_KEY = SEP_HSH.encode()

K_INT = f'#'  # must be length 1 value
K_BRANCH = f'branch{SEP_KEY}'
K_HEAD = 'head'
K_REMOTES = f'remote{SEP_KEY}'
K_STGARR = f'a{SEP_KEY}'
K_STGMETA = f'l{SEP_KEY}'
K_SCHEMA = f's{SEP_KEY}'
K_HASH = f'h{SEP_KEY}'
K_WLOCK = f'writerlock{SEP_KEY}'
K_VERSION = 'software_version'

WLOCK_SENTINAL = 'LOCK_AVAILABLE'

# directory names

DIR_HANGAR = '.hangar'
DIR_HANGAR_SERVER = '.hangar_server'
DIR_DATA = 'data'
DIR_DATA_STORE = 'store_data'
DIR_DATA_STAGE = 'stage_data'
DIR_DATA_REMOTE = 'remote_data'

# configuration file names:

CONFIG_USER_NAME = 'config_user.ini'
CONFIG_SERVER_NAME = 'config_server.ini'

# LMDB database names and settings.


LMDB_SETTINGS = {
    # lmdb cannot open map larger than 2GB on 32 bit machines.
    'map_size': 50_000_000_000,
    'meminit': False,
    'subdir': False,
    'lock': False,
    'max_spare_txns': 10,
}

LMDB_REF_NAME = 'ref.lmdb'
LMDB_HASH_NAME = 'hash.lmdb'
LMDB_BRANCH_NAME = 'branch.lmdb'
LMDB_STAGE_REF_NAME = 'stage_ref.lmdb'
LMDB_STAGE_HASH_NAME = 'stage_hash.lmdb'

# readme file

README_FILE_NAME = 'README.txt'




================================================
FILE: src/hangar/context.py
================================================
import configparser
import os
from pathlib import Path
import platform
import shutil
import tempfile
import warnings
from typing import MutableMapping, Optional

import lmdb

from . import __version__
from .constants import (
    CONFIG_USER_NAME,
    DIR_DATA_REMOTE,
    DIR_DATA_STAGE,
    DIR_DATA_STORE,
    DIR_DATA,
    LMDB_BRANCH_NAME,
    LMDB_HASH_NAME,
    LMDB_REF_NAME,
    LMDB_SETTINGS,
    LMDB_STAGE_HASH_NAME,
    LMDB_STAGE_REF_NAME,
    README_FILE_NAME,
)
from .records.commiting import unpack_commit_ref
from .records.heads import (
    create_branch,
    get_branch_head_commit,
    get_staging_branch_head,
    set_staging_branch_head,
)
from .records.parsing import repo_version_raw_spec_from_raw_string
from .records.vcompat import (
    is_repo_software_version_compatible,
    set_repository_software_version,
    startup_check_repo_version,
)
from .utils import readme_contents, is_64bits


class Environments(object):

    def __init__(self, pth: Path):

        self.repo_path: Path = pth
        self.refenv: Optional[lmdb.Environment] = None
        self.hashenv: Optional[lmdb.Environment] = None
        self.stageenv: Optional[lmdb.Environment] = None
        self.branchenv: Optional[lmdb.Environment] = None
        self.stagehashenv: Optional[lmdb.Environment] = None
        self.cmtenv: MutableMapping[str, lmdb.Environment] = {}
        self._startup()

    @property
    def repo_is_initialized(self) -> bool:
        """Property to check if the repository is initialized, read-only attribute

        Returns
        -------
        bool
            True if repo environments are initialized, False otherwise
        """
        ret = True if isinstance(self.refenv, lmdb.Environment) else False
        return ret

    def _startup(self) -> bool:
        """When first access to the Repo starts, attempt to open the db envs.

        This function is designed to fail if a repository does not exist at
        the specified :py:attr:`repo_path`, so the user can explicitly choose
        to initialize the repo. Once opened, the lmdb environments should not
        be closed until the program terminates.

        Returns
        -------
        bool
            False if no repository exists at the given path, otherwise True.

        Warns
        -----
        UserWarning
            Should the repository not exist at the provided repo path.

        Raises
        ------
        RuntimeError
            If the repository version is not compatible with the current
            software.
        """

        if not self.repo_path.joinpath(LMDB_BRANCH_NAME).is_file():
            msg = f'No repository exists at {self.repo_path}, please use `repo.init()` method'
            warnings.warn(msg, UserWarning)
            return False

        if not is_64bits():
            raise OSError(f'Hangar cannot run on 32 bit machines')

        repo_ver = startup_check_repo_version(self.repo_path)
        curr_ver = repo_version_raw_spec_from_raw_string(v_str=__version__)
        if not is_repo_software_version_compatible(repo_ver, curr_ver):
            msg = f'repository written version: {repo_ver} is not compatible '\
                  f'with the current Hangar software version: {curr_ver}'
            raise RuntimeError(msg)

        self._open_environments()
        return True

    def init_repo(self,
                  user_name: str,
                  user_email: str,
                  remove_old: bool = False) -> Path:
        """Create a new hangar repositiory at the specified environment path.

        Parameters
        ----------
        user_name : str
            Name of the repository user.
        user_email : str
            Email address of the repository user.
        remove_old : bool, optional(default value = False)
            DEVELOPER USE ONLY --- Remove all data and records stored in the
            repository if this option is enabled, defaults to False.

        Returns
        -------
        Path
            The path to the newly created repository on disk.

        Raises
        ------
        OSError
            If a hangar repository exists at the specified path, and `remove_old`
            was not set to ``True``.
        """
        if self.repo_path.joinpath(LMDB_BRANCH_NAME).is_file():
            if remove_old is True:
                shutil.rmtree(str(self.repo_path))
            else:
                raise OSError(f'Hangar Directory: {self.repo_path} already exists')

        self.repo_path.mkdir()
        self.repo_path.joinpath(DIR_DATA_STORE).mkdir()
        self.repo_path.joinpath(DIR_DATA_STAGE).mkdir()
        self.repo_path.joinpath(DIR_DATA_REMOTE).mkdir()
        self.repo_path.joinpath(DIR_DATA).mkdir()
        print(f'Hangar Repo initialized at: {self.repo_path}')

        userConf = {'USER': {'name': user_name, 'email': user_email}}
        CFG = configparser.ConfigParser()
        CFG.read_dict(userConf)
        with self.repo_path.joinpath(CONFIG_USER_NAME).open('w') as f:
            CFG.write(f)

        readmeTxt = readme_contents(user_name, user_email)
        with self.repo_path.joinpath(README_FILE_NAME).open('w') as f:
            f.write(readmeTxt.getvalue())

        self._open_environments()
        set_repository_software_version(branchenv=self.branchenv, ver_str=__version__)
        create_branch(self.branchenv, 'master', '')
        set_staging_branch_head(self.branchenv, 'master')
        return self.repo_path

    def checkout_commit(self, branch_name: str = '', commit: str = '') -> str:
        """Set up db environment with unpacked commit ref records.

        Parameters
        ----------
        branch_name : str, optional
            name of the branch to read, defaults to ''
        commit : str, optional
            name of the commit to read, defaults to ''

        Returns
        -------
        str
            commit hash which was checked out
        """
        if commit != '':
            commit_hash = commit
            txt = f' * Checking out COMMIT: {commit_hash}'
        elif branch_name != '':
            commit_hash = get_branch_head_commit(self.branchenv, branch_name)
            txt = f' * Checking out BRANCH: {branch_name} with current HEAD: {commit_hash}'
        else:
            head_branch = get_staging_branch_head(self.branchenv)
            commit_hash = get_branch_head_commit(self.branchenv, head_branch)
            txt = f'\n Neither BRANCH nor COMMIT specified.'\
                  f'\n * Checking out writing HEAD BRANCH: {head_branch}'
        print(txt)

        # On UNIX-like system, an open process still retains ability to
        # interact with disk space allocated to a file when it is removed from
        # disk. Windows does not, and will not allow file to be removed if a
        # process is interacting with it. While the CM form is cleaner, this
        # hack allows similar usage on Windows platforms.

        if platform.system() != 'Windows':
            with tempfile.TemporaryDirectory() as tempD:
                tmpDF = os.path.join(tempD, f'{commit_hash}.lmdb')
                tmpDB = lmdb.open(path=tmpDF, **LMDB_SETTINGS)
                unpack_commit_ref(self.refenv, tmpDB, commit_hash)
                self.cmtenv[commit_hash] = tmpDB
        else:
            tempD = tempfile.mkdtemp()
            tmpDF = os.path.join(tempD, f'{commit_hash}.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **LMDB_SETTINGS)
            unpack_commit_ref(self.refenv, tmpDB, commit_hash)
            self.cmtenv[commit_hash] = tmpDB

        return commit_hash

    def _open_environments(self):
        """Open the standard lmdb databases at the repo path.

        If any commits are checked out (in an unpacked state), read those in as
        well.
        """
        ref_pth = str(self.repo_path.joinpath(LMDB_REF_NAME))
        hash_pth = str(self.repo_path.joinpath(LMDB_HASH_NAME))
        stage_pth = str(self.repo_path.joinpath(LMDB_STAGE_REF_NAME))
        branch_pth = str(self.repo_path.joinpath(LMDB_BRANCH_NAME))
        stagehash_pth = str(self.repo_path.joinpath(LMDB_STAGE_HASH_NAME))

        self.refenv = lmdb.open(path=ref_pth, **LMDB_SETTINGS)
        self.hashenv = lmdb.open(path=hash_pth, **LMDB_SETTINGS)
        self.stageenv = lmdb.open(path=stage_pth, **LMDB_SETTINGS)
        self.branchenv = lmdb.open(path=branch_pth, **LMDB_SETTINGS)
        self.stagehashenv = lmdb.open(path=stagehash_pth, **LMDB_SETTINGS)

    def _close_environments(self):

        self.refenv.close()
        self.hashenv.close()
        self.stageenv.close()
        self.branchenv.close()
        self.stagehashenv.close()
        for env in self.cmtenv.values():
            if platform.system() == 'Windows':
                envpth = env.path()
                env.close()
                os.remove(envpth)
            else:
                env.close()
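

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): how the Environments
# container above is bootstrapped when creating a repository. The base
# directory (which must already exist) and the user details are hypothetical;
# in normal use this is orchestrated by the public ``hangar.Repository``
# object rather than being called directly.

def _example_environments_bootstrap(base_dir='/tmp'):
    from pathlib import Path
    from .constants import DIR_HANGAR

    repo_pth = Path(base_dir, DIR_HANGAR)
    envs = Environments(pth=repo_pth)          # warns if no repo exists yet
    if not envs.repo_is_initialized:
        envs.init_repo(user_name='Jane Doe',
                       user_email='jane@example.com',
                       remove_old=False)
    initialized = envs.repo_is_initialized
    envs._close_environments()
    return initialized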


================================================
FILE: src/hangar/dataset/__init__.py
================================================
__all__ = ('make_numpy_dataset', 'make_torch_dataset', 'make_tensorflow_dataset')

from typing import Sequence, Callable, TYPE_CHECKING, Union, List, Tuple

if TYPE_CHECKING:
    from ..columns import ModifierTypes as Columns
    from .torch_dset import TorchDataset
    from .numpy_dset import NumpyDataset
    from .tensorflow_dset import tf_Dataset
    KeyType = Union[str, int, List, Tuple]


def make_numpy_dataset(
        columns: Sequence['Columns'],
        keys: 'KeyType' = None,
        batch_size: int = None,
        drop_last: bool = False,
        shuffle: bool = True,
        collate_fn: Callable = None) -> 'NumpyDataset':
    """Group column into a single numpy dataset, provides iterative looping over data.

    This API also provides the options to batch the data which is a major difference
    between other dataset APIs. In traditional Machine learning applications, it's quite
    natural to load the whole dataset as a single batch because it's possible to fit into
    the system memory. Passing the size of the dataset as the batch size would make it
    possible here to do just that. This API also acts as an entry point for other
    non-supported frameworks to load data from hangar as batches into the training loop.

    Parameters
    ----------
    columns
        A column object, a tuple of column object or a list of column
        objects.
    keys
        A sequence of sample names. If given, only those samples will be
        fetched from the column.
    batch_size
        Size of the batch. This will batch the dataset on the zeroth dimension. For
        example, if the data is of the shape (H x W x C) the batched data will be shaped
        as (B x H x W x C) where B is the batch size
    drop_last
        Should the last uncompleted batch be dropped
    shuffle
        Should the data be shuffled on each epoch
    collate_fn
        A function to collate samples together in a batch. If this option is absent,
        the heuristic used to collate the batch is
            1. If the column is an ndarray flat column, then `np.stack` will be used
            2. If the column has any other properties, the batch is returned as a plain list
        Note that each element of the batch passed to ``collate_fn`` consists of
        datapoints from all the columns. For example, if the columns from which the data
        is being fetched are col1 and col2, then the batch would look like

        ```python
        [
            (data0_col1, data0_col2),
            (data1_col1, data1_col2),
            (data2_col1, data2_col2),
            ...
        ]
        ```

    Examples
    --------
    >>> from hangar import Repository
    >>> from hangar.dataset import make_numpy_dataset
    >>> repo = Repository('.')
    >>> co = repo.checkout()
    >>> imgcol = co.columns['images']
    >>> classcol = co.columns['classes']
    >>> dataset = make_numpy_dataset((imgcol, classcol), batch_size=64)
    >>> for batch in dataset:
    ...     out = train_model(batch[0])
    ...     loss = loss_fn(out, batch[1])

    Returns
    -------
    :class:`~.numpy_dset.NumpyDataset`
    """
    from .numpy_dset import _make_numpy_dataset
    return _make_numpy_dataset(
        columns=columns,
        keys=keys,
        batch_size=batch_size,
        drop_last=drop_last,
        shuffle=shuffle,
        collate_fn=collate_fn)
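

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): a custom
# ``collate_fn`` matching the batch layout described in the docstring above,
# where each batch element is a tuple holding one datapoint per column. The
# (image, label) interpretation and the column names in the usage note are
# hypothetical.

def _example_image_label_collate(batch):
    import numpy as np

    # `batch` arrives as [(img0, label0), (img1, label1), ...]; regroup per
    # column, stacking the image arrays and leaving the labels as a list.
    images, labels = zip(*batch)
    return np.stack(images), list(labels)

# Possible usage (hypothetical column names):
#   dataset = make_numpy_dataset((co.columns['images'], co.columns['labels']),
#                                batch_size=32,
#                                collate_fn=_example_image_label_collate)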


def make_torch_dataset(
        columns: Sequence['Columns'],
        keys: 'KeyType' = None,
        as_dict: bool = False) -> 'TorchDataset':
    """Returns a :class:`torch.utils.data.Dataset` object which can be loaded into
    a :class:`torch.utils.data.DataLoader`.

    .. note::

        PyTorch's :class:`torch.utils.data.DataLoader` can effectively perform custom
        operations such as shuffling, batching, multiprocessed reads, etc., and hence we
        limit the surface area of the dataset API here to just opening a channel for
        reading. Use DataLoaders for such operations.

    .. warning::

       On Windows systems, setting the parameter ``num_workers`` in the
       resulting :class:`torch.utils.data.DataLoader` method will result in a
       RuntimeError or deadlock. This is due to limitations of multiprocess
       start methods on Windows itself. Using the default argument value
       (``num_workers=0``) will let the DataLoader work in single process mode
       as expected.

    Parameters
    ----------
    columns
        A column object, a tuple of column object or a list of column
        objects.
    keys
        A sequence of sample names. If given, only those samples will be
        fetched from the column.
    as_dict
        Return the data as an OrderedDict with column names as keys. If False,
        it returns a tuple of arrays

    Examples
    --------
    >>> from hangar import Repository
    >>> from torch.utils.data import DataLoader
    >>> from hangar.dataset import make_torch_dataset
    >>> from collections import namedtuple
    >>> repo = Repository('.')
    >>> co = repo.checkout()
    >>> imgcol = co.columns['images']
    >>> classcol = co.columns['classes']
    >>> dataset = make_torch_dataset((imgcol, classcol), as_dict=True)
    >>> loader = DataLoader(dataset, batch_size=16)
    >>> for batch in loader:
    ...     out = train_model(batch['images'])
    ...     loss = loss_fn(out, batch['classes'])

    Returns
    -------
    :class:`torch.utils.data.Dataset`
    """
    from .torch_dset import _make_torch_dataset
    return _make_torch_dataset(columns=columns, keys=keys, as_dict=as_dict)


def make_tensorflow_dataset(
        columns: Sequence['Columns'],
        keys: 'KeyType' = None,
        shuffle: bool = False) -> 'tf_Dataset':
    """Make a tensorflow dataset from a hangar column.

    This method uses the `from_generator` function of `tensorflow.data.Dataset` with a
    generator function that wraps all the hangar columns. This function also accepts an
    optional ``shuffle`` argument that does a global shuffle across all the samples.
    This is convenient since a Tensorflow Dataset otherwise shuffles by loading only the
    subset of data that can fit into memory and shuffling within that subset.

    .. warning::

        This function relies on `tf.data.Dataset.from_generator`, which calls into the
        python interpreter to run the generator function. This generator function
        will not be serialized in a GraphDef and hence has limited portability. The
        operation must run in the same address space as the Python program that calls
        'make_tensorflow_dataset'. Also, since it calls back into the python interpreter,
        it is subject to the GIL and is not parallelizable even with a `Dataset.map`
        call. In fact, any attempt to parallelize the read will result in worse
        performance.

    Parameters
    ----------
    columns
        A column object, a tuple of column object or a list of column objects`
    keys
        A sequence of sample names. If given, only those samples will be fetched
        from the column.
    shuffle
        The generator uses this to decide whether a global shuffle across all the
        samples is required. There is no restriction on calling ``shuffle()`` on
        the returned dataset as well.

    Examples
    --------
    >>> from hangar import Repository
    >>> from hangar.dataset import make_tensorflow_dataset
    >>> import tensorflow as tf
    >>> tf.compat.v1.enable_eager_execution()
    >>> repo = Repository('')
    >>> co = repo.checkout()
    >>> data = co.columns['mnist_data']
    >>> target = co.columns['mnist_target']
    >>> tf_dset = make_tensorflow_dataset([data, target])
    >>> tf_dset = tf_dset.batch(512)
    >>> for bdata, btarget in tf_dset:
    ...     print(bdata.shape, btarget.shape)

    Returns
    -------
    :class:`tf_Dataset`
    """
    from .tensorflow_dset import _make_tensorflow_dataset
    return _make_tensorflow_dataset(columns=columns, keys=keys, shuffle=shuffle)


================================================
FILE: src/hangar/dataset/common.py
================================================
import typing
from typing import Union, Sequence, Tuple, List, Dict
from collections import OrderedDict

from ..columns import is_column, is_writer_column
from ..optimized_utils import is_ordered_sequence

if typing.TYPE_CHECKING:
    from hangar.columns.column import ModifierTypes as Columns
    KeyType = Union[str, int, List, Tuple]


class HangarDataset:
    """Dataset class that does the initial checks to verify whether the provided
    columns can be arranged together as a dataset. These verifications are done on the
    keys of each column. If ``keys`` argument is ``None``, initializer of this class
    makes the key list by checking the local keys across all columns.
    If ``keys`` argument is provided, then it assumes the provided keys are valid and
    restrain from doing any more check on it.
    It provides the ``__getitem__`` accessor for downstream process to consume the
    grouped data


    Parameters
    ----------
    columns
        A single column object or a sequence of column objects
    keys
        A sequence of sample names. If given, only those samples will be
        fetched from the column.
    """

    def __init__(self,
                 columns: Union['Columns', Sequence['Columns']],
                 keys: 'KeyType' = None):

        self._columns: Dict[str, 'Columns'] = OrderedDict()
        self._is_conman_counter = 0

        if is_ordered_sequence(columns):
            if len(columns) == 0:
                raise TypeError(f'At least one element must exist in input sequence.')
        else:
            columns = (columns,)

        for obj in columns:
            if not is_column(obj):
                raise TypeError(
                    f'All elements of input sequence must be hangar column objects.')
            elif is_writer_column(obj):
                raise PermissionError(
                    f'Columns cannot be used while accessed via a `write-enabled` '
                    f'checkout. Please close the checkout and reopen the column '
                    f'via a new checkout opened in `read-only` mode.')
            column_name = obj.column
            self._columns[column_name] = obj

        if keys:
            self._keys = keys
        else:
            if len(set((col.column_layout for col in self._columns.values()))) != 1:  # all same layout
                raise ValueError(f"keys must be passed when columns do not all share the same layout")

            keys = []
            standard_keys = set()
            for idx, col in enumerate(self._columns.values()):
                # only match top level keys, even for nested columns
                if idx == 0:
                    standard_keys = set(col.keys(local=True))
                    if len(standard_keys) == 0:
                        raise RuntimeError("No local data found")
                else:
                    key_set = set(col.keys(local=True))
                    if len(standard_keys.symmetric_difference(key_set)) != 0:
                        raise KeyError("Keys from multiple columns couldn't be matched. "
                                       "Pass keys explicitly while creating dataset")
                if col.column_layout == 'flat':
                    column_keys = (sample for sample in col.keys(local=True))
                elif col.column_layout == 'nested':
                    column_keys = ((sample, ...) for sample in col.keys(local=True))
                else:
                    raise RuntimeError(f'unknown column layout: {col}')

                keys.append(column_keys)
            if len(keys) == 1:
                self._keys = tuple(keys[0])
            else:
                self._keys = tuple(zip(*keys))

    @property
    def columns(self):
        return self._columns

    def __len__(self):
        return len(self._keys)

    def index_get(self, index: int):
        """It takes one sample index and returns a the items from each column for
        the given sample name for the given index.
        """
        keys = self._keys[index]
        if len(self._columns) == 1:
            for col in self.columns.values():
                return col[keys]
        else:
            if len(self.columns) != len(keys):
                raise RuntimeError(
                    f'Internal error setting up columns/keys. '
                    f'columns: {self.columns} keys: {keys}'
                )
            res = (column[key] for column, key in zip(self.columns.values(), keys))
            return tuple(res)
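

# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the key structure
# the constructor above builds when several columns share the same sample
# names. The sample names ('a', 'b') are made up; the point is the per-index
# tuple of per-column keys produced by ``zip`` and consumed by `index_get`.

def _example_key_pairing():
    # keys derived from two nested columns sharing sample names 'a' and 'b';
    # each nested key is a (sample, Ellipsis) pair selecting every subsample
    col1_keys = [('a', ...), ('b', ...)]
    col2_keys = [('a', ...), ('b', ...)]
    paired = tuple(zip(col1_keys, col2_keys))
    # paired == ((('a', Ellipsis), ('a', Ellipsis)),
    #            (('b', Ellipsis), ('b', Ellipsis)))
    # index_get(0) would then read col1[('a', ...)] and col2[('a', ...)]
    return paired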


================================================
FILE: src/hangar/dataset/numpy_dset.py
================================================
from typing import Sequence, Callable, TYPE_CHECKING, Union, List, Tuple
import random

import numpy as np

from .common import HangarDataset

if TYPE_CHECKING:
    from ..columns import ModifierTypes
    Columns = ModifierTypes
    KeyType = Union[str, int, List, Tuple]


def default_collate_fn(batch):
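    """Collate a list of samples into a single batch.

    ndarray elements are stacked along a new leading axis with ``np.stack``;
    ``str`` and ``dict`` (nested column) elements are returned as the plain
    list; tuples (one element per column) are collated column-wise by
    recursing on each position.
    """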
    elem = batch[0]
    if isinstance(elem, np.ndarray):
        # TODO: stack to numpy array (out=) for performance
        return np.stack(batch)
    elif isinstance(elem, str):
        return batch
    elif isinstance(elem, dict):  # nested
        return batch
    elif isinstance(elem, tuple):  # multiple columns
        out = (default_collate_fn(dt) for dt in zip(*batch))
        return tuple(out)


class NumpyDataset:
    """NumpyDataset class provides interfaces for users to iterate over the batches of
    data from different columns. The only user facing APIs it exposes are ``__len__`` and
    ``__iter__``. Batch and shuffle operations are handled by `:func:`make_numpy_dataset`
    based on the arguments it gets and hence user should not interact with this class for
    such operations. Note that, user would never instantiate this class directly. Instead
    `:func:`make_numpy_dataset` act as the entry point and return an object of this class
    to the user

    Parameters
    ----------
    dataset
        Hangar's Dataset object that groups columns for downstream processing
    batch_size
        Size of the individual batch. If specified, batches of this size will be returned
        on each iteration
    drop_last
        Whether the last incomplete batch should be dropped
    shuffle
        Whether the samples should be reshuffled on each epoch
    collate_fn
        A function to collate samples together into a batch. When this option is absent,
        the heuristics used to collate the batch are
            1. If the column is an ndarray flat column, then `np.stack` will be used
            2. If the column has any other properties, `list.append` will be used
        Note that each element of the batch passed to collate_fn consists of
        datapoints from all the columns. For example, if the columns the data is
        being fetched from are col1 and col2, then the batch would look like

        ```python
        [
            (data0_col1, data0_col2),
            (data1_col1, data1_col2),
            (data2_col1, data2_col2),
            ...
        ]
        ```
    """
    def __init__(self, dataset: HangarDataset, batch_size: int, drop_last: bool,
                 shuffle: bool, collate_fn: Callable = None):
        self._dataset = dataset
        self._num_batches = None
        self._batch_size = None
        if batch_size:
            self.collate_fn = collate_fn if collate_fn else default_collate_fn
            self._batch(batch_size, drop_last)
        else:
            if collate_fn:
                raise RuntimeError("Found `collate_fn` in the argument which is a no-op "
                                   "since batching is not enabled")
            if drop_last:
                raise RuntimeError("Setting `drop_last` is a no-op when batching is not enabled")
        self._shuffle = shuffle
        self._indices = list(range(len(self._dataset)))

    @property
    def dataset(self):
        return self._dataset

    @property
    def num_batches(self):
        return self._num_batches

    @property
    def batch_size(self):
        return self._batch_size

    @batch_size.setter
    def batch_size(self, value: int):
        if not isinstance(value, int):
            raise TypeError(f'Expected integer type, received {type(value)}')
        elif value < 1:
            raise ValueError(f'batch_size value must be >= 1, received {value}')
        self._batch_size = value

    @property
    def shuffle(self):
        return self._shuffle

    @shuffle.setter
    def shuffle(self, value: bool):
        if not isinstance(value, bool):
            raise TypeError(f'Expected bool type, received {type(value)}')
        self._shuffle = value

    def __len__(self):
        return len(self._dataset)

    def _batch(self, batch_size, drop_last=True) -> None:
        """Private function to this class to calculate the batch parameters. These
        calculated parameters will be considered by the ``__iter__`` method while
        fetching the batches for downstream process. This function will be called at
        the time of object instantiation and should not be triggered independently

        Parameters
        ----------
        batch_size : int
            Size of the individual batch. If specified batches of this size will be returned
            on each iteration
        drop_last : bool
            Should drop the last incomplete batch
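
        Notes
        -----
        For example, with ``len(dataset) == 10`` and ``batch_size == 3``,
        ``divmod`` yields 3 full batches and a remainder of 1; with
        ``drop_last=False`` the partial batch is kept and ``num_batches``
        becomes 4, otherwise it stays at 3.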
        """
        num_batches, has_last = divmod(len(self._dataset), batch_size)
        if num_batches == 0:
            raise RuntimeError("Batch size exceeded the number of samples")
        if has_last and not drop_last:
            num_batches += 1
        self._num_batches = num_batches
        self._batch_size = batch_size

    def __iter__(self):
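        """Yield individual samples, or collated batches when batching is enabled.

        When ``shuffle`` is True the index list is reshuffled in place at the
        start of every epoch, so successive iterations see a different ordering.
        """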
        if self._shuffle:
            random.shuffle(self._indices)
        if self._num_batches is None:
            for i in self._indices:
                yield self._dataset.index_get(i)
        else:
            start = 0
            end = self._batch_size
            for _ in range(self._num_batches):
                batch = self._indices[start:end]
                out = [self._dataset.index_get(i) for i in batch]
                start = end
                end = end + self._batch_size
                yield self.collate_fn(out)


def _make_numpy_dataset(columns: Sequence['Columns'],
                        keys: 'KeyType' = None,
                        batch_size: int = None,
                        drop_last: bool = False,
                        shuffle: bool = True,
                        collate_fn: Callable = None) -> NumpyDataset:
    """Group column into a single numpy dataset, provides iterative looping over data.

    This API also provides the options to batch the data which is a major difference
    between other dataset APIs. In traditional Machine learning applications, it's quite
    natural to load the whole dataset as a single batch because it's possible to fit into
    the system memory. Passing the size of the dataset as the batch size would make it
    possible here to do just that. This API also acts as an entry point for other
    non-supported frameworks to load data from hangar as batches into the training loop.

    .. note::

        Column with layouts ``str`` or ``ndarray nested`` are not compatible with the
        dataset APIs in the current release. So making dataset is only possible for
        columns with layout ``ndarray flat``

    .. note::

        This is an experimental method in the current Hangar version. Please be aware
        that Significant changes may be introduced in future releases without advance
        notice or deprication warnings.

    Parameters
    ----------
    columns : :class:`~hangar.columns.column.Columns` or Sequence
        A column object, a tuple of column objects, or a list of column
        objects.
    keys : Union[str, int, List, Tuple]
        A sequence of sample names. If given, only those samples will be
        fetched from the columns
    batch_size : int
        Size of the batch. This will batch the dataset on the zeroth dimension. For
        example, if the data is of the shape (H x W x C) the batched data will be shaped
        as (B x H x W x C) where B is the batch size
    drop_last : bool
        Whether the last incomplete batch should be dropped
    shuffle : bool
        Whether the data should be reshuffled on each epoch
    collate_fn : Callable
        A function to collate samples together into a batch. When this option is absent,
        the heuristics used to collate the batch are
            1. If the column is an ndarray flat column, then `np.stack` will be used
            2. If the column has any other properties, `list.append` will be used
        Note that each element of the batch passed to collate_fn consists of
        datapoints from all the columns. For example, if the columns the data is
        being fetched from are col1 and col2, then the batch would look like

        ```python
        [
            (data0_col1, data0_col2),
            (data1_col1, data1_col2),
            (data2_col1, data2_col2),
            ...
        ]
        ```

    Returns
    -------
    :class:`.NumpyDataset`

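    Examples
    --------
    A minimal usage sketch; the column names and the import path (mirroring the
    tensorflow/torch loader examples) are illustrative only:

    >>> from hangar import Repository
    >>> from hangar.dataset import make_numpy_dataset
    >>> repo = Repository('.')
    >>> co = repo.checkout()
    >>> data = co.columns['mnist_data']
    >>> target = co.columns['mnist_target']
    >>> dataset = make_numpy_dataset([data, target], batch_size=512)
    >>> for bdata, btarget in dataset:
    ...     print(bdata.shape, btarget.shape)
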
    DEVELOPER NOTE
    --------------
    - Any update to this function signature or docstring must be reflected in the
      equivalent loader function in hangar/dataset/__init__.py. This function is
      "copied" to a top level __init__.py to allow unified API and lazyloader access
    """
    dataset = HangarDataset(columns, keys)
    dataset = NumpyDataset(dataset, batch_size, drop_last, shuffle, collate_fn)
    return dataset


================================================
FILE: src/hangar/dataset/tensorflow_dset.py
================================================
from typing import Sequence, Callable, List, Tuple, Union
import typing
from functools import partial
import random

try:
    import tensorflow as tf
except (ImportError, ModuleNotFoundError):
    raise ImportError(
        'Could not import "tensorflow" library. Ensure library is '
        'installed correctly to use tensorflow dataloader functions') from None

from .common import HangarDataset

if typing.TYPE_CHECKING:
    tf_TensorType = tf.python.framework.dtypes.DType
    tf_TensorShape = tf.TensorShape
    tf_Dataset = tf.data.Dataset
    KeyType = Union[str, int, List, Tuple]
    from ..columns.column import ModifierTypes as Columns
    import numpy as np


def yield_data(dataset: HangarDataset, indices: list,
               shuffle: bool) -> Tuple['np.ndarray']:
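    """Generator wrapped by ``tf.data.Dataset.from_generator``.

    Optionally shuffles the index list in place, then yields one sample at a
    time; single-column results are wrapped in a 1-tuple so the output
    signature is uniform across single- and multi-column datasets.
    """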
    if shuffle:
        random.shuffle(indices)
    for i in indices:
        out = dataset.index_get(i)
        yield out if isinstance(out, tuple) else (out,)


def _make_tensorflow_dataset(columns: Sequence['Columns'],
                             keys: 'KeyType' = None,
                             shuffle: bool = False) -> 'tf_Dataset':
    """Make a tensorflow dataset from a hangar column.

    This method uses the `from_generator` function from `tensorflow.data.Dataset` with a
    generator function that wraps all the hangar columns. This function also accepts an
    optional ``shuffle`` argument that performs a global shuffle across all the samples.
    This is convenient since a TensorFlow Dataset otherwise shuffles only a buffer of
    data small enough to fit into memory.

    .. Note::

        Columns with layout ``str`` or ``ndarray nested`` are not compatible with the
        dataset APIs in the current release. Making a dataset is therefore only possible
        for columns with layout ``ndarray flat``

    .. warning::

        This function relies on `tf.data.Dataset.from_generator`, which calls into the
        python interpreter to run the generator function. The generator function
        will not be serialized in a GraphDef and hence has limited portability. The
        operation must run in the same address space as the Python program that calls
        'make_tensorflow_dataset'. Also, since it calls back into the python interpreter,
        it is subject to the GIL and is not parallelizable even with a `Dataset.map`
        call. In fact, any attempt to parallelize the read will result in worse
        performance

    .. note::

        This is an experimental method in the current Hangar version. Please be aware
        that significant changes may be introduced in future releases without advance
        notice or deprecation warnings.

    Parameters
    ----------
    columns
        A column object, a tuple of column objects, or a list of column objects
    keys
        A sequence of sample names. If given, only those samples will be fetched from
        the columns
    shuffle
        Whether the generator should perform a global shuffle across all the samples.
        This does not restrict the user from additionally calling ``.shuffle()`` on
        the returned dataset


    Examples
    --------
    >>> from hangar import Repository
    >>> from hangar.dataset import make_tensorflow_dataset
    >>> import tensorflow as tf
    >>> tf.compat.v1.enable_eager_execution()
    >>> repo = Repository('')
    >>> co = repo.checkout()
    >>> data = co.columns['mnist_data']
    >>> target = co.columns['mnist_target']
    >>> tf_dset = make_tensorflow_dataset([data, target])
    >>> tf_dset = tf_dset.batch(512)
    >>> for bdata, btarget in tf_dset:
    ...     print(bdata.shape, btarget.shape)

    Returns
    -------
    :class:`tf_Dataset`

    DEVELOPER NOTE
    --------------
    - Any update to this function signature or docstring must be reflected in the
      equivalent loader function in hangar/dataset/__init__.py. This function is
      "coppied" to a top level __init__.py to allow unified API and lazyloader access
    """

    dataset = HangarDataset(columns, keys)
    indices = list(range(len(dataset)))
    generator: Callable = partial(yield_data, dataset, indices, shuffle)
    shapes: List[tf_TensorShape] = []
    types: List[tf_TensorType] = []

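    # Build the output signature for `tf.data.Dataset.from_generator`: columns with a
    # 'variable_shape' schema advertise fully unknown dimensions (None per axis) so
    # TensorFlow does not enforce a fixed per-sample shape.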
    for col in dataset.columns.values():
        if col.schema_type == 'variable_shape':
            shape = (None,) * len(col.shape)
        else:
            shape = col.shape
        shapes.append(tf.TensorShape(shape))
        types.append(tf.as_dtype(col.dtype))

    return tf.data.Dataset.from_generator(generator=generator,
                                          output_types=tuple(types),
                                          output_shapes=tuple(shapes))


================================================
FILE: src/hangar/dataset/torch_dset.py
================================================
from typing import Sequence, TYPE_CHECKING, Union, List, Tuple
from collections import OrderedDict

try:
    import torch
except (ImportError, ModuleNotFoundError):
    raise ImportError(
        'Could not import "pytorch" library. Ensure library is '
        'installed correctly to use pytorch dataloader functions')

from .common import HangarDataset

if TYPE_CHECKING:
    from ..columns.column import ModifierTypes as Columns
    KeyType = Union[str, int, List, Tuple]


class TorchDataset(torch.utils.data.Dataset):
    """TorchDataset inherits :class:`torch.utils.data.Dataset` and accepts few convenient
    arguments to wrap hangar columns to be used in :class:`torch.utils.data.DataLoaders`.
    It accepts a hangar Dataset object which exposes all the user requested columns and
    an array of keys to sample from. For more details, checkout
    `PyTorch Dataset `_
    """

    def __init__(self, hangar_dataset: HangarDataset, as_dict: bool = False):
        self.dataset = hangar_dataset
        self.column_names = list(hangar_dataset.columns.keys())
        self._as_dict = as_dict

    def __len__(self) -> int:
        return len(self.dataset)

    def __getitem__(self, index: int):
        data = self.dataset.index_get(index)
        if not self._as_dict:
            return data
        if len(self.column_names) == 1:
            return {self.column_names[0]: data}
        else:
            return OrderedDict(zip(self.column_names, data))


def _make_torch_dataset(columns: Sequence['Columns'],
                        keys: 'KeyType' = None,
                        as_dict: bool = False) -> TorchDataset:
    """Returns a :class:`torch.utils.data.Dataset` object which can be loaded into
    a :class:`torch.utils.data.DataLoader`.

    .. note::

        Columns with layout ``str`` or ``ndarray nested`` are not compatible with the
        dataset APIs in the current release. Making a dataset is therefore only possible
        for columns with layout ``ndarray flat``

    .. note::

        PyTorch's :class:`torch.utils.data.DataLoader` can efficiently handle custom
        operations such as shuffling, batching, multiprocessed reads, etc., and hence we
        limit the surface area of the dataset API here to simply opening the channel for
        reading. Use DataLoaders for such operations

    .. warning::

       On Windows systems, setting the parameter ``num_workers`` in the
       resulting :class:`torch.utils.data.DataLoader` method will result in a
       RuntimeError or deadlock. This is due to limitations of multiprocess
       start methods on Windows itself. Using the default argument value
       (``num_workers=0``) will let the DataLoader work in single process mode
       as expected.

    .. note::

        This is an experimental method in the current Hangar version. Please be aware
        that significant changes may be introduced in future releases without advance
        notice or deprecation warnings.

    Parameters
    ----------
    columns
        A column object, a tuple of column objects, or a list of column
        objects.
    keys
        A sequence of sample names. If given, only those samples will be
        fetched from the columns
    as_dict
        Return the data as an OrderedDict with column names as keys. If False, it returns
        a tuple of arrays

    Examples
    --------
    >>> from hangar import Repository
    >>> from torch.utils.data import DataLoader
    >>> from hangar.dataset import make_torch_dataset
    >>> from collections import namedtuple
    >>> repo = Repository('.')
    >>> co = repo.checkout()
    >>> imgcol = co.columns['images']
    >>> classcol = co.columns['classes']
    >>> dataset = make_torch_dataset((imgcol, classcol), as_dict=True)
    >>> loader = DataLoader(dataset, batch_size=16)
    >>> for batch in loader:
    ...     out = train_model(batch['images'])
    ...     loss = loss_fn(out, batch['classes'])

    Returns
    -------
    :class:`torch.utils.data.Dataset`

    DEVELOPER NOTE
    --------------
    - Any update to this function signature or docstring must be reflected in the
      equivalent loader function in hangar/dataset/__init__.py. This function is
      "coppied" to a top level __init__.py to allow unified API and lazyloader access
    """
    hangar_dataset = HangarDataset(columns, keys)
    return TorchDataset(hangar_dataset, as_dict)


================================================
FILE: src/hangar/diagnostics/__init__.py
================================================
__version__ = '0.5.2'

from .graphing import Graph

__all__ = ['Graph']

================================================
FILE: src/hangar/diagnostics/ecosystem.py
================================================
from typing import Dict, List, Tuple, Union


required_packages = [
    ('hangar', lambda p: p.__version__),
    ('click', lambda p: p.__version__),
    ('lmdb', lambda p: p.__version__),
    ('h5py', lambda p: p.__version__),
    ('hdf5plugin', lambda p: p.version),
    ('numpy', lambda p: p.__version__),
    ('blosc', lambda p: p.__version__),
    ('tqdm', lambda p: p.__version__),
    ('wrapt', lambda p: p.__version__),
    ('grpc', lambda p: p.__version__),
    ('xxhash', lambda p: p.VERSION),
]


def get_versions() -> dict:
    """Return information on software, machine, installed versions of packages.

    dict
        host, package, and `optional` package info.
    """
    d = {'host': get_system_info(),
         'packages': get_package_info(required_packages),
         'optional': get_optional_info()}
    return d


def get_system_info() -> List[Tuple[str, str]]:
    """Return local computer python, OS, and Machine info

    Returns
    -------
    List[Tuple[str, str]]
        field collected and value of the system parameter.
    """
    import locale
    import os
    import platform
    import struct
    import sys

    (sysname, nodename, release,
     version, machine, processor) = platform.uname()

    try:
        loc = locale.getlocale()
    except ValueError:  # pragma: no cover
        loc = None

    host = [
        ('python', f'{sys.version_info[:]}'),
        ('python-bits', f'{struct.calcsize("P") * 8}'),
        ('OS', f'{sysname}'),
        ('OS-release', f'{release}'),
        ('machine', f'{machine}'),
        ('processor', f'{processor}'),
        ('byteorder', f'{sys.byteorder}'),
        ('LC_ALL', f'{os.environ.get("LC_ALL", "None")}'),
        ('LANG', f'{os.environ.get("LANG", "None")}'),
        ('LOCALE', f'{loc}'),
        ('cpu-count', f'{os.cpu_count()}'),
    ]

    return host


def get_optional_info() -> Dict[str, Union[str, bool]]:
    """Get optional package info (tensorflow, pytorch, hdf5_bloscfilter, etc.)

    Returns
    -------
    Dict[str, Union[str, bool]]
        package name, package version (if installed, otherwise False)
    """
    res = {}
    try:
        import h5py
        bloscFilterAvail = h5py.h5z.filter_avail(32001)
    except ImportError:  # pragma: no cover
        bloscFilterAvail = False
    res['blosc-hdf5-plugin'] = bloscFilterAvail

    try:
        import torch
        torchVersion = torch.__version__
    except ImportError:  # pragma: no cover
        torchVersion = False
    res['pytorch'] = torchVersion

    try:
        import tensorflow
        tensorflowVersion = tensorflow.__version__
    except ImportError:  # pragma: no cover
        tensorflowVersion = False
    res['tensorflow'] = tensorflowVersion

    return res


def get_package_info(pkgs):
    """ get package versions for the passed required & optional packages.

    Using local imports to avoid import overhead on interpreter startup.
    """
    import importlib

    pversions = []
    for modname, ver_f in pkgs:
        try:
            mod = importlib.import_module(modname)
            ver = ver_f(mod)
            pversions.append((modname, ver))
        except Exception:  # pragma: no cover
            pversions.append((modname, None))

    return pversions


================================================
FILE: src/hangar/diagnostics/graphing.py
================================================
# -*- coding: utf-8 -*-
"""
Portions of this code have been taken and modified from the "asciidag" project.

URL:      https://github.com/sambrightman/asciidag/
File:     asciidag/graph.py
Commit:   7c1eefe3895630dc3906bbe9d553e0169202756a
Accessed: 25 MAR 2019

asciidag License
-------------------------------------------------------------------------------
License: Mozilla Public License 2.0
URL:     https://github.com/sambrightman/asciidag/blob/7c1eefe3895630dc3906bbe9d553e0169202756a/LICENSE
"""

import sys
import time
from enum import Enum

__all__ = ('Graph',)

COLOR_NORMAL = ""
COLOR_RESET = "\033[m"
COLOR_BOLD = "\033[1m"
COLOR_RED = "\033[31m"
COLOR_GREEN = "\033[32m"
COLOR_YELLOW = "\033[33m"
COLOR_BLUE = "\033[34m"
COLOR_MAGENTA = "\033[35m"
COLOR_CYAN = "\033[36m"
COLOR_BOLD_RED = "\033[1;31m"
COLOR_BOLD_GREEN = "\033[1;32m"
COLOR_BOLD_YELLOW = "\033[1;33m"
COLOR_BOLD_BLUE = "\033[1;34m"
COLOR_BOLD_MAGENTA = "\033[1;35m"
COLOR_BOLD_CYAN = "\033[1;36m"
COLOR_BG_RED = "\033[41m"
COLOR_BG_GREEN = "\033[42m"
COLOR_BG_YELLOW = "\033[43m"
COLOR_BG_BLUE = "\033[44m"
COLOR_BG_MAGENTA = "\033[45m"
COLOR_BG_CYAN = "\033[46m"

COLUMN_COLORS_ANSI = [
    COLOR_BOLD_RED,
    COLOR_BOLD_GREEN,
    COLOR_BOLD_YELLOW,
    COLOR_BOLD_BLUE,
    COLOR_BOLD_MAGENTA,
    COLOR_BOLD_CYAN,
    COLOR_RED,
    COLOR_GREEN,
    COLOR_YELLOW,
    COLOR_BLUE,
    COLOR_MAGENTA,
    COLOR_CYAN,
    COLOR_RESET,
]


class Column(object):  # pylint: disable=too-few-public-methods
    """A single column of output.

    Attributes:
        commit -- The parent commit of this column.
        color  -- The color to (optionally) print this column in.
                  This is an index into column_colors.

    """

    def __init__(self, commit, color):
        self.commit = commit
        self.color = color


class GraphState(Enum):  # pylint: disable=too-few-public-methods
    PADDING = 0
    SKIP = 1
    PRE_COMMIT = 2
    COMMIT = 3
    POST_MERGE = 4
    COLLAPSING = 5


class Graph(object):  # pragma: no cover
    """
    The commit currently being processed
        struct commit *commit

    The number of interesting parents that this commit has. Note that this is
    not the same as the actual number of parents. This count excludes parents
    that won't be printed in the graph output, as determined by
    is_interesting().
        int num_parents

    The width of the graph output for this commit. All rows for this commit are
    padded to this width, so that messages printed after the graph output are
    aligned.
        int width

    The next expansion row to print when state is GraphState.PRE_COMMIT
        int expansion_row

    The current output state. This tells us what kind of line next_line()
    should output.
        enum graph_state state

    The output state for the previous line of output. This is primarily used to
    determine how the first merge line should appear, based on the last line of
    the previous commit.
        enum graph_state prev_state

    The index of the column that refers to this commit. If none of the incoming
    columns refer to this commit, this will be equal to num_columns.
        int commit_index

    The commit_index for the previously displayed commit. This is used to
    determine how the first line of a merge graph output should appear, based
    on the last line of the previous commit.
        int prev_commit_index

    The maximum number of columns that can be stored in the columns and
    new_columns arrays. This is also half the number of entries that can be
    stored in the mapping and new_mapping arrays.
        int column_capacity

    The number of columns (also called "branch lines" in some places)
        int num_columns

    The number of columns in the new_columns array
        int num_new_columns

    The number of entries in the mapping array
        int mapping_size

    The column state before we output the current commit.
        struct column *columns

    The new column state after we output the current commit. Only valid when
    state is GraphState.COLLAPSING.
        struct column *new_columns

    An array that tracks the current state of each character in the output line
    during state GraphState.COLLAPSING. Each entry is -1 if this character is
    empty, or a non-negative integer if the character contains a branch line.
    The value of the integer indicates the target position for this branch
    line. (I.e., this array maps the current column positions to their desired
    positions.)

    The maximum capacity of this array is always sizeof(int) * 2 *
    column_capacity.
        int *mapping

    A temporary array for computing the next mapping state while we are
    outputting a mapping line. This is stored as part of the git_graph simply
    so we don't have to allocate a new temporary array each time we have to
    output a collapsing line.
        int *new_mapping

    The current default column color being used. This is stored as an index
    into the array column_colors.
        unsigned short default_column_color
    """
    def __init__(self,
                 fh=None,
                 first_parent_only=False,
                 use_color=True,
                 column_colors=None):
        """State machine for processing DAG nodes into ASCII graphs.

        show_nodes() deals with sorting the nodes from tips down into
        topological order. It then displays them line-by-line.

        """
        self.commit = None
        self.buf = ''

        if fh is None:
            self.outfile = sys.stdout
        else:
            self.outfile = fh
        self.first_parent_only = first_parent_only
        self.use_color = use_color
        if column_colors is None:
            self.column_colors = COLUMN_COLORS_ANSI
        else:
            self.column_colors = column_colors

        self.num_parents = 0
        self.width = 0
        self.expansion_row = 0
        self.state = GraphState.PADDING
        self.prev_state = GraphState.PADDING
        self.commit_index = 0
        self.prev_commit_index = 0
        self.num_columns = 0
        self.num_new_columns = 0
        self.mapping_size = 0
        # Start the column color at the maximum value, since we'll always
        # increment it for the first commit we output. This way we start at 0
        # for the first commit.
        self.default_column_color = len(self.column_colors) - 1

        self.columns = {}
        self.new_columns = {}
        self.mapping = {}
        self.new_mapping = {}

    def show_nodes(self, dag, spec, branch, start, order, stop='',
                   *, show_time=True, show_user=True):
        """Printing function that displays a DAG representing the commit history

        Print a revision history alongside a revision graph drawn with ASCII
        characters. Nodes printed as an * character are parents of the working
        directory. Any unreachable (but referenced) nodes are displayed as +

        Parameters
        ----------
        dag : dict
            directed acyclic graph of nodes and connections in commits. No more than
            2 connections per node
        spec: dict
            dictionary of commit specification (user name, email, message, etc).
        branch : dict
            dict of commit hash -> list of branch names whose HEAD commit is at
            that key.
        start : string
            commit hash to act as the top of the topological sort.
        order: list
            time-based ordering of commit hashes
        stop : str, optional
            commit hash to stop generating the graph at if the DAG contains more
            history than is needed (the default is '', which is the "parent" of
            the initial repository commit.)
        """
        if start == stop:
            return

        fmtSpec = {}
        for cmt, cmtspec in spec.items():
            if show_time:
                t = f"({time.strftime('%d%b%Y %H:%M:%S', time.gmtime(cmtspec['commit_time']))})"
            else:
                t = ''
            if show_user:
                u = f"({cmtspec['commit_user']})"
            else:
                u = ''
            m = cmtspec['commit_message']
            br = ' '
            if cmt in branch:
                for branchName in branch[cmt]:
                    if self.use_color is True:
                        br = f'{br}({COLOR_BOLD_RED}{branchName}{COLOR_RESET}) '
                    else:
                        br = f'{br}({branchName}) '
            fmtSpec[cmt] = f'{cmt}{br}{t}{u}: {m}'

        for rev in order:
            parents = dag[rev]
            self._update(rev, parents)
            self._show_commit()
            self.outfile.write(fmtSpec[rev])
            if not self._is_commit_finished():
                self.outfile.write('\n')
                self._show_remainder()
            self.outfile.write('\n')

    def _write_column(self, col, col_char):
        if col.color is not None:
            self.buf += self.column_colors[col.color]
        self.buf += col_char
        if col.color is not None:
            self.buf += self.column_colors[-1]

    def _update_state(self, state):
        self.prev_state = self.state
        self.state = state

    def _interesting_parents(self):
        for parent in self.commit_parents:
            yield parent
            if self.first_parent_only:
                break

    def _get_current_column_color(self):
        if not self.use_color:
            return None
        return self.default_column_color

    def _increment_column_color(self):
        self.default_column_color = ((self.default_column_color + 1)
                                     % len(self.column_colors))

    def _find_commit_color(self, commit):
        for i in range(self.num_columns):
            if self.columns[i].commit == commit:
                return self.columns[i].color
        return self._get_current_column_color()

    def _insert_into_new_columns(self, commit, mapping_index):
        """
        If the commit is already in the new_columns list, we don't need to add
        it. Just update the mapping correctly.
        """
        for i in range(self.num_new_columns):
            if self.new_columns[i].commit == commit:
                self.mapping[mapping_index] = i
                return mapping_index + 2

        # This commit isn't already in new_columns. Add it.
        column = Column(commit, self._find_commit_color(commit))
        self.new_columns[self.num_new_columns] = column
        self.mapping[mapping_index] = self.num_new_columns
        self.num_new_columns += 1
        return mapping_index + 2

    def _update_width(self, is_commit_in_existing_columns):
        """
        Compute the width needed to display the graph for this commit. This is
        the maximum width needed for any row. All other rows will be padded to
        this width.

        Compute the number of columns in the widest row: Count each existing
        column (self.num_columns), and each new column added by this commit.
        """
        max_cols = self.num_columns + self.num_parents

        # Even if the current commit has no parents to be printed, it still
        # takes up a column for itself.
        if self.num_parents < 1:
            max_cols += 1

        # We added a column for the current commit as part of self.num_parents.
        # If the current commit was already in self.columns, then we have double
        # counted it.
        if is_commit_in_existing_columns:
            max_cols -= 1

        # Each column takes up 2 spaces
        self.width = max_cols * 2

    def _update_columns(self):
        """
        Swap self.columns with self.new_columns. self.columns contains the state
        for the previous commit, and new_columns now contains the state for our
        commit.

        We'll re-use the old columns array as storage to compute the new columns
        list for the commit after this one.
        """
        self.columns, self.new_columns = self.new_columns, self.columns
        self.num_columns = self.num_new_columns
        self.num_new_columns = 0

        # Now update new_columns and mapping with the information for the commit
        # after this one.
        #
        # First, make sure we have enough room. At most, there will be
        # self.num_columns + self.num_parents columns for the next commit.
        max_new_columns = self.num_columns + self.num_parents

        # Clear out self.mapping
        self.mapping_size = 2 * max_new_columns
        for i in range(self.mapping_size):
            self.mapping[i] = -1

        # Populate self.new_columns and self.mapping
        #
        # Some of the parents of this commit may already be in self.columns. If
        # so, self.new_columns should only contain a single entry for each such
        # commit. self.mapping should contain information about where each
        # current branch line is supposed to end up after the collapsing is
        # performed.
        seen_this = False
        mapping_idx = 0
        is_commit_in_columns = True
        for i in range(self.num_columns + 1):
            if i == self.num_columns:
                if seen_this:
                    break
                is_commit_in_columns = False
                col_commit = self.commit
            else:
                col_commit = self.columns[i].commit

            if col_commit == self.commit:
                old_mapping_idx = mapping_idx
                seen_this = True
                self.commit_index = i
                for parent in self._interesting_parents():
                    # If this is a merge, or the start of a new childless
                    # column, increment the current color.
                    if self.num_parents > 1 or not is_commit_in_columns:
                        self._increment_column_color()
                    mapping_idx = self._insert_into_new_columns(
                        parent,
                        mapping_idx)
                # We always need to increment mapping_idx by at least 2, even if
                # it has no interesting parents. The current commit always takes
                # up at least 2 spaces.
                if mapping_idx == old_mapping_idx:
                    mapping_idx += 2
            else:
                mapping_idx = self._insert_into_new_columns(col_commit,
                                                            mapping_idx)

        # Shrink mapping_size to be the minimum necessary
        while (self.mapping_size > 1 and
               self.mapping[self.mapping_size - 1] < 0):
            self.mapping_size -= 1

        # Compute self.width for this commit
        self._update_width(is_commit_in_columns)

    def _update(self, commit, parents):
        self.commit = commit
        self.commit_parents = parents
        self.num_parents = len(list(self._interesting_parents()))

        # Store the old commit_index in prev_commit_index.
        # update_columns() will update self.commit_index for this commit.
        self.prev_commit_index = self.commit_index

        # Call update_columns() to update
        # columns, new_columns, and mapping.
        self._update_columns()
        self.expansion_row = 0

        # Update self.state.
        # Note that we don't call update_state() here, since we don't want to
        # update self.prev_state. No line for self.state was ever printed.
        #
        # If the previous commit didn't get to the GraphState.PADDING state, it
        # never finished its output. Goto GraphState.SKIP, to print out a line
        # to indicate that portion of the graph is missing.
        #
        # If there are 3 or more parents, we may need to print extra rows before
        # the commit, to expand the branch lines around it and make room for it.
        # We need to do this only if there is a branch row (or more) to the
        # right of this commit.
        #
        # If less than 3 parents, we can immediately print the commit line.
        if self.state != GraphState.PADDING:
            self.state = GraphState.SKIP
        elif (self.num_parents >= 3 and
              self.commit_index < (self.num_columns - 1)):
            self.state = GraphState.PRE_COMMIT  # noqa: E501 pylint: disable=redefined-variable-type
        else:
            self.state = GraphState.COMMIT

    def _is_mapping_correct(self):
        """
        The mapping is up to date if each entry is at its target, or is 1
        greater than its target. (If it is 1 greater than the target, '/' will
        be printed, so it will look correct on the next row.)
        """
        for i in range(self.mapping_size):
            target = self.mapping[i]
            if target < 0:
                continue
            if target == i // 2:
                continue
            return False
        return True

    def _pad_horizontally(self, chars_written):
        """Add spaces to string end so all lines of a commit have the same width.

        This way, fields printed to the right of the graph will remain aligned
        for the entire commit.
        """
        if chars_written >= self.width:
            return

        extra = self.width - chars_written
        self.buf += ' ' * extra

    def _output_padding_line(self):
        """Output a padding row, that leaves all branch lines unchanged
        """
        for i in range(self.num_new_columns):
            self._write_column(self.new_columns[i], '|')
            self.buf += ' '

        self._pad_horizontally(self.num_new_columns * 2)

    def _output_skip_line(self):
        """Output an ellipsis to indicate that a portion of the graph is missing.
        """
        self.buf += '...'
        self._pad_horizontally(3)

        if self.num_parents >= 3 and self.commit_index < self.num_columns - 1:
            self._update_state(GraphState.PRE_COMMIT)
        else:
            self._update_state(GraphState.COMMIT)

    def _output_pre_commit_line(self):
        """Formats a row with increased space around a commit with multiple parents.

        This is done in order to make room for the commit. It should only be
        called when there are 3 or more parents. We need 2 extra rows for every
        parent over 2.
        """
        assert self.num_parents >= 3, 'not enough parents to add expansion row'
        num_expansion_rows = (self.num_parents - 2) * 2

        # self.expansion_row tracks the current expansion row we are on.
        # It should be in the range [0, num_expansion_rows - 1]
        assert (0 <= self.expansion_row < num_expansion_rows), \
            'wrong number of expansion rows'

        # Output the row
        seen_this = False
        chars_written = 0
        for i in range(self.num_columns):
            col = self.columns[i]
            if col.commit == self.commit:
                seen_this = True
                self._write_column(col, '|')
                self.buf += ' ' * self.expansion_row
                chars_written += 1 + self.expansion_row
            elif seen_this and (self.expansion_row == 0):
                # This is the first line of the pre-commit output. If the
                # previous commit was a merge commit and ended in the
                # GraphState.POST_MERGE state, all branch lines after
                # self.prev_commit_index were printed as "\" on the previous
                # line. Continue to print them as "\" on this line. Otherwise,
                # print the branch lines as "|".
                if (self.prev_state == GraphState.POST_MERGE and
                        self.prev_commit_index < i):
                    self._write_column(col, '\\')
                else:
                    self._write_column(col, '|')
                chars_written += 1
            elif seen_this and (self.expansion_row > 0):
                self._write_column(col, '\\')
                chars_written += 1
            else:
                self._write_column(col, '|')
                chars_written += 1
            self.buf += ' '
            chars_written += 1

        self._pad_horizontally(chars_written)

        # Increment self.expansion_row, and move to state GraphState.COMMIT if
        # necessary
        self.expansion_row += 1
        if self.expansion_row >= num_expansion_rows:
            self._update_state(GraphState.COMMIT)

    # Draw an octopus merge and return the number of characters written.
    def _draw_octopus_merge(self):
        """
        Here dashless_commits represents the number of parents which don't
        need to have dashes (because their edges fit neatly under the commit).
        """
        dashless_commits = 2
        num_dashes = ((self.num_parents - dashless_commits) * 2) - 1
        for i in range(num_dashes):
            col_num = i // 2 + dashless_commits + self.commit_index
            self._write_column(self.new_columns[col_num], '-')
        col_num = num_dashes // 2 + dashless_commits + self.commit_index
        self._write_column(self.new_columns[col_num], '.')
        return num_dashes + 1

    def _output_commit_line(self):  # noqa: C901, E501 pylint: disable=too-many-branches
        """
        Output the row containing this commit. Iterate up to and including
        self.num_columns, since the current commit may not be in any of the
        existing columns. (This happens when the current commit doesn't have
        any children that we have already processed.)
        """
        seen_this = False
        chars_written = 0
        for i in range(self.num_columns + 1):
            if i == self.num_columns:
                if seen_this:
                    break
                col_commit = self.commit
            else:
                col = self.columns[i]
                col_commit = self.columns[i].commit

            if col_commit == self.commit:
                seen_this = True
                self.buf += '*'
                chars_written += 1

                if self.num_parents > 2:
                    chars_written += self._draw_octopus_merge()
            elif seen_this and self.num_parents > 2:
                self._write_column(col, '\\')
                chars_written += 1
            elif seen_this and self.num_parents == 2:
                # This is a 2-way merge commit. There is no
                # GraphState.PRE_COMMIT stage for 2-way merges, so this is the
                # first line of output for this commit. Check to see what the
                # previous line of output was.
                #
                # If it was GraphState.POST_MERGE, the branch line coming into
                # this commit may have been '\', and not '|' or '/'. If so,
                # output the branch line as '\' on this line, instead of '|'.
                # This makes the output look nicer.
                if (self.prev_state == GraphState.POST_MERGE and
                        self.prev_commit_index < i):
                    self._write_column(col, '\\')
                else:
                    self._write_column(col, '|')
                chars_written += 1
            else:
                self._write_column(col, '|')
                chars_written += 1
            self.buf += ' '
            chars_written += 1

        self._pad_horizontally(chars_written)
        if self.num_parents > 1:
            self._update_state(GraphState.POST_MERGE)
        elif self._is_mapping_correct():
            self._update_state(GraphState.PADDING)
        else:
            self._update_state(GraphState.COLLAPSING)

    def _find_new_column_by_commit(self, commit):
        for i in range(self.num_new_columns):
            if self.new_columns[i].commit == commit:
                return self.new_columns[i]
        return None

    def _output_post_merge_line(self):
        seen_this = False
        chars_written = 0
        for i in range(self.num_columns + 1):
            if i == self.num_columns:
                if seen_this:
                    break
                col_commit = self.commit
            else:
                col = self.columns[i]
                col_commit = col.commit

            if col_commit == self.commit:
                # Since the current commit is a merge, find the columns for the
                # parent commits in new_columns and use those to format the
                # edges.
                seen_this = True
                parents = self._interesting_parents()
                assert parents, 'merge has no parents'
                par_column = self._find_new_column_by_commit(next(parents))
                assert par_column, 'parent column not found'
                self._write_column(par_column, '|')
                chars_written += 1
                for parent in parents:
                    assert parent, 'parent is not valid'
                    par_column = self._find_new_column_by_commit(parent)
                    assert par_column, 'parent column not found'
                    self._write_column(par_column, '\\')
                    self.buf += ' '
                chars_written += (self.num_parents - 1) * 2
            elif seen_this:
                self._write_column(col, '\\')
                self.buf += ' '
                chars_written += 2
            else:
                self._write_column(col, '|')
                self.buf += ' '
                chars_written += 2

        self._pad_horizontally(chars_written)

        if self._is_mapping_correct():
            self._update_state(GraphState.PADDING)
        else:
            self._update_state(GraphState.COLLAPSING)

    def _output_collapsing_line(self):  # noqa: C901, E501 pylint: disable=too-many-branches
        used_horizontal = False
        horizontal_edge = -1
        horizontal_edge_target = -1

        # Clear out the new_mapping array
        for i in range(self.mapping_size):
            self.new_mapping[i] = -1

        for i in range(self.mapping_size):
            target = self.mapping[i]
            if target < 0:
                continue

            # Since update_columns() always inserts the leftmost column first,
            # each branch's target location should always be either its current
            # location or to the left of its current location.
            #
            # We never have to move branches to the right. This makes the graph
            # much more legible, since whenever branches cross, only one is
            # moving directions.
            assert target * 2 <= i, \
                'position {} targeting column {}'.format(i, target * 2)

            if target * 2 == i:
                # This column is already in the correct place
                assert self.new_mapping[i] == -1
                self.new_mapping[i] = target
            elif self.new_mapping[i - 1] < 0:
                # Nothing is to the left. Move to the left by one.
                self.new_mapping[i - 1] = target
                # If there isn't already an edge moving horizontally, select this one.
                if horizontal_edge == -1:
                    horizontal_edge = i
                    horizontal_edge_target = target
                    # The variable target is the index of the graph column, and
                    # therefore target * 2 + 3 is the actual screen column of
                    # the first horizontal line.
                    for j in range((target * 2) + 3, i - 2, 2):
                        self.new_mapping[j] = target
            elif self.new_mapping[i - 1] == target:
                # There is a branch line to our left already, and it is our
                # target. We combine with this line, since we share the same
                # parent commit.
                #
                # We don't have to add anything to the output or new_mapping,
                # since the existing branch line has already taken care of it.
                pass
            else:
                # There is a branch line to our left, but it isn't our target.
                # We need to cross over it.
                #
                # The space just to the left of this branch should always be empty.
                #
                # The branch to the left of that space should be our eventual target.
                assert self.new_mapping[i - 1] > target
                assert self.new_mapping[i - 2] < 0
                assert self.new_mapping[i - 3] == target
                self.new_mapping[i - 2] = target
                # Mark this branch as the horizontal edge to prevent any other
                # edges from moving horizontally.
                if horizontal_edge == -1:
                    horizontal_edge = i

        # The new mapping may be 1 smaller than the old mapping
        if self.new_mapping[self.mapping_size - 1] < 0:
            self.mapping_size -= 1

        # Output a line based on the new mapping info
        for i in range(self.mapping_size):
            target = self.new_mapping[i]
            if target < 0:
                self.buf += ' '
            elif target * 2 == i:
                self._write_column(self.new_columns[target], '|')
            elif target == horizontal_edge_target and i != horizontal_edge - 1:
                # Set the mappings for all but the first segment to -1 so that
                # they won't continue into the next line.
                if i != (target * 2) + 3:
                    self.new_mapping[i] = -1
                used_horizontal = True
                self._write_column(self.new_columns[target], '_')
            else:
                if used_horizontal and i < horizontal_edge:
                    self.new_mapping[i] = -1
                self._write_column(self.new_columns[target], '/')

        self._pad_horizontally(self.mapping_size)
        self.mapping, self.new_mapping = self.new_mapping, self.mapping

        # If self.mapping indicates that all of the branch lines are already in
        # the correct positions, we are done. Otherwise, we need to collapse
        # some branch lines together.
        if self._is_mapping_correct():
            self._update_state(GraphState.PADDING)

    def _next_line(self):  # pylint: disable=too-many-return-statements
        if self.state == GraphState.PADDING:
            self._output_padding_line()
            return False
        elif self.state == GraphState.SKIP:
            self._output_skip_line()
            return False
        elif self.state == GraphState.PRE_COMMIT:
            self._output_pre_commit_line()
            return False
        elif self.state == GraphState.COMMIT:
            self._output_commit_line()
            return True
        elif self.state == GraphState.POST_MERGE:
            self._output_post_merge_line()
            return False
        elif self.state == GraphState.COLLAPSING:
            self._output_collapsing_line()
            return False
        else:
            return False

    def _padding_line(self):
        """Output a padding line in the graph.

        This is similar to next_line(). However, it is guaranteed to never print
        the current commit line. Instead, if the commit line is next, it will
        simply output a line of vertical padding, extending the branch lines
        downwards, but leaving them otherwise unchanged.
        """
        if self.state != GraphState.COMMIT:
            self._next_line()
            return

        # Output the row containing this commit
        # Iterate up to and including self.num_columns, since the current commit
        # may not be in any of the existing columns. (This happens when the
        # current commit doesn't have any children that we have already
        # processed.)
        for i in range(self.num_columns):
            col = self.columns[i]
            self._write_column(col, '|')
            if col.commit == self.commit and self.num_parents > 2:
                self.buf += ' ' * (self.num_parents - 2) * 2
            else:
                self.buf += ' '

        self._pad_horizontally(self.num_columns)

        # Update self.prev_state since we have output a padding line
        self.prev_state = GraphState.PADDING

    def _is_commit_finished(self):
        return self.state == GraphState.PADDING

    def _show_commit(self):
        shown_commit_line = False

        # When showing a diff of a merge against each of its parents, we are
        # called once for each parent without update having been called. In this
        # case, simply output a single padding line.
        if self._is_commit_finished():
            self._show_padding()
            shown_commit_line = True

        while not shown_commit_line and not self._is_commit_finished():
            shown_commit_line = self._next_line()
            self.outfile.write(self.buf)
            if not shown_commit_line:
                self.outfile.write('\n')
            self.buf = ''

    def _show_padding(self):
        self._padding_line()
        self.outfile.write(self.buf)
        self.buf = ''

    def _show_remainder(self):
        shown = False

        if self._is_commit_finished():
            return False

        while True:
            self._next_line()
            self.outfile.write(self.buf)
            self.buf = ''
            shown = True

            if not self._is_commit_finished():
                self.outfile.write('\n')
            else:
                break

        return shown


================================================
FILE: src/hangar/diagnostics/integrity.py
================================================
from pathlib import Path
import warnings

import lmdb
from tqdm import tqdm

from ..records import (
    hash_data_db_key_from_raw_key,
    hash_schema_db_key_from_raw_key,
)
from ..backends import BACKEND_ACCESSOR_MAP
from ..txnctx import TxnRegister
from ..records import commiting, hashmachine, hashs, parsing, queries, heads
from ..op_state import report_corruption_risk_on_parsing_error


@report_corruption_risk_on_parsing_error
def _verify_column_integrity(hashenv: lmdb.Environment, repo_path: Path):

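    # Re-derive the content digest of every locally stored array and compare it to the
    # digest recorded in the hash db. Backend accessors are opened lazily (read-only)
    # per backend code; remote-only references cannot be hashed locally and are only
    # counted so a warning can be emitted at the end.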
    hq = hashs.HashQuery(hashenv)
    narrays, nremote = hq.num_data_records(), 0
    array_kvs = hq.gen_all_data_digests_and_parsed_backend_specs()
    try:
        bes = {}
        for digest, spec in tqdm(array_kvs, total=narrays, desc='verifying column data'):
            if spec.backend not in bes:
                bes[spec.backend] = BACKEND_ACCESSOR_MAP[spec.backend](repo_path, None, None)
                bes[spec.backend].open(mode='r')
            if spec.islocal is False:
                nremote += 1
                continue
            data = bes[spec.backend].read_data(spec)
            tcode = hashmachine.hash_type_code_from_digest(digest)

            hash_func = hashmachine.hash_func_from_tcode(tcode)
            calc_digest = hash_func(data)
            if calc_digest != digest:
                raise RuntimeError(
                    f'Data corruption detected for array. Expected digest `{digest}` '
                    f'currently mapped to spec `{spec}`. Found digest `{calc_digest}`')
        if nremote > 0:
            warnings.warn(
                'Cannot verify integrity of partially fetched array data references. '
                f'For complete proof, fetch all remote data locally. Did not verify '
                f'{nremote}/{narrays} arrays', RuntimeWarning)
    finally:
        for be in bes.keys():
            bes[be].close()


@report_corruption_risk_on_parsing_error
def _verify_schema_integrity(hashenv: lmdb.Environment):

    hq = hashs.HashQuery(hashenv)
    schema_kvs = hq.gen_all_schema_digests_and_parsed_specs()
    nschemas = hq.num_schema_records()
    for digest, val in tqdm(schema_kvs, total=nschemas, desc='verifying schemas'):
        tcode = hashmachine.hash_type_code_from_digest(digest)
        hash_func = hashmachine.hash_func_from_tcode(tcode)
        calc_digest = hash_func(val)
        if calc_digest != digest:
            raise RuntimeError(
                f'Data corruption detected for schema. Expected digest `{digest}` '
                f'currently mapped to spec `{val}`. Found digest `{calc_digest}`')


@report_corruption_risk_on_parsing_error
def _verify_commit_tree_integrity(refenv: lmdb.Environment):

    initialCmt = None
    all_commits = set(commiting.list_all_commits(refenv))
    reftxn = TxnRegister().begin_reader_txn(refenv)
    try:
        for cmt in tqdm(all_commits, desc='verifying commit trees'):
            pKey = parsing.commit_parent_db_key_from_raw_key(cmt)
            pVal = reftxn.get(pKey, default=False)
            if pVal is False:
                raise RuntimeError(
                    f'Data corruption detected for parent ref of commit `{cmt}`. '
                    f'Parent ref not recorded in refs db.')

            p_val = parsing.commit_parent_raw_val_from_db_val(pVal)
            parents = p_val.ancestor_spec
            if parents.master_ancestor != '':
                if parents.master_ancestor not in all_commits:
                    raise RuntimeError(
                        f'Data corruption detected in commit tree. Commit `{cmt}` '
                        f'with ancestors val `{parents}` references non-existing '
                        f'master ancestor `{parents.master_ancestor}`.')
            if parents.dev_ancestor != '':
                if parents.dev_ancestor not in all_commits:
                    raise RuntimeError(
                        f'Data corruption detected in commit tree. Commit `{cmt}` '
                        f'with ancestors val `{parents}` references non-existing '
                        f'dev ancestor `{parents.dev_ancestor}`.')
            if (parents.master_ancestor == '') and (parents.dev_ancestor == ''):
                if initialCmt is not None:
                    raise RuntimeError(
                        f'Commit tree integrity compromised. Multiple "initial" (commits '
                        f'with no parents) found. First `{initialCmt}`, second `{cmt}`')
                else:
                    initialCmt = cmt
    finally:
        TxnRegister().abort_reader_txn(refenv)


@report_corruption_risk_on_parsing_error
def _verify_commit_ref_digests_exist(hashenv: lmdb.Environment, refenv: lmdb.Environment):

    all_commits = commiting.list_all_commits(refenv)
    datatxn = TxnRegister().begin_reader_txn(hashenv, buffer=True)
    try:
        with datatxn.cursor() as cur:
            for cmt in tqdm(all_commits, desc='verifying commit ref digests'):
                with commiting.tmp_cmt_env(refenv, cmt) as tmpDB:
                    rq = queries.RecordQuery(tmpDB)
                    array_data_digests = set(rq.data_hashes())
                    schema_digests = set(rq.schema_hashes())

                    for datadigest in array_data_digests:
                        dbk = hash_data_db_key_from_raw_key(datadigest)
                        exists = cur.set_key(dbk)
                        if exists is False:
                            raise RuntimeError(
                                f'Data corruption detected in commit refs. Commit `{cmt}` '
                                f'references array data digest `{datadigest}` which does not '
                                f'exist in data hash db.')

                    for schemadigest in schema_digests:
                        dbk = hash_schema_db_key_from_raw_key(schemadigest)
                        exists = cur.set_key(dbk)
                        if exists is False:
                            raise RuntimeError(
                                f'Data corruption detected in commit refs. Commit `{cmt}` '
                                f'references schema digest `{schemadigest}` which does not '
                                f'exist in data hash db.')

    finally:
        TxnRegister().abort_reader_txn(hashenv)


@report_corruption_risk_on_parsing_error
def _verify_branch_integrity(branchenv: lmdb.Environment, refenv: lmdb.Environment):

    branch_names = heads.get_branch_names(branchenv)
    if len(branch_names) < 1:
        raise RuntimeError(
            f'Branch map compromised. Repo must contain at least one branch. '
            f'Found {len(branch_names)} branches.')

    for bname in tqdm(branch_names, desc='verifying branches'):
        bhead = heads.get_branch_head_commit(branchenv=branchenv, branch_name=bname)
        exists = commiting.check_commit_hash_in_history(refenv=refenv, commit_hash=bhead)
        if exists is False:
            raise RuntimeError(
                f'Branch commit map compromised. Branch name `{bname}` references '
                f'commit digest `{bhead}` which does not exist in refs db.')

    staging_bname = heads.get_staging_branch_head(branchenv)
    if staging_bname not in branch_names:
        raise RuntimeError(
            f'Branch commit map compromised. Staging head refers to branch name '
            f'`{staging_bname}` which does not exist in the branch db.')


def run_verification(branchenv: lmdb.Environment,
                     hashenv: lmdb.Environment,
                     refenv: lmdb.Environment,
                     repo_path: Path):

    _verify_branch_integrity(branchenv, refenv)
    _verify_commit_tree_integrity(refenv)
    _verify_commit_ref_digests_exist(hashenv, refenv)
    _verify_schema_integrity(hashenv)
    _verify_column_integrity(hashenv, repo_path)
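

# Illustrative sketch (not part of the library API): the content-addressing
# check performed in ``_verify_column_integrity`` boils down to "re-hash the
# stored payload and compare against the digest it is filed under". The
# ``hashlib.blake2b`` call below is only a stand-in; the real tcode -> hash
# function mapping lives in ``records.hashmachine``.
def _example_recheck_payload(digest: str, payload: bytes) -> None:
    import hashlib

    calc = hashlib.blake2b(payload, digest_size=20).hexdigest()
    if calc != digest:
        raise RuntimeError(f'corruption: expected `{digest}`, found `{calc}`')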


================================================
FILE: src/hangar/diff.py
================================================
from itertools import starmap
from typing import Iterable, List, NamedTuple, Set, Tuple, Union

import lmdb

from .records import (
    dynamic_layout_data_record_from_db_key,
    schema_column_record_from_db_key,
    data_record_digest_val_from_db_val,
    ColumnSchemaKey,
    FlatColumnDataKey,
    NestedColumnDataKey,
)
from .records.commiting import (
    check_commit_hash_in_history,
    get_commit_ancestors_graph,
    get_commit_ref,
    get_commit_spec,
    tmp_cmt_env,
)
from .records.heads import get_branch_head_commit, get_branch_names
from .records.queries import RecordQuery
from .txnctx import TxnRegister

# ------------------------- Differ Types --------------------------------------


class HistoryDiffStruct(NamedTuple):
    masterHEAD: str
    devHEAD: str
    ancestorHEAD: str
    canFF: bool


class Changes(NamedTuple):
    schema: dict
    samples: tuple


class DiffOutDB(NamedTuple):
    added: Set[Tuple[bytes, bytes]]
    deleted: Set[Tuple[bytes, bytes]]
    mutated: Set[Tuple[bytes, bytes]]


class DiffOut(NamedTuple):
    added: Changes
    deleted: Changes
    mutated: Changes


ConflictKeys = Union[str, FlatColumnDataKey, NestedColumnDataKey, ColumnSchemaKey]


class Conflicts(NamedTuple):
    """Four types of conflicts are accessible through this object.

    Attributes
    ----------
    t1
        Addition of key in master AND dev with different values.
    t21
        Removed key in master, mutated value in dev.
    t22
        Removed key in dev, mutated value in master.
    t3
        Mutated key in both master AND dev to different values.
    conflict
        Bool indicating if any type of conflict is present.
    """
    t1: Iterable[ConflictKeys]
    t21: Iterable[ConflictKeys]
    t22: Iterable[ConflictKeys]
    t3: Iterable[ConflictKeys]
    conflict: bool


class DiffAndConflictsDB(NamedTuple):
    diff: DiffOutDB
    conflict: Conflicts


class DiffAndConflicts(NamedTuple):
    diff: DiffOut
    conflict: Conflicts


# ------------------------------- Differ Methods ------------------------------


def diff_envs(base_env: lmdb.Environment, head_env: lmdb.Environment) -> DiffOutDB:
    """Main diff algorithm to determine changes between unpacked lmdb environments.

    Parameters
    ----------
    base_env : lmdb.Environment
        starting point to calculate changes from
    head_env : lmdb.Environment
        some commit which should be compared to BASE

    Returns
    -------
    DiffOutDB
        iterable of db formatted key/value pairs for `added`, `deleted`,
        `mutated` fields
    """
    added, deleted, mutated = [], [], []

    baseTxn = TxnRegister().begin_reader_txn(base_env)
    headTxn = TxnRegister().begin_reader_txn(head_env)
    baseCur = baseTxn.cursor()
    headCur = headTxn.cursor()
    try:
        moreBase = baseCur.first()
        moreHead = headCur.first()

        while True:
            if moreBase and moreHead:
                bKey, bVal = baseCur.item()
                hKey, hVal = headCur.item()
            elif (not moreBase) and (not moreHead):
                break
            # necessary to avoid deadlock at last items
            elif not moreBase:
                bKey = b'x'
                bVal = b''
                hKey, hVal = headCur.item()
            else:  # (not moreHead)
                hKey = b'x'
                hVal = b''
                bKey, bVal = baseCur.item()

            # inserted
            if bKey > hKey:
                added.append((hKey, hVal))
                moreHead = headCur.next()
                continue
            # deleted
            elif bKey < hKey:
                deleted.append((bKey, bVal))
                moreBase = baseCur.next()
                continue
            # no change
            elif (bKey == hKey) and (bVal == hVal):
                moreBase = baseCur.next()
                moreHead = headCur.next()
                continue
            # mutated
            else:  # (bKey == hKey) and (bVal != hVal)
                mutated.append((hKey, hVal))
                moreBase = baseCur.next()
                moreHead = headCur.next()
                continue

    finally:
        baseCur.close()
        headCur.close()
        TxnRegister().abort_reader_txn(base_env)
        TxnRegister().abort_reader_txn(head_env)

    return DiffOutDB(set(added), set(deleted), set(mutated))
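

def _example_sorted_merge_walk(base: dict, head: dict) -> DiffOutDB:
    """Illustrative sketch (not part of the library API).

    :func:`diff_envs` above is a single-pass merge-walk over two key-sorted
    lmdb cursors. The same classification applied to plain dicts (iterated in
    key-sorted order) looks like this.
    """
    added, deleted, mutated = set(), set(), set()
    for k in sorted(set(base) | set(head)):
        if k not in base:                    # only in head -> added
            added.add((k, head[k]))
        elif k not in head:                  # only in base -> deleted
            deleted.add((k, base[k]))
        elif base[k] != head[k]:             # same key, new value -> mutated
            mutated.add((k, head[k]))
    return DiffOutDB(added=added, deleted=deleted, mutated=mutated)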


def _raw_from_db_change(changes: Set[Tuple[bytes, bytes]]) -> Changes:
    """Perform conversion for records from db -> raw

    Parameters
    ----------
    changes : Set[Tuple[bytes, bytes]]
        iterable of db formatted key/value pairs

    Returns
    -------
    Changes
        human readable formatted dict of key/value pairs.
    """
    columnKeys, metadataKeys, schemaKeyVals = [], [], []
    for k, v in changes:
        if k[:2] == b'f:':
            columnKeys.append(k)
            continue
        elif k[:2] == b'n:':
            columnKeys.append(k)
            continue
        elif k[:2] == b's:':
            schemaKeyVals.append((k, v))
            continue
        else:
            raise RuntimeError(f'Unknown record type prefix encountered: '
                               f'{k[:2]}. full record => k: {k} & v: {v}')

    columndata = map(dynamic_layout_data_record_from_db_key, columnKeys)
    schemas = {
        schema_column_record_from_db_key(k):
            data_record_digest_val_from_db_val(v) for k, v in schemaKeyVals
    }
    return Changes(schema=schemas, samples=tuple(columndata))


def _all_raw_from_db_changes(outDb: DiffAndConflictsDB) -> DiffAndConflicts:
    """Convert db formatted db diff/conflict results to human readable

    Parameters
    ----------
    outDb : DiffAndConflictsDB
        raw formatted structure containing `diff` and `conflict` fields

    Returns
    -------
    DiffAndConflicts
        Human readable struct containing ``diff`` and ``conflict`` fields.
    """
    it = (outDb.diff.added, outDb.diff.deleted, outDb.diff.mutated)
    out = map(_raw_from_db_change, it)  # significant perf improvement for large commits
    outRawDiff = DiffOut(*out)

    t1 = _raw_from_db_change(outDb.conflict.t1)
    t21 = _raw_from_db_change(outDb.conflict.t21)
    t22 = _raw_from_db_change(outDb.conflict.t22)
    t3 = _raw_from_db_change(outDb.conflict.t3)
    outRawConf = Conflicts(t1=t1, t21=t21, t22=t22, t3=t3, conflict=outDb.conflict.conflict)
    res = DiffAndConflicts(diff=outRawDiff, conflict=outRawConf)
    return res

# ------------------------- Commit Differ -------------------------------------


def _symmetric_difference_keys(pair1: Set[Tuple[bytes, bytes]],
                               pair2: Set[Tuple[bytes, bytes]]
                               ) -> List[Tuple[bytes, bytes]]:
    """Find all keys common to both input pairs AND which have different values.

    Essentially a modified `symmetric_difference` set operation, which keeps
    track of all seen items. Note: This ignores any `count` tracking values in
    the input tuples (i.e. lmdb keys ending in ":")

    Parameters
    ----------
    pair1 : Set[Tuple[bytes, bytes]]
        key/value pairs making up the first set
    pair2 : Set[Tuple[bytes, bytes]]
        key/value pairs making up the second set

    Returns
    -------
    List[Tuple[bytes, bytes]]
        keys which appear in both input pair sets but which have different values.
    """
    seen = set()
    conflict = []
    for k, v in pair1.symmetric_difference(pair2):
        if k in seen:
            conflict.append((k, v))
        else:
            seen.add(k)
    return conflict


def find_conflicts(master_diff: DiffOutDB, dev_diff: DiffOutDB) -> Conflicts:
    """Determine if/which type of conflicting changes occur in diverged commits.

    This function expects the output of :func:`diff_envs` for two commits,
    each diffed against a common base commit.

    Parameters
    ----------
    master_diff : DiffOutDB
        changes (adds, dels, mutations) between base and master HEAD
    dev_diff : DiffOutDB
        changes (adds, dels, mutations) between base and dev HEAD

    Returns
    -------
    Conflicts
        Tuple containing fields for `t1`, `t21`, `t22`, `t3`, and (bool)
        `conflict` recording whether, and what type of, conflict has
        occurred
    """
    t1 = _symmetric_difference_keys(master_diff.added, dev_diff.added)
    t21 = _symmetric_difference_keys(master_diff.deleted, dev_diff.mutated)
    t22 = _symmetric_difference_keys(master_diff.mutated, dev_diff.deleted)
    t3 = _symmetric_difference_keys(master_diff.mutated, dev_diff.mutated)
    isConflict = bool(any([t1, t21, t22, t3]))

    res = Conflicts(t1=t1, t21=t21, t22=t22, t3=t3, conflict=isConflict)
    return res
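

def _example_conflict_detection() -> Conflicts:
    """Illustrative sketch (not part of the library API).

    Two diverged commits which both add the same db key with different values
    produce a type-1 (t1) conflict.
    """
    master = DiffOutDB(added={(b'f:col/0', b'digest-a')}, deleted=set(), mutated=set())
    dev = DiffOutDB(added={(b'f:col/0', b'digest-b')}, deleted=set(), mutated=set())
    res = find_conflicts(master, dev)
    assert res.conflict is True and len(res.t1) == 1
    return res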


# ---------------------------- Differ Base  -----------------------------------


class BaseUserDiff(object):

    def __init__(self, branchenv: lmdb.Environment, refenv: lmdb.Environment, *args, **kwargs):

        self._branchenv: lmdb.Environment = branchenv
        self._refenv: lmdb.Environment = refenv

    def _determine_ancestors(self, mHEAD: str, dHEAD: str) -> HistoryDiffStruct:
        """Search the commit history to determine the closest common ancestor.

        The closest common ancestor is important because it serves as the "merge
        base" in a 3-way merge strategy. This is a very naive implementation, but it
        works well enough right now for simple branch histories.

        Parameters
        ----------
        mHEAD : str
            full commit hash to use as the `master` branch head commit
        dHEAD : str
            full commit hash to use as the `dev` branch head commit

        Returns
        -------
        HistoryDiffStruct
            indicating the masterHEAD, devHEAD, ancestorHEAD, and canFF which
            tells if this is a fast-forward-able commit.
        """
        mAncestors = get_commit_ancestors_graph(self._refenv, mHEAD)
        dAncestors = get_commit_ancestors_graph(self._refenv, dHEAD)
        cAncestors = set(mAncestors.keys()).intersection(set(dAncestors.keys()))
        canFF = True if mHEAD in cAncestors else False

        ancestorOrder = []
        for ancestor in cAncestors:
            timeOfCommit = get_commit_spec(self._refenv, ancestor).commit_time
            ancestorOrder.append((ancestor, timeOfCommit))

        ancestorOrder.sort(key=lambda t: t[1], reverse=True)
        commonAncestor = ancestorOrder[0][0]
        res = HistoryDiffStruct(
            masterHEAD=mHEAD, devHEAD=dHEAD, ancestorHEAD=commonAncestor, canFF=canFF)
        return res

    @staticmethod
    def _diff3(a_env: lmdb.Environment,
               m_env: lmdb.Environment,
               d_env: lmdb.Environment) -> DiffAndConflictsDB:
        """Three way diff and conflict finder from ancestor, master, and dev commits.

        Parameters
        ----------
        a_env : lmdb.Environment
            unpacked lmdb environment for the ancestor commit
        m_env : lmdb.Environment
            unpacked lmdb environment for the master commit, current HEAD
        d_env : lmdb.Environment
            unpacked lmdb environment for the dev commit, compare to HEAD

        Returns
        -------
        DiffAndConflictsDB
            structure containing (`additions`, `deletions`, `mutations`) for
            the diff, as well as the Conflicts struct.
        """
        it = ((a_env, m_env), (a_env, d_env), (d_env, m_env))
        diffs = tuple(starmap(diff_envs, it))  # significant perf improvement by map.
        conflict = find_conflicts(diffs[0], diffs[1])
        return DiffAndConflictsDB(diff=diffs[2], conflict=conflict)

    @staticmethod
    def _diff(a_env: lmdb.Environment, m_env: lmdb.Environment) -> DiffAndConflictsDB:
        """Fast Forward differ from ancestor to master commit.

        Note: this method returns the same DiffAndConflictsDB struct as the
        three way commit diff method, but the `conflict` field will be
        empty

        Parameters
        ----------
        a_env : lmdb.Environment
            unpacked lmdb environment for the ancestor commit
        m_env : lmdb.Environment
            unpacked lmdb environment for the master commit

        Returns
        -------
        DiffAndConflictsDB
            structure containing (`additions`, `deletions`, `mutations`) for
            the ancestor -> master (head) env diff
        """
        m_diff = diff_envs(a_env, m_env)
        conflict = Conflicts(t1=[], t21=[], t22=[], t3=[], conflict=False)
        return DiffAndConflictsDB(diff=m_diff, conflict=conflict)


# ------------------------ Read-Only Checkouts Only ---------------------------


class ReaderUserDiff(BaseUserDiff):
    """Methods diffing contents of a :class:`~hangar.checkout.ReaderCheckout` instance.

    These provide diffing implementations to compare the current checkout
    ``HEAD`` to a branch or commit. The results are generally returned as
    a nested set of named tuples.

    When diffing of commits or branches is performed, if there is not a linear
    history of commits between current ``HEAD`` and the diff commit (ie. a
    history which would permit a ``"fast-forward" merge``), the result field
    named ``conflict`` will contain information on any merge conflicts that
    would exist if staging area ``HEAD`` and the (compared) ``"dev" HEAD`` were
    merged "right now". Though this field is present for all diff comparisons,
    it can only contain non-empty values in the cases where a three way merge
    would need to be performed.

    ::

       Fast Forward is Possible
       ========================

           (master)          (foo)
       a ----- b ----- c ----- d


       3-Way Merge Required
       ====================

                            (master)
       a ----- b ----- c ----- d
               \\
                \\               (foo)
                 \\----- ee ----- ff
    """

    def __init__(self, commit_hash, *args, **kwargs):

        super().__init__(*args, **kwargs)
        self._commit_hash = commit_hash

    def _run_diff(self, dev_commit_hash: str) -> DiffAndConflictsDB:
        """Compute diff between head and commit hash, returning DB formatted results

        Parameters
        ----------
        dev_commit_hash : str
            hash of the commit to be used as the comparison.

        Returns
        -------
        DiffAndConflictsDB
            two-tuple of `diff`, `conflict` (if any) calculated in the diff
            algorithm.
        """
        hist = self._determine_ancestors(self._commit_hash, dev_commit_hash)
        mH, dH, aH = hist.masterHEAD, hist.devHEAD, hist.ancestorHEAD
        with tmp_cmt_env(self._refenv, mH) as m_env, tmp_cmt_env(self._refenv, dH) as d_env:
            if hist.canFF is True:
                outDb = self._diff(m_env, d_env)
            else:
                with tmp_cmt_env(self._refenv, aH) as a_env:
                    outDb = self._diff3(a_env, m_env, d_env)
        return outDb

    def commit(self, dev_commit_hash: str) -> DiffAndConflicts:
        """Compute diff between HEAD and commit hash, returning user-facing results.

        Parameters
        ----------
        dev_commit_hash : str
            hash of the commit to be used as the comparison.

        Returns
        -------
        DiffAndConflicts
            two-tuple of ``diff``, ``conflict`` (if any) calculated in the diff
            algorithm.

        Raises
        ------
        ValueError
            if the specified ``dev_commit_hash`` is not a valid commit reference.
        """
        if not check_commit_hash_in_history(self._refenv, dev_commit_hash):
            msg = f'HANGAR VALUE ERROR: dev_commit_hash: {dev_commit_hash} does not exist'
            raise ValueError(msg)

        outDb = self._run_diff(dev_commit_hash=dev_commit_hash)
        outRaw = _all_raw_from_db_changes(outDb)
        return outRaw

    def branch(self, dev_branch: str) -> DiffAndConflicts:
        """Compute diff between HEAD and branch name, returning user-facing results.

        Parameters
        ----------
        dev_branch : str
            name of the branch whose HEAD will be used to calculate the diff.

        Returns
        -------
        DiffAndConflicts
            two-tuple of ``diff``, ``conflict`` (if any) calculated in the diff
            algorithm.

        Raises
        ------
        ValueError
            If the specified `dev_branch` does not exist.
        """
        branchNames = get_branch_names(self._branchenv)
        if dev_branch in branchNames:
            dHEAD = get_branch_head_commit(self._branchenv, dev_branch)
        else:
            msg = f'HANGAR VALUE ERROR: dev_branch: {dev_branch} invalid branch name'
            raise ValueError(msg)

        outDb = self._run_diff(dev_commit_hash=dHEAD)
        outRaw = _all_raw_from_db_changes(outDb)
        return outRaw


# ---------------------- Write Enabled Checkouts Only -------------------------


class WriterUserDiff(BaseUserDiff):
    """Methods diffing contents of a :class:`~hangar.checkout.WriterCheckout` instance.

    These provide diffing implementations to compare the current ``HEAD`` of a
    checkout to a branch, commit, or the staging area ``"base"`` contents. The
    results are generally returned as a nested set of named tuples. In
    addition, the :meth:`status` method is implemented which can be used to
    quickly determine if there are any uncommitted changes written in the
    checkout.

    When diffing of commits or branches is performed, if there is not a linear
    history of commits between current ``HEAD`` and the diff commit (ie. a
    history which would permit a ``"fast-forward" merge``), the result field
    named ``conflict`` will contain information on any merge conflicts that
    would exist if staging area ``HEAD`` and the (compared) ``"dev" HEAD`` were
    merged "right now". Though this field is present for all diff comparisons,
    it can only contain non-empty values in the cases where a three way merge
    would need to be performed.

    ::

       Fast Forward is Possible
       ========================

           (master)          (foo)
       a ----- b ----- c ----- d


       3-Way Merge Required
       ====================

                            (master)
       a ----- b ----- c ----- d
               \\
                \\               (foo)
                 \\----- ee ----- ff
    """

    def __init__(self, stageenv: lmdb.Environment, branch_name: str, *args, **kwargs):

        super().__init__(*args, **kwargs)
        self._stageenv: lmdb.Environment = stageenv
        self._branch_name: str = branch_name

    def _run_diff(self, dev_commit_hash: str) -> DiffAndConflictsDB:
        """Compute diff between head and commit, returning DB formatted results.

        Parameters
        ----------
        dev_commit_hash : str
            hash of the commit to be used as the comparison.

        Returns
        -------
        DiffAndConflictsDB
            two-tuple of `diff`, `conflict` (if any) calculated in the diff
            algorithm.
        """
        commit_hash = get_branch_head_commit(self._branchenv, self._branch_name)
        hist = self._determine_ancestors(commit_hash, dev_commit_hash)
        with tmp_cmt_env(self._refenv, hist.devHEAD) as d_env:
            if hist.canFF is True:
                res = self._diff(self._stageenv, d_env)
            else:
                with tmp_cmt_env(self._refenv, hist.ancestorHEAD) as a_env:
                    res = self._diff3(a_env, self._stageenv, d_env)
        return res

    def commit(self, dev_commit_hash: str) -> DiffAndConflicts:
        """Compute diff between HEAD and commit, returning user-facing results.

        Parameters
        ----------
        dev_commit_hash : str
            hash of the commit to be used as the comparison.

        Returns
        -------
        DiffAndConflicts
            two-tuple of ``diff``, ``conflict`` (if any) calculated in the diff
            algorithm.

        Raises
        ------
        ValueError
            if the specified ``dev_commit_hash`` is not a valid commit reference.
        """
        if not check_commit_hash_in_history(self._refenv, dev_commit_hash):
            msg = f'HANGAR VALUE ERROR: dev_commit_hash: {dev_commit_hash} does not exist'
            raise ValueError(msg)

        outDb = self._run_diff(dev_commit_hash=dev_commit_hash)
        outRaw = _all_raw_from_db_changes(outDb)
        return outRaw

    def branch(self, dev_branch: str) -> DiffAndConflicts:
        """Compute diff between HEAD and branch, returning user-facing results.

        Parameters
        ----------
        dev_branch : str
            name of the branch whose HEAD will be used to calculate the diff.

        Returns
        -------
        DiffAndConflicts
            two-tuple of ``diff``, ``conflict`` (if any) calculated in the diff
            algorithm.

        Raises
        ------
        ValueError
            If the specified ``dev_branch`` does not exist.
        """
        branchNames = get_branch_names(self._branchenv)
        if dev_branch in branchNames:
            dHEAD = get_branch_head_commit(self._branchenv, dev_branch)
        else:
            msg = f'HANGAR VALUE ERROR: dev_branch: {dev_branch} invalid branch name'
            raise ValueError(msg)

        outDb = self._run_diff(dev_commit_hash=dHEAD)
        outRaw = _all_raw_from_db_changes(outDb)
        return outRaw

    def staged(self) -> DiffAndConflicts:
        """Return diff of staging area to base, returning user-facing results.

        Returns
        -------
        DiffAndConflicts
            two-tuple of ``diff``, ``conflict`` (if any) calculated in the diff
            algorithm.
        """
        commit_hash = get_branch_head_commit(self._branchenv, self._branch_name)
        with tmp_cmt_env(self._refenv, commit_hash) as base_env:
            outDb = self._diff(base_env, self._stageenv)
        outRaw = _all_raw_from_db_changes(outDb)
        return outRaw

    def status(self) -> str:
        """Determine if changes have been made in the staging area

        If the contents of the staging area and its parent commit are the
        same, the status is said to be "CLEAN". If even one column or
        metadata record has changed, however, the status is "DIRTY".

        Returns
        -------
        str
            "CLEAN" if no changes have been made, otherwise "DIRTY"
        """
        head_commit = get_branch_head_commit(self._branchenv, self._branch_name)
        if head_commit == '':
            base_refs = ()
        else:
            base_refs = get_commit_ref(self._refenv, head_commit)

        stage_refs = tuple(RecordQuery(self._stageenv)._traverse_all_records())
        status = 'DIRTY' if (base_refs != stage_refs) else 'CLEAN'
        return status


================================================
FILE: src/hangar/external/__init__.py
================================================
from ._external import load, save, show, board_show
from .plugin_manager import PluginManager
from .base_plugin import BasePlugin


__all__ = ['load', 'save', 'show', 'board_show', 'BasePlugin', 'PluginManager']



================================================
FILE: src/hangar/external/_external.py
================================================
"""
High level methods let user interact with hangar without diving into the internal
methods of hangar. We have enabled four basic entry points as high level methods

1. :func:`.load`
2. :func:`.save`
3. :func:`.show`
4. :func:`.board_show`

These entry points by itself is not capable of doing anything. But they are entry
points to the same methods in the `hangar.external` plugins available in pypi. These
high level entry points are used by the CLI for doing import, export and view
operations as well as the `hangarboard `_
for visualization (using ``board_show``)
"""
from typing import Tuple

import numpy as np

from .plugin_manager import PluginManager


pm = PluginManager()


def load(fpath: str,
         plugin: str = None,
         extension: str = None,
         **plugin_kwargs) -> Tuple[np.ndarray, str]:
    """
    Wrapper to load data from a file into memory as numpy arrays using a
    plugin's `load` method

    Parameters
    ----------
    fpath : str
        Data file path, e.g. ``path/to/test.jpg``
    plugin : str, optional
        Name of plugin to use. By default, plugins registered for the given
        file format are tried until a suitable one is found. This cannot be
        `None` if `extension` is also `None`
    extension : str, optional
        Format of the file. This is used to infer which plugin to use
        in case plugin name is not provided. This cannot be `None` if
        `plugin` is also `None`

    Other Parameters
    ----------------
    plugin_kwargs : dict
        Plugin specific keyword arguments. If the function is being called from
        the command line, all the unknown keyword arguments will be collected
        as ``plugin_kwargs``

    Returns
    -------
    img_array : :class:`numpy.ndarray`
        data returned from the given plugin.

    """
    if not pm.plugins_loaded:
        pm.reset_plugins()
    func = pm.get_plugin('load', plugin=plugin, extension=extension)
    return func(fpath, **plugin_kwargs)
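

# Illustrative usage sketch (not part of the library API). Assuming some
# installed plugin has registered itself for the 'jpg' extension and provides
# a 'load' method (e.g. a hypothetical ``hangar_pil`` package), the call would
# look like:
#
#     arr, sample_name = load('path/to/test.jpg', extension='jpg')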


def save(arr: np.ndarray, outdir: str, sample_det: str, extension: str,
         plugin: str = None, **plugin_kwargs):
    """Wrapper plugin ``save`` methods which dump :class:`numpy.ndarray` to disk.

    Parameters
    ----------
    arr : :class:`numpy.ndarray`
        Numpy array to be saved to file
    outdir : str
        Target directory
    sample_det : str
        Sample name and type of the sample name formatted as
        ``sample_name_type:sample_name``
    extension : str
        Format of the file. This is used to infer which plugin to use in case
        plugin name is not provided. This cannot be ``None`` if ``plugin`` is
        also ``None``
    plugin : str, optional
        Name of plugin to use. By default, plugins registered for the given
        file format are tried until a suitable one is found. This cannot be
        ``None`` if ``extension`` is also ``None``

    Other Parameters
    ----------------
    plugin_kwargs : dict
        Plugin specific keyword arguments. If the function is being called from
        the command line, all the unknown keyword arguments will be
        collected as ``plugin_kwargs``

    Notes
    -----
    Neither the CLI nor this method creates the file name to save to. Instead
    they pass the required details downstream to the plugins, which do that once
    they verify the given ``outdir`` is a valid directory. This is because we
    expect data entries where one data entry is one file (like images) as well
    as data entries where multiple entries go to a single file (like
    CSV). With these ambiguous cases in hand, it's more sensible to let the
    plugin handle the file handling accordingly.
    """
    if not pm.plugins_loaded:
        pm.reset_plugins()
    func = pm.get_plugin('save', plugin=plugin, extension=extension)
    func(arr, outdir, sample_det, extension, **plugin_kwargs)


def show(arr: np.ndarray, plugin: str = None,
         extension: str = None, **plugin_kwargs):  # pragma: no cover
    """Wrapper to display :class:`numpy.ndarray` via plugin ``show`` method.

    Parameters
    ----------
    arr : :class:`numpy.ndarray`
        Data to process into some human understandable representation.
    plugin : str, optional
        Name of plugin to use. By default, plugins registered for the given
        file format are tried until a suitable one is found. This cannot be
        ``None`` if ``extension`` is also ``None``
    extension : str, optional
        Format of the file. This is used to infer which plugin to use
        in case plugin name is not provided. This cannot be ``None`` if
        ``plugin`` is also ``None``

    Other Parameters
    ----------------
    plugin_kwargs : dict
        Plugin specific keyword arguments. If the function is being called from
        the command line, all the unknown keyword arguments will be
        collected as ``plugin_kwargs``
    """
    if not pm.plugins_loaded:
        pm.reset_plugins()
    func = pm.get_plugin('show', plugin=plugin, extension=extension)
    return func(arr, **plugin_kwargs)


def board_show(arr: np.ndarray, plugin: str = None,
               extension: str = None, **plugin_kwargs):
    """
    Wrapper to convert the numpy array using the ``board_show`` method
    of the plugin to make it displayable in the web UI

    Parameters
    ----------
    arr : :class:`numpy.ndarray`
        Data to process into some human understandable representation.
    plugin : str, optional
        Name of plugin to use. By default, plugins registered for the given
        file format are tried until a suitable one is found. This cannot be
        ``None`` if ``extension`` is also ``None``
    extension : str, optional
        Format of the file. This is used to infer which plugin to use
        in case plugin name is not provided. This cannot be ``None`` if
        ``plugin`` is also ``None``

    Other Parameters
    ----------------
    plugin_kwargs : dict
        Plugin specific keyword arguments. If the function is being called from
        the command line, all the unknown keyword arguments will be
        collected as ``plugin_kwargs``
    """
    if not pm.plugins_loaded:
        pm.reset_plugins()
    func = pm.get_plugin('board_show', plugin=plugin, extension=extension)
    return func(arr, **plugin_kwargs)


================================================
FILE: src/hangar/external/base_plugin.py
================================================
"""
Hangar's external plugin system is designed to make it flexible for users to
write custom plugins for custom data formats. External plugins should be python
installables and should make itself discoverable using package meta data. A
`detailed documentation `_
can be found in the official python doc. But for a headstart and to avoid going
through this somewhat complex process, we have made a `cookiecutter
`_ package.
All the hangar plugins follow the naming standard similar to Flask plugins i.e
`hangar_pluginName`
"""

import os


class BasePlugin(object):
    """Base plugin class from where all the external plugins should be inherited.

    Child classes can have four methods to expose - ``load``, ``save``,
    ``show`` and ``board_show``. These are considered as valid methods and
    should be passed as the first argument while initializing the parent from
    child. Child should also inform the parent about the acceptable file
    formats by passing that as second argument. :class:`.BasePlugin` accepts
    ``provides`` and ``accepts`` on init and exposes them which is then used by
    plugin manager while loading the modules. BasePlugin also provides
    ``sample_name`` function to figure out the sample name from the file path.
    This function is used by ``load`` method to return the sample name which is
    then used by hangar as a key to save the data
    """
    def __init__(self, provides, accepts):
        if not provides:
            raise ValueError("Argument ``provides`` cannot be empty")
        if not accepts:
            raise ValueError("Argument ``accepts`` cannot be empty")
        self._provides = provides
        self._accepts = accepts

    @property
    def provides(self):
        return self._provides

    @property
    def accepts(self):
        return self._accepts

    def load(self, fpath, *args, **kwargs):
        """Load some data file on disk to recover it in :class:`numpy.ndarray` form.

        Loads the data from disk for the given file path and returns the data
        as a :class:`numpy.ndarray` along with the name of the data sample.
        Names returned from this function will be used by the import cli system
        as the key for the returned data. This function can return either a
        single (:class:`numpy.ndarray`, sample name) combination, or a generator
        that produces such combinations. This helps when the
        input file is not a single data entry like an image but has multiple
        data points like CSV files.

        An example implementation that returns a single data point:

        .. code-block:: python

            def load(self, fpath, *args, **kwargs):
                data = create_np_array('myimg.jpg')
                name = create_sample_name('myimg.jpg')  # could use `self.sample_name`
                return data, name

        An example implementation that returns a generator could look like this:

        .. code-block:: python

            def load(self, fpath, *args, **kwargs):
                for i, line in enumerate('myfile.csv'):
                    data = create_np_array(line)
                    name = create_sample_name(fpath, i)
                    yield data, name
        """
        raise NotImplementedError

    def save(self, arr, outdir, sample_detail, extension, *args, **kwargs):
        """Save data in a :class:`numpy.ndarray` to a specific file format on disk.

        If the plugin is developed for files like CSV, JSON, etc - where
        multiple data entries would go to the same file - this should check
        whether the file exists already and whether it should modify / append
        the new data entry to the structure, instead of overwriting it or
        throwing an exception.

        Note
        ----
        The name of the file and the full path used to save the data should be
        constructed by this function. This can be done using the information it
        gets as arguments, such as ``outdir``, ``sample_detail`` and ``extension``.
        This has been offloaded to this function instead of being handled earlier
        because decisions like whether multiple data entries should go to a single
        file or to multiple files cannot be predicted beforehand, as they are
        always data specific (and hence plugin specific)

        Note
        ----
        If the call to this function is initiated by the CLI, ``sample_detail`` argument
        will be a string formatted as `sample_name_type:sample_name`. For example, if
        the sample name is `sample1` (and type of sample name is `str`) then
        ``sample_detail`` will be `str:sample1`. This is to avoid the ambiguity that
        could arise by having both the integer and string forms of a number as sample
        names (ex: if column[123] and column["123"] exist). Formatting
        ``sample_detail`` into a proper filename (not necessary) is up to the
        plugin developer.
        """
        raise NotImplementedError

    def show(self, arr, *args, **kwargs):
        """Show/Display the data to the user.

        This function should process the input :class:`numpy.ndarray` and show
        that to the user using a data dependant display mechanism. A good
        example for such a system is ``matplotlib.pyplot``'s ``plt.show``,
        which displays the image data inline in the running terminal / kernel
        ui.
        """
        raise NotImplementedError

    def board_show(self, arr, *args, **kwargs):
        """Show/display data in hangarboard format.

        Hangarboard is capable of displaying the three most common data formats:
        image, text and audio. This function should process the input
        :class:`numpy.ndarray` data and convert it to any of the supported
        formats.
        """
        raise NotImplementedError

    @staticmethod
    def sample_name(fpath: os.PathLike) -> str:
        """Sample the name from file path.

        This function comes handy since the :meth:`.load` method needs to
        ``yield`` or ``return`` both data and sample name. If there no specific
        requirements regarding sample name creation, you can use this function
        which removes the extension from the file name and returns just the
        name. For example, if filepath is ``/path/to/myfile.ext``, then it
        returns ``myfile``

        Parameters
        ----------
        fpath : os.PathLike
            Path to the file which is being loaded by `load`
        """
        return os.path.splitext(os.path.basename(fpath))[0]
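

# Illustrative sketch (not part of the library): a minimal plugin exposing only
# ``load`` for numpy '.npy' files. The class name is hypothetical; a real
# plugin package would register it under the 'hangar.external.plugins' entry
# point group so the plugin manager can discover it.
class _ExampleNumpyFilePlugin(BasePlugin):

    def __init__(self):
        super().__init__(provides=['load'], accepts=['npy'])

    def load(self, fpath, *args, **kwargs):
        import numpy as np

        data = np.load(fpath)
        # reuse the base class helper to turn the file path into a sample key
        return data, self.sample_name(fpath)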


================================================
FILE: src/hangar/external/plugin_manager.py
================================================
import pkg_resources
from typing import Callable


class PluginManager(object):
    """
    Container class that holds the information about available plugins and
    provides the required methods to fetch and clean up the plugin system
    """

    valid_provides = ['load', 'save', 'show', 'board_show']

    def __init__(self):
        self._plugin_store = {}  # ex: {'pil': loaded_pil_module}
        self._default_plugins = {}  # ex: {'jpg': {'save': 'pil'}}
        self.plugins_loaded = False

    def reset_plugins(self):
        """
        Resetting plugins clears the existing stores and then scans the
        `hangar.external.plugins` entry point group for plugins. Once the
        `plugin store` is populated, it finds the default plugins to make
        auto-inference based on file format possible
        """
        self._clear_plugins()
        self._scan_plugins()
        self._read_defaults()

    def _clear_plugins(self):
        """
        Clear the plugin state to the default, i.e., where no plugins are loaded
        """
        self._plugin_store.clear()
        self._default_plugins.clear()
        self.plugins_loaded = False

    def _scan_plugins(self):
        """
        Scan for entry points, find the plugins and store them in provided storage
        containers
        """
        for entry_point in pkg_resources.iter_entry_points('hangar.external.plugins'):
            PluginClass = entry_point.load()
            self._plugin_store[entry_point.name] = PluginClass()
        self.plugins_loaded = True

    def _read_defaults(self):
        """
        Populate default plugin dict that maps file formats to plugins and methods. This
        is used to infer which plugin to use at runtime based on file format
        """
        for fname, plugin in self._plugin_store.items():
            generator = ((ext, method) for ext in plugin.accepts for method in plugin.provides)
            for pair in generator:
                if pair not in self._default_plugins:
                    self._default_plugins[pair] = fname

    def get_plugin(self, method: str, plugin: str = None, extension: str = None) -> Callable:
        """Load installed plugin.

        User either needs to specify which plugin to load or should provide
        file format to infer which plugin to use

        Parameters
        ----------
        method : str
            Which method to import from the plugin. Methods implemented by the
            extension author should be declared as arguments passed into the
            BasePlugin superclass constructor
        plugin : str, optional
            Which plugin to load the method from. Cannot leave as ``None`` if
            ``extension`` is also ``None``
        extension : str, optional
            format of the data on the disk. This information is used to infer
            which plugin to use in case ``plugin`` is not provided explicitly.
            Cannot leave as ``None`` if ``plugin`` is also ``None``

        Returns
        -------
        plugin_method : function
            requested method from the plugin
        """

        if not plugin:
            if not extension:
                raise ValueError("Both `plugin` and `extension` cannot be empty together")

            plugin = self._default_plugins.get((extension, method))
            if plugin is None:
                raise ValueError(f"No plugins found for the file extension {extension} that could "
                                 f"do {method}")
        else:
            if plugin not in self._plugin_store:
                raise ValueError(f"Plugin {plugin} not found")
        loaded_plugin = self._plugin_store[plugin]
        try:
            return getattr(loaded_plugin, method)
        except AttributeError:
            raise RuntimeError(f"Method {method} found in `plugin.provides` but could "
                               f"not invoke from {plugin}. You might have forgot to define "
                               f"the function")


================================================
FILE: src/hangar/external_cpython.pxd
================================================
""" Additional bindings to Python's C-API.
These differ from Cython's bindings in ``cpython``.
"""
from cpython.ref cimport PyObject

cdef extern from "Python.h":
    PyObject* PtrIter_Next "PyIter_Next"(object o)
    PyObject* PtrObject_Call "PyObject_Call"(object callable_object, object args, object kw)
    PyObject* PtrObject_GetItem "PyObject_GetItem"(object o, object key)
    int PyDict_Next_Compat "PyDict_Next"(object p, Py_ssize_t *ppos, PyObject* *pkey, PyObject* *pvalue) except -1


================================================
FILE: src/hangar/merger.py
================================================
"""Merge Methods

In the current implementation, only fast-forward and a competent, but limited,
three-way merge algorithm are implemented. All user facing API calls should be
funneled through the :func:`select_merge_algorithm` function.

.. note::

    In the current implementation, it is not possible to stop a merge in progress or
    to revert a bad merge commit. All revert-like operations should be made by
    creating new branches from the last "good" state, after which new merge
    operations can be attempted (if desired).
"""
from pathlib import Path

import lmdb

from .diff import WriterUserDiff, diff_envs, find_conflicts
from .records.commiting import (
    tmp_cmt_env,
    replace_staging_area_with_commit,
    replace_staging_area_with_refs,
    commit_records,
)
from .records.hashs import clear_stage_hash_records, backends_remove_in_process_data
from .records.heads import (
    get_staging_branch_head,
    get_branch_head_commit,
    set_staging_branch_head,
    set_branch_head_commit,
    release_writer_lock,
    acquire_writer_lock,
)


def select_merge_algorithm(message: str,
                           branchenv: lmdb.Environment,
                           stageenv: lmdb.Environment,
                           refenv: lmdb.Environment,
                           stagehashenv: lmdb.Environment,
                           master_branch: str,
                           dev_branch: str,
                           repo_path: Path,
                           *,
                           writer_uuid: str = 'MERGE_PROCESS') -> str:
    """Entry point to perform a merge.

    Automatically selects algorithm and does the operation if no conflicts are
    found. This call requires that the staging area status be "CLEAN"; if
    a "DIRTY" staging environment is found, a RuntimeError will be thrown.

    Parameters
    ----------
    message : str
        user message describing the commit
    branchenv : lmdb.Environment
        where the branch references are stored
    stageenv : lmdb.Environment
        where the staging area is open
    refenv : lmdb.Environment
        where commit history is stored
    stagehashenv: lmdb.Environment
        where the stage hash environment data is stored
    master_branch : str
        name of the branch to serve as a merge master
    dev_branch : str
        name of the branch to use as the feature branch
    repo_path: Path
        path to the repository on disk
    writer_uuid : str, optional, kwarg only
        if the merge method is called from the repo level, the default writer
        lock `MERGE_PROCESS` is used to ensure that a writer is active. If
        called from within a write-enabled checkout, the writer lock is set to
        the writer_uuid of the writer checkout so that the lock can be acquired.

    Raises
    ------
    RuntimeError
        if the staging area is not `CLEAN` of other changes
    PermissionError
        if the writer lock is currently held
    ValueError
        if a conflict is found in a three way merge, no operation will be performed.

    Returns
    -------
    str
        commit hash of the merge if this was a successful operation.
    """
    current_head = get_staging_branch_head(branchenv)
    wDiffer = WriterUserDiff(stageenv=stageenv,
                             branchenv=branchenv,
                             refenv=refenv,
                             branch_name=current_head)
    if wDiffer.status() != 'CLEAN':
        e = RuntimeError(
            'Changes are currently pending in the staging area. To avoid mangled '
            'histories, the staging area must exist in a clean state. Please '
            'reset or commit any changes before the merge operation.')
        raise e from None

    try:
        acquire_writer_lock(branchenv=branchenv, writer_uuid=writer_uuid)
    except PermissionError as e:
        raise e from None

    try:
        mHEAD = get_branch_head_commit(branchenv, branch_name=master_branch)
        dHEAD = get_branch_head_commit(branchenv, branch_name=dev_branch)
        branchHistory = wDiffer._determine_ancestors(mHEAD=mHEAD, dHEAD=dHEAD)

        if branchHistory.canFF is True:
            print('Selected Fast-Forward Merge Strategy')
            success = _fast_forward_merge(
                branchenv=branchenv,
                stageenv=stageenv,
                refenv=refenv,
                stagehashenv=stagehashenv,
                master_branch=master_branch,
                new_masterHEAD=branchHistory.devHEAD,
                repo_path=repo_path)
        else:
            print('Selected 3-Way Merge Strategy')
            success = _three_way_merge(
                message=message,
                master_branch=master_branch,
                masterHEAD=branchHistory.masterHEAD,
                dev_branch=dev_branch,
                devHEAD=branchHistory.devHEAD,
                ancestorHEAD=branchHistory.ancestorHEAD,
                branchenv=branchenv,
                stageenv=stageenv,
                refenv=refenv,
                stagehashenv=stagehashenv,
                repo_path=repo_path)

    except ValueError as e:
        raise e from None

    finally:
        if writer_uuid == 'MERGE_PROCESS':
            release_writer_lock(branchenv=branchenv, writer_uuid=writer_uuid)

    return success


# ------------------ Fast Forward Merge Methods -------------------------------


def _fast_forward_merge(branchenv: lmdb.Environment,
                        stageenv: lmdb.Environment,
                        refenv: lmdb.Environment,
                        stagehashenv: lmdb.Environment,
                        master_branch: str,
                        new_masterHEAD: str,
                        repo_path: Path) -> str:
    """Update branch head pointer to perform a fast-forward merge.

    This method does not check that it is safe to do this operation, all
    verification should happen before this point is reached

    Parameters
    ----------
    branchenv : lmdb.Environment
        db with the branch head pointers
    stageenv : lmdb.Environment
        db where the staging area records are stored.
    refenv : lmdb.Environment
        db where the merge commit records are stored.
    stagehashenv: lmdb.Environment
        db where the staged hash records are stored
    master_branch : str
        name of the merge_master branch which should be updated
    new_masterHEAD : str
        commit hash to update the master_branch name to point to.
    repo_path: Path
        path to the repository on disk.

    Returns
    -------
    str
        if successful, returns the commit hash the master branch name was
        updated to.
    """
    try:
        replace_staging_area_with_commit(
            refenv=refenv, stageenv=stageenv, commit_hash=new_masterHEAD)

        outBranchName = set_branch_head_commit(
            branchenv=branchenv, branch_name=master_branch, commit_hash=new_masterHEAD)
        set_staging_branch_head(branchenv=branchenv, branch_name=master_branch)

        backends_remove_in_process_data(repo_path=repo_path)
        clear_stage_hash_records(stagehashenv=stagehashenv)

    except ValueError as e:
        raise e from None

    return outBranchName


# ----------------------- Three-Way Merge Methods -----------------------------


def _three_way_merge(message: str,
                     master_branch: str,
                     masterHEAD: str,
                     dev_branch: str,
                     devHEAD: str,
                     ancestorHEAD: str,
                     branchenv: lmdb.Environment,
                     stageenv: lmdb.Environment,
                     refenv: lmdb.Environment,
                     stagehashenv: lmdb.Environment,
                     repo_path: Path) -> str:
    """Merge strategy with diff/patch computed from changes since last common ancestor.

    Parameters
    ----------
    message : str
        commit message to apply to this merge commit (specified by the user)
    master_branch : str
        name of the merge master branch
    masterHEAD : str
        commit hash of the merge master HEAD
    dev_branch : str
        name of the merge dev branch
    devHEAD : str
        commit hash of the merge dev HEAD
    ancestorHEAD : str
        commit hash of the nearest common ancestor which the merge_master and
        merge_dev branches both share in their commit history.
    branchenv : lmdb.Environment
        db where the branch head records are stored
    stageenv : lmdb.Environment
        db where the staging area records are stored.
    refenv : lmdb.Environment
        db where the merge commit records are stored.
    stagehashenv: lmdb.Environment
        db where the staged hash records are stored
    repo_path: Path
        path to the repository on disk.

    Returns
    -------
    str
        commit hash of the new merge commit if the operation was successful.

    Raises
    ------
    ValueError
        If a conflict is found, the operation will abort before completing.
    """
    with tmp_cmt_env(refenv, ancestorHEAD) as aEnv, tmp_cmt_env(
            refenv, masterHEAD) as mEnv, tmp_cmt_env(refenv, devHEAD) as dEnv:

        m_diff = diff_envs(aEnv, mEnv)
        d_diff = diff_envs(aEnv, dEnv)
        conflict = find_conflicts(m_diff, d_diff)
        if conflict.conflict is True:
            msg = f'HANGAR VALUE ERROR:: Merge ABORTED with conflict: {conflict}'
            raise ValueError(msg) from None

        with mEnv.begin(write=True) as txn:
            for k, _ in d_diff.deleted:
                txn.delete(k)
            for k, v in d_diff.mutated:
                txn.put(k, v, overwrite=True)
            for k, v in d_diff.added:
                txn.put(k, v, overwrite=True)

        dbcont = []
        with mEnv.begin(write=False) as txn:
            with txn.cursor() as cur:
                cur.first()
                for kv in cur.iternext(keys=True, values=True):
                    dbcont.append(kv)

    backends_remove_in_process_data(repo_path=repo_path)
    replace_staging_area_with_refs(stageenv=stageenv, sorted_content=dbcont)

    commit_hash = commit_records(
        message=message,
        branchenv=branchenv,
        stageenv=stageenv,
        refenv=refenv,
        repo_path=repo_path,
        is_merge_commit=True,
        merge_master=master_branch,
        merge_dev=dev_branch)

    clear_stage_hash_records(stagehashenv=stagehashenv)
    return commit_hash
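
# Conceptual sketch (not part of hangar's API) of the three-way merge applied
# above, using plain dicts in place of the lmdb environments; keys and values
# are illustrative only. Deletions and additions made on the dev side since the
# common ancestor are replayed on top of the master contents (mutations are
# handled the same way as additions, by overwriting the master value).
#
#     >>> ancestor = {'a': 1, 'b': 2, 'c': 3}
#     >>> master   = {'a': 1, 'b': 20, 'c': 3}        # master mutated 'b'
#     >>> dev      = {'a': 1, 'b': 2, 'd': 4}         # dev deleted 'c', added 'd'
#     >>> merged = dict(master)
#     >>> for k in ancestor.keys() - dev.keys():      # dev deletions
#     ...     del merged[k]
#     >>> for k in dev.keys() - ancestor.keys():      # dev additions
#     ...     merged[k] = dev[k]
#     >>> merged == {'a': 1, 'b': 20, 'd': 4}
#     True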


================================================
FILE: src/hangar/mixins/__init__.py
================================================
from .checkout_iteration import CheckoutDictIteration
from .datasetget import GetMixin
from .recorditer import CursorRangeIterator

__all__ = ['GetMixin', 'CursorRangeIterator', 'CheckoutDictIteration']


================================================
FILE: src/hangar/mixins/checkout_iteration.py
================================================


class CheckoutDictIteration:
    """Mixin class for checkout objects which mock common iter methods

    Methods
    -------
    __len__
    __contains__
    __iter__
    keys
    values
    items
    """

    def __len__(self):
        """Returns number of columns in the checkout.
        """
        self._verify_alive()
        return len(self.columns)

    def __contains__(self, key):
        """Determine if some column name (key) exists in the checkout.
        """
        self._verify_alive()
        return bool(key in self.columns)

    def __iter__(self):
        """Iterate over column keys"""
        self._verify_alive()
        return iter(self.columns)

    def keys(self):
        """Generator yielding the name (key) of every column
        """
        self._verify_alive()
        yield from self.columns.keys()

    def values(self):
        """Generator yielding accessor object of every column
        """
        self._verify_alive()
        yield from self.columns.values()

    def items(self):
        """Generator yielding tuple of (name, accessor object) of every column
        """
        self._verify_alive()
        yield from self.columns.items()
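
# Hypothetical usage sketch (the ``repo`` object below is assumed, not defined
# in this module): a checkout class mixing in ``CheckoutDictIteration`` exposes
# its columns through the familiar dict protocol.
#
#     >>> co = repo.checkout()
#     >>> len(co)                      # number of columns
#     2
#     >>> 'foo' in co
#     True
#     >>> for name, col in co.items():
#     ...     print(name, col)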


================================================
FILE: src/hangar/mixins/datasetget.py
================================================
from functools import reduce
from operator import getitem as op_getitem
from contextlib import ExitStack

# noinspection PyUnresolvedReferences
class GetMixin:
    """Mixin methods for the checkout object classes.

    Used since the read and write enabled checkouts have the same :meth:`__getitem__`
    and :meth:`get` methods.
    """

    def __getitem__(self, index):
        """Dictionary style access to columns and samples

        Checkout object can be thought of as a "dataset" ("dset") mapping a
        view of samples across columns.

            >>> dset = repo.checkout(branch='master')
            >>>
            # Get a column contained in the checkout.
            >>> dset['foo']
            ColumnDataReader
            >>>
            # Get a specific sample from ``'foo'`` (returns a single array)
            >>> dset['foo', '1']
            np.array([1])
            >>>
            # Get multiple samples from ``'foo'`` (returns a list of arrays, in order
            # of input keys)
            >>> dset[['foo', '1'], ['foo', '2'], ['foo', '324']]
            [np.array([1]), np.array([2]), np.array([324])]
            >>>
            # Get sample from multiple columns, column/data returned is ordered
            # in same manner as input of func.
            >>> dset[['foo', '1'], ['bar', '1'], ['baz', '1']]
            [np.array([1]), np.array([1, 1]), np.array([1, 1, 1])]
            >>>
            # Get multiple samples from multiple columns
            >>> keys = [(col, str(samp)) for samp in range(2) for col in ['foo', 'bar']]
            >>> keys
            [('foo', '0'), ('bar', '0'), ('foo', '1'), ('bar', '1')]
            >>> dset[keys]
            [np.array([1]), np.array([1, 1]), np.array([2]), np.array([2, 2])]

        Arbitrary column layouts are supported by simply adding additional members
        to the keys for each piece of data. For example, getting data from a column
        with a nested layout:

            >>> dset['nested_col', 'sample_1', 'subsample_0']
            np.array([1, 0])
            >>>
            # a sample accessor object can be retrieved at will...
            >>> dset['nested_col', 'sample_1']
            (column_name='nested_col', sample_name='sample_1')
            >>>
            # to get all subsamples in a nested sample use the Ellipsis operator
            >>> dset['nested_col', 'sample_1', ...]
            {'subsample_0': np.array([1, 0]),
             'subsample_1': np.array([1, 1]),
             ...
             'subsample_n': np.array([1, 255])}

        Retrieval of data from different column types can be mixed and combined
        as desired. For example, retrieving data from both flat and nested columns
        simultaneously:

            >>> dset[('nested_col', 'sample_1', '0'), ('foo', '0')]
            [np.array([1, 0]), np.array([0])]
            >>> dset[('nested_col', 'sample_1', ...), ('foo', '0')]
            [{'subsample_0': np.array([1, 0]), 'subsample_1': np.array([1, 1])},
             np.array([0])]
            >>> dset[('foo', '0'), ('nested_col', 'sample_1')]
            [np.array([0]),
             (column_name='nested_col', sample_name='sample_1')]

        If a column or data key does not exist, then this method will raise a KeyError.
        As an alternative, missing keys can be gracefully handled by calling :meth:`get()`
        instead. That method does not (by default) raise an error if a key is missing.
        Instead, a (configurable) default value is simply inserted in its place.

            >>> dset['foo', 'DOES_NOT_EXIST']
            -------------------------------------------------------------------
            KeyError                           Traceback (most recent call last)
            <ipython-input> in <module>
            ----> 1 res = dset['foo', 'DOES_NOT_EXIST']
            KeyError: 'DOES_NOT_EXIST'

        Parameters
        ----------
        index
            column name, sample key(s), or sequence of list/tuple of column name
            and sample key(s) which should be retrieved in the operation.

            Please see detailed explanation above for full explanation of accepted
            argument format / result types.

        Returns
        -------
        :class:`~.columns.column.Columns`
            single column parameter, no samples specified
        Any
            Single column specified, single sample key specified
        List[Any]
            arbitrary columns and multiple samples; data for each sample is
            returned in the same order the sample keys were received.
        """
        # not using kwargs since this could be in a tight loop.
        # kwargs: default-None, except_missing=True
        return self._get_in(index, None, True)

    def get(self, keys, default=None, except_missing=False):
        """View of sample data across columns gracefully handling missing sample keys.

        Please see :meth:`__getitem__()` for the full description. This method is
        identical with a single exception: if a sample key is not present in a
        column, this method will place a null ``None`` value (or the configured
        ``default``) in its return slot rather than throwing a ``KeyError`` like
        the dict style access does.

        Parameters
        ----------
        keys
            sequence of column name (and optionally) sample key(s), or sequence of
            list/tuple of column name and sample key(s), which should be retrieved
            in the operation.

            Please see detailed explanation in :meth:`__getitem__()` for full
            explanation of accepted argument format / result types.

        default: Any, optional
            default value to insert in results for the case where some column
            name / sample key is not found, and the `except_missing` parameter
            is set to False.

        except_missing: bool, optional
            If False, will not throw exceptions on missing sample key value.
            Will raise KeyError if True and missing key found.

        Returns
        -------
        :class:`~.columns.column.Columns`
            single column parameter, no samples specified
        Any
            Single column specified, single sample key specified
        List[Any]
            arbitrary columns and multiple samples; data for each sample is
            returned in the same order the sample keys were received.
        """
        return self._get_in(keys, default, except_missing)

    def _get_in(self, keys, default=None, except_missing=False,
                *, _EXCEPTION_CLASSES = (KeyError, IndexError, TypeError)):
        """Internal method to get data from columns within a nested set of dicts.

        Parameters
        ----------
        keys
            sequence of column name (and optionally) sample key(s), or sequence of
            list/tuple of column name and sample key(s), which should be retrieved
            in the operation.

            Please see detailed explanation in :meth:`__getitem__()` for full
            explanation of accepted argument format / result types.

        default: Any, optional
            default value to insert in results for the case where some column
            name / sample key is not found, and the `except_missing` parameter
            is set to False.

        except_missing: bool, optional
            If False, will not throw exceptions on missing sample key value.
            Will raise KeyError if True and missing key found.

        Returns
        -------
        Any
            Single column specified, single sample key specified
        List[Any]
            arbitrary columns and multiple samples; data for each sample is
            returned in the same order the sample keys were received.
        """
        with ExitStack() as stack:
            if not self._is_conman:
                stack.enter_context(self)

            if isinstance(keys, str):
                return self.columns[keys]

            _COLUMNS = self._columns
            if len(keys) >= 2 and any([isinstance(k, (list, tuple)) for k in keys]):
                res = []
                for key in keys:
                    try:
                        tmp = reduce(op_getitem, key, _COLUMNS)
                        res.append(tmp)
                    except _EXCEPTION_CLASSES:
                        if except_missing:
                            raise
                        res.append(default)
                return res
            else:
                try:
                    return reduce(op_getitem, keys, _COLUMNS)
                except _EXCEPTION_CLASSES:
                    if except_missing:
                        raise
                    return default
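
# Illustrative sketch (not part of the library API): ``_get_in`` resolves each
# multi-part key by folding ``operator.getitem`` over the column mapping. The
# plain dict below stands in for the real column accessor objects.
#
#     >>> from functools import reduce
#     >>> from operator import getitem
#     >>> columns = {'foo': {'0': 'foo-0'}, 'nested': {'s1': {'0': 'nested-s1-0'}}}
#     >>> reduce(getitem, ('nested', 's1', '0'), columns)
#     'nested-s1-0'
#     >>> reduce(getitem, ('foo', '0'), columns)
#     'foo-0'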


================================================
FILE: src/hangar/mixins/recorditer.py
================================================
from typing import Iterable, Union, Tuple
import lmdb


class CursorRangeIterator:

    @staticmethod
    def cursor_range_iterator(datatxn: lmdb.Transaction, startRangeKey: bytes, keys: bool, values: bool
                              ) -> Iterable[Union[Tuple[bytes], Tuple[bytes, bytes]]]:
        """Common method used to implement cursor range iterators

        Parameters
        ----------
        datatxn : lmdb.Transaction
            open database transaction to read values from
        startRangeKey : bytes
            key prefix defining the range the cursor iterates over; iteration
            stops at the end of the db or once keys fall outside the
            lexicographic range.
        keys : bool
            If True, yield record keys encountered; if False only values
            are returned.
        values : bool
            If True, yield record values encountered; if False only
            keys are returned.

        Yields
        ------
        Iterable[Union[Tuple[bytes], Tuple[bytes, bytes]]]:
            db keys or key/value tuple
        """
        len_RangeKey = len(startRangeKey)
        with datatxn.cursor() as cursor:
            rangeItemsExist = cursor.set_range(startRangeKey)
            if not rangeItemsExist:
                # break out prematurely in the case where no matching items exist.
                # Important to not disrupt callers who may expect to receive some
                # iterable for processing.
                return iter([])

            # divide loop into returned type sections as perf optimization
            # (rather than if/else checking on every iteration of the loop)
            if keys and not values:
                while rangeItemsExist:
                    recKey = cursor.key()
                    if recKey[:len_RangeKey] == startRangeKey:
                        yield recKey
                        rangeItemsExist = cursor.next()
                        continue
                    else:
                        rangeItemsExist = False
            elif values and not keys:
                while rangeItemsExist:
                    recKey, recVal = cursor.item()
                    if recKey[:len_RangeKey] == startRangeKey:
                        yield recVal
                        rangeItemsExist = cursor.next()
                        continue
                    else:
                        rangeItemsExist = False
            elif keys and values:
                while rangeItemsExist:
                    recKey, recVal = cursor.item()
                    if recKey[:len_RangeKey] == startRangeKey:
                        yield (recKey, recVal)
                        rangeItemsExist = cursor.next()
                        continue
                    else:
                        rangeItemsExist = False
            else:  # pragma: no cover
                raise RuntimeError(f'Internal hangar error while iterating cursor records for '
                                   f'{startRangeKey}. One of [`keys`, `values`] must be True.')
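
# Minimal sketch of driving the prefix-range iteration above (assumes an
# already-open ``lmdb.Environment`` named ``env``; the ``b'f:foo:'`` prefix is
# illustrative only):
#
#     >>> with env.begin(write=False) as txn:
#     ...     for key, val in CursorRangeIterator.cursor_range_iterator(
#     ...             txn, b'f:foo:', keys=True, values=True):
#     ...         print(key, val)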


================================================
FILE: src/hangar/op_state.py
================================================
import types
import sys

import wrapt


@wrapt.decorator
def writer_checkout_only(wrapped, instance, args, kwargs) -> types.MethodType:
    """Only allow a method to be called in a write-enable checkout.

    Parameters
    ----------
    wrapped
        bound method which is being called
    instance
        class instance being operated on, i.e. ``instance is self``
        (both equality and identity).
    args
        argument list passed to the method
    kwargs
        keyword args dict passed to the method.

    Returns
    -------
    types.MethodType
        If ``instance._mode == 'a'`` (write-enabled checkout) then the
        operation is allowed and args and kwargs are passed through
        to the method as specified.

    Raises
    ------
    PermissionError
        If the checkout is opened in read-only mode, then deny
        ability to call and raise error explaining why to user.
    """
    try:
        if instance._mode == 'a':  # user facing classes hide attribute
            return wrapped(*args, **kwargs)
        else:
            err = (f'Method "{wrapped.__func__.__name__}" '
                   f'cannot be called in a read-only checkout.')
            raise PermissionError(err) from None

    except AttributeError:
        if instance.mode == 'a':  # internal classes don't hide attribute
            return wrapped(*args, **kwargs)
        else:
            err = (f'Method "{wrapped.__func__.__name__}" '
                   f'cannot be called in a read-only checkout.')
            raise PermissionError(err) from None


@wrapt.decorator
def reader_checkout_only(wrapped, instance, args, kwargs) -> types.MethodType:
    """Only allow a method to be called in a read-only checkout.

    Parameters
    ----------
    wrapped
        bound method which is being called
    instance
        class instance being operated on, i.e. ``instance is self``
        (both equality and identity).
    args
        argument list passed to the method
    kwargs
        keyword args dict passed to the method.

    Returns
    -------
    types.MethodType
        If ``instance._mode == 'r'`` (read-only checkout) then the
        operation is allowed and args and kwargs are passed through
        to the method as specified.

    Raises
    ------
    PermissionError
        If the checkout is opened in write-enabled mode, then deny
        ability to call and raise error explaining why to user.
    """
    try:
        if instance._mode == 'r':  # user facing classes hide attribute
            return wrapped(*args, **kwargs)
        else:
            err = (f'Method "{wrapped.__func__.__name__}" '
                   f'cannot be called in a write-enabled checkout.')
            raise PermissionError(err) from None

    except AttributeError:
        if instance.mode == 'r':  # internal classes don't hide attribute
            return wrapped(*args, **kwargs)
        else:
            err = (f'Method "{wrapped.__func__.__name__}" '
                   f'cannot be called in a write-enabled checkout.')
            raise PermissionError(err) from None
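
# Illustrative sketch of applying the decorators above to a hypothetical class
# (the ``_mode`` attribute mirrors the convention described in the docstrings;
# nothing below is part of hangar itself):
#
#     >>> class Example:
#     ...     def __init__(self, mode):
#     ...         self._mode = mode
#     ...     @writer_checkout_only
#     ...     def mutate(self):
#     ...         return 'ok'
#     >>> Example('a').mutate()
#     'ok'
#     >>> Example('r').mutate()      # raises PermissionError explaining why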


def tb_params_last_called(tb: types.TracebackType) -> dict:
    """Get parameters of the last function called before exception thrown.

    Parameters
    ----------
    tb : types.TracebackType
        traceback object returned as the third item from sys.exc_info()
        corresponding to an exception raised in the last stack frame.

    Returns
    -------
    dict
        parameters passed to the last function called before the exception was
        thrown.
    """
    while tb.tb_next:
        tb = tb.tb_next
    frame = tb.tb_frame
    code = frame.f_code
    argcount = code.co_argcount
    if code.co_flags & 4:  # *args
        argcount += 1
    if code.co_flags & 8:  # **kwargs
        argcount += 1
    names = code.co_varnames[:argcount]
    params = {}
    for name in names:
        params[name] = frame.f_locals.get(name, '')
    return params


def report_corruption_risk_on_parsing_error(func):
    """Decorator adding try/except handling non-explicit exceptions.

    Explicitly raised RuntimeErrors generally point to corrupted data
    identified by a cryptographic hash mismatch. However, in order to get to
    the point where such quantities can be processed, a non-trivial amount of
    parsing machinery must be run. Should any error be thrown in the parse
    machinery due to corrupted values, this decorator re-raises the exception
    in a useful form, providing traceback context, the likely root cause
    (displayed to users), and the offending arguments passed to the function
    which threw the error.
    """
    def wrapped(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except RuntimeError as e:
            raise e
        except Exception as e:
            raise RuntimeError(
                f'Corruption detected during {func.__name__}. Most likely this is the '
                f'result of unparsable record values. Exception msg `{str(e)}`. Params '
                f'`{tb_params_last_called(sys.exc_info()[2])}`') from e
    return wrapped
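
# Illustrative application of the decorator above (the parser below is
# hypothetical, not part of hangar): any non-RuntimeError raised while parsing
# is surfaced as a RuntimeError flagging possible record corruption.
#
#     >>> @report_corruption_risk_on_parsing_error
#     ... def parse_record(raw):
#     ...     key, val = raw.split(b':')     # raises ValueError on malformed input
#     ...     return key, val
#     >>> parse_record(b'no-separator-here')
#     Traceback (most recent call last):
#         ...
#     RuntimeError: Corruption detected during parse_record. ...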


================================================
FILE: src/hangar/optimized_utils.pxd
================================================
"""
Portions of this code have been taken and modified from the "cytoolz" project.

URL:      https://github.com/pytoolz/cytoolz
File:     cytoolz/dicttoolz.pyd
Commit:   b66732f7f51937e85f5112481baf9db9c97b2ad2
Accessed: 05 APR 2020

CyToolz License
-------------------------------------------------------------------------------
License: New BSD
URL:     https://github.com/pytoolz/cytoolz/blob/b66732f7f51937e85f5112481baf9db9c97b2ad2/LICENSE.txt
"""
from cpython.ref cimport PyObject

cdef class SizedDict(dict):
    cdef public int _maxsize
    cdef public object _stack
    cdef public dict _data
    cdef public int _stack_size

cpdef object is_iterable(object x)

cpdef object is_ordered_sequence(object x)

cpdef int find_next_prime(int N)

ctypedef int (*f_map_next)(object p, Py_ssize_t *ppos, PyObject* *pkey, PyObject* *pval) except -1

# utility functions to perform iteration over dicts or generic mapping
cdef class _iter_mapping:
    cdef object it
    cdef object cur

cdef f_map_next get_map_iter(object d, PyObject* *ptr) except NULL

cdef int PyMapping_Next(object p, Py_ssize_t *ppos, PyObject* *pkey, PyObject* *pval) except -1

cpdef object valfilter(object predicate, object d, object factory=*)

cpdef object valfilterfalse(object predicate, object d, object factory=*)


================================================
FILE: src/hangar/optimized_utils.pyx
================================================
"""
Portions of this code have been taken and modified from the "cytoolz" project.

URL:      https://github.com/pytoolz/cytoolz
File:     cytoolz/dicttoolz.pyx
Commit:   b66732f7f51937e85f5112481baf9db9c97b2ad2
Accessed: 05 APR 2020

CyToolz License
-------------------------------------------------------------------------------
License: New BSD
URL:     https://github.com/pytoolz/cytoolz/blob/b66732f7f51937e85f5112481baf9db9c97b2ad2/LICENSE.txt
"""
from cpython.dict cimport PyDict_CheckExact
from cpython.ref cimport PyObject, Py_DECREF, Py_INCREF, Py_XDECREF

# Locally defined bindings that differ from `cython.cpython` bindings
from .external_cpython cimport PyDict_Next_Compat, PtrIter_Next
from collections import deque


__all__ = ['valfilter', 'valfilterfalse', 'find_next_prime', 'is_iterable',
           'is_ordered_sequence', 'SizedDict']


cdef class SizedDict:
    """Sized dictionary"""

    def __init__(self, int maxsize=1000):
        self._data = dict()
        self._maxsize = maxsize
        self._stack = deque()
        self._stack_size = 0

    @property
    def maxsize(self):
        return self._maxsize

    def __repr__(self):
        return repr(self._data)

    def __contains__(self, key):
        """Return True if d has a key key, else False."""
        cdef bint res
        res = key in self._data
        return res

    def __getitem__(self, key):
        """Return the item of d with key key. Raises a KeyError if key
        is not in the map.
        """
        return self._data[key]

    def get(self, key, default=None):
        """Return the value for key if key is in the dictionary, else default.

        If default is not given, it defaults to None, so that this method
        never raises a KeyError.
        """
        return self._data.get(key, default)

    def __len__(self):
        """Return the number of items in the dictionary d.
        """
        return self._stack_size

    def __iter__(self):
        """Return an iterator over the keys of the dictionary.

        This is a shortcut for iter(d.keys()).
        """
        return iter(self.keys())

    def __setitem__(self, key, value):
        """Set d[key] to value
        """
        if self._stack_size >= self._maxsize:
            k_pop = self._stack.popleft()
            del self._data[k_pop]
            self._stack_size = self._stack_size - 1
        self._stack.append(key)
        self._data[key] = value
        self._stack_size = self._stack_size + 1

    def __delitem__(self, key):
        """Remove d[key] from d. Raises a KeyError if key is not in the map.
        """
        del self._data[key]
        self._stack.remove(key)
        self._stack_size = self._stack_size - 1

    def keys(self):
        """Return a new view of the dictionary’s keys."""
        return self._data.keys()

    def values(self):
        """Return a new view of the dictionary’s values."""
        return self._data.values()

    def items(self):
        """Return a new view of the dictionary’s items (``(key, value)`` pairs).
        """
        return self._data.items()

    def clear(self):
        """Remove all items from the dictionary.
        """
        self._stack.clear()
        self._data.clear()
        self._stack_size = 0

    def pop(self, key, default=None):
        """If key is in the dictionary, remove it and return its value,
        else return default.

        If default is not given (or is explicitly None) and key is not in the
        dictionary, a KeyError is raised.
        """
        cdef bint has_default

        has_default = not bool(default is None)
        if key in self._data:
            val = self._data.pop(key)
            self._stack.remove(key)
            self._stack_size = self._stack_size - 1
        elif has_default:
            val = default
        else:
            raise KeyError(key)
        return val

    def popitem(self):
        """Remove and return a (key, value) pair from the dictionary.
        Pairs are returned in LIFO order.

        popitem() is useful to destructively iterate over a dictionary,
        as often used in set algorithms. If the dictionary is empty, calling
        popitem() raises a KeyError.
        """
        cdef object lifo_key, lifo_val
        lifo_key = self._stack.pop()
        lifo_val = self._data.pop(lifo_key)
        self._stack_size = self._stack_size - 1
        return lifo_key, lifo_val

    def update(self, other):
        """Update the dictionary with the key/value pairs from other, overwriting
        existing keys. Return None.

        update() accepts either another dictionary object or an iterable of
        key/value pairs (as tuples or other iterables of length two). If keyword
        arguments are specified, the dictionary is then updated with those
        key/value pairs: d.update(red=1, blue=2).
        """
        if not isinstance(other, dict):
            other = dict(other)
        for k, v in other.items():
            self[k] = v

    def setdefault(self, key, default=None):
        """If key is in the dictionary, return its value. If not, insert key
        with a value of default and return default. default defaults to None.
        """
        try:
            return self._data[key]
        except KeyError:
            self[key] = default
            return default
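
# Sketch of the bounded-size behaviour (values illustrative only): once more
# than ``maxsize`` keys have been inserted, the oldest inserted key is evicted
# FIFO-style.
#
#     >>> d = SizedDict(maxsize=2)
#     >>> d['a'] = 1; d['b'] = 2; d['c'] = 3
#     >>> 'a' in d, sorted(d.keys())
#     (False, ['b', 'c'])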


cpdef object is_iterable(object x):
    """Is x iterable?

    >>> is_iterable([1, 2, 3])
    True
    >>> is_iterable('abc')
    True
    >>> is_iterable(5)
    False
    """
    try:
        iter(x)
        return True
    except TypeError:
        pass
    return False


cpdef object is_ordered_sequence(object x):
    """Is x an ordered sequence? (list, tuple)

    >>> is_ordered_sequence([1, 2, 3])
    True
    >>> is_ordered_sequence('abc')
    False
    >>> is_ordered_sequence({4, '3', 2})
    False
    """
    if isinstance(x, list) or isinstance(x, tuple):
        return True
    return False


cdef bint _is_prime(int n):
    cdef int i

    if n % 2 == 0:
        return False
    i = 3
    while i * i <= n:
        if n % i != 0:
            i += 2
        else:
            return False
    return True


cpdef int find_next_prime(int N):
    """Find next prime >= N

    Parameters
    ----------
    N : int
        Starting point to find the next prime >= N.

    Returns
    -------
    int
        the next prime found after the number N
    """

    if N < 3:
        return 2
    if N % 2 == 0:
        N += 1
    for n in range(N, 2 * N, 2):
        if _is_prime(n):
            return n
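
# Note / sketch (not part of the library API): the loop above always terminates
# because, by Bertrand's postulate, a prime p with N < p < 2N exists for every
# N > 1; even candidates are skipped since any even N > 2 is composite.
#
#     >>> find_next_prime(14)
#     17
#     >>> find_next_prime(17)
#     17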


cdef class _iter_mapping:
    """ Keep a handle on the current item to prevent memory clean up too early"""
    def __cinit__(self, object it):
        self.it = it
        self.cur = None

    def __iter__(self):
        return self

    def __next__(self):
        self.cur = next(self.it)
        return self.cur


cdef int PyMapping_Next(object p, Py_ssize_t *ppos, PyObject* *pkey, PyObject* *pval) except -1:
    """Mimic "PyDict_Next" interface, but for any mapping"""
    cdef PyObject *obj
    obj = PtrIter_Next(p)
    if obj is NULL:
        return 0
    pkey[0] = <PyObject*>(<object>obj)[0]
    pval[0] = <PyObject*>(<object>obj)[1]
    Py_XDECREF(obj)  # removing this results in memory leak
    return 1


cdef f_map_next get_map_iter(object d, PyObject* *ptr) except NULL:
    """Return function pointer to perform iteration over object returned in ptr.
    The returned function signature matches "PyDict_Next".  If ``d`` is a dict,
    then the returned function *is* PyDict_Next, so iteration will be very fast.
    The object returned through ``ptr`` needs to have its reference count
    reduced by one once the caller "owns" the object.
    This function lets us control exactly how iteration should be performed
    over a given mapping.  The current rules are:
    1) If ``d`` is exactly a dict, use PyDict_Next
    2) If ``d`` is subtype of dict, use PyMapping_Next.  This lets the user
       control the iteration order, such as for an ordereddict.
    3) If using PyMapping_Next, iterate using ``iteritems`` if possible,
       otherwise iterate using ``items``.
    """
    cdef object val
    cdef f_map_next rv
    if PyDict_CheckExact(d):
        val = d
        rv = &PyDict_Next_Compat
    elif hasattr(d, 'iteritems'):
        val = _iter_mapping(iter(d.iteritems()))
        rv = &PyMapping_Next
    else:
        val = _iter_mapping(iter(d.items()))
        rv = &PyMapping_Next
    Py_INCREF(val)
    ptr[0] = <PyObject*>val
    return rv


cpdef object valfilter(object predicate, object d, object factory=dict):
    """
    Filter items in dictionary by value
    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> valfilter(iseven, d)
    {1: 2, 3: 4}
    See Also:
        keyfilter
        itemfilter
        valmap
    """
    cdef:
        object rv
        f_map_next f
        PyObject *obj
        PyObject *pkey
        PyObject *pval
        Py_ssize_t pos = 0

    rv = factory()
    f = get_map_iter(d, &obj)
    d = <object>obj
    Py_DECREF(d)
    while f(d, &pos, &pkey, &pval):
        if predicate(<object>pval):
            rv[<object>pkey] = <object>pval
    return rv


cpdef object valfilterfalse(object predicate, object d, object factory=dict):
    """ Filter items in dictionary by values which are false.

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> valfilterfalse(iseven, d)
    {2: 3, 4: 5}

    See Also:
        valfilter
    """
    cdef:
        object rv
        f_map_next f
        PyObject *obj
        PyObject *pkey
        PyObject *pval
        Py_ssize_t pos = 0

    rv = factory()
    f = get_map_iter(d, &obj)
    d = <object>obj
    Py_DECREF(d)
    while f(d, &pos, &pkey, &pval):
        if not predicate(<object>pval):
            rv[<object>pkey] = <object>pval
    return rv


================================================
FILE: src/hangar/records/__init__.py
================================================
from .hashmachine import hash_func_from_tcode
from .column_parsers import *
from .recordstructs import (
    CompatibleData,
    ColumnSchemaKey,
    FlatColumnDataKey,
    NestedColumnDataKey,
    DataRecordVal,
)


__all__ = column_parsers.__all__ + [
    'hash_func_from_tcode',
    'CompatibleData',
    'ColumnSchemaKey',
    'FlatColumnDataKey',
    'NestedColumnDataKey',
    'DataRecordVal',
]


================================================
FILE: src/hangar/records/column_parsers.pyx
================================================
from .recordstructs cimport ColumnSchemaKey, \
    FlatColumnDataKey, \
    NestedColumnDataKey, \
    DataRecordVal

from .recordstructs import ColumnSchemaKey, \
    FlatColumnDataKey, \
    NestedColumnDataKey, \
    DataRecordVal

import ast

__all__ = [
    'schema_record_count_start_range_key',
    'schema_db_key_from_column',
    'schema_db_range_key_from_column_unknown_layout',
    'schema_column_record_from_db_key',
    'schema_hash_record_db_val_from_spec',
    'schema_spec_from_db_val',
    'schema_hash_db_key_from_digest',
    'data_record_digest_val_from_db_val',
    'data_record_db_val_from_digest',
    'flat_data_column_record_start_range_key',
    'flat_data_db_key_from_names',
    'flat_data_record_from_db_key',
    'nested_data_column_record_start_range_key',
    'nested_data_db_key_from_names',
    'nested_data_record_from_db_key',
    'dynamic_layout_data_record_from_db_key',
    'dynamic_layout_data_record_db_start_range_key',
    'dynamic_layout_data_record_db_key_from_names',
    'hash_schema_db_key_from_raw_key',
    'hash_data_db_key_from_raw_key',
    'hash_schema_raw_key_from_db_key',
    'hash_data_raw_key_from_db_key',
    'schema_record_db_val_from_digest',
]


# ----------------------- Schema Record Parsers -------------------------------


cpdef bytes schema_record_count_start_range_key():
    return 's:'.encode()


cpdef bytes schema_db_key_from_column(str column, str layout):
    """column schema db formated key from name and layout.

    Parameters
    ----------
    column: str
        name of the column
    layout: str
        layout of the column schema ('flat', 'nested', etc.)
    """
    cdef str serial

    if layout == 'flat':
        serial = f's:{column}:f'
    elif layout == 'nested':
        serial = f's:{column}:n'
    else:
        raise ValueError(f'layout {layout} not valid')
    return serial.encode()


cpdef bytes schema_db_range_key_from_column_unknown_layout(str column):
    """Find a cursor range key which will select a column schema key.
    
    Due to how information is appended onto the end of the schema db key,
    there is no need to know the column_layout or schema_digest to uniquely
    identify a column's schema record. set the cursor range and query the full
    key value (passed into a seperate parser) to recieve the column_layout 
    and schema_digest / hash type code. The schema spec is accessed at the 
    record value, or in the hash db under the corresponding schema_digest key.
    
    Parameters
    ----------
    column: str
        name of the column to query.
    """
    cdef str serial

    serial = f's:{column}:'
    return serial.encode()


cpdef ColumnSchemaKey schema_column_record_from_db_key(bytes raw):
    cdef str serial, column, layout

    serial = raw.decode()
    _, column, layout = serial.split(':')
    if layout == 'f':
        layout = 'flat'
    elif layout == 'n':
        layout = 'nested'
    else:
        raise ValueError(f'layout unknown for serial key {serial}')
    return ColumnSchemaKey(column, layout)


cpdef bytes schema_hash_record_db_val_from_spec(dict schema):
    cdef str serial

    serial = repr(schema).replace(' ', '')
    return serial.encode()


cpdef dict schema_spec_from_db_val(bytes raw):
    cdef str serialized
    cdef dict schema

    serialized = raw.decode()
    schema = ast.literal_eval(serialized)
    return schema


cpdef bytes schema_hash_db_key_from_digest(str digest):
    return f's:{digest}'.encode()


cpdef bytes schema_record_db_val_from_digest(str digest):
    return digest.encode()


# -------------------- Data Digest Record Value Parser -------------------------


cpdef DataRecordVal data_record_digest_val_from_db_val(bytes raw):
    """Convert and split a lmdb record value into data record val struct
    """
    cdef str serial

    serial = raw.decode()
    return DataRecordVal(serial)


cpdef bytes data_record_db_val_from_digest(str digest):
    """convert a data digest value spec into the appropriate lmdb record value
    """
    return digest.encode()


# -------------------------- flat parser --------------------------------------


cpdef bytes flat_data_column_record_start_range_key(str column):
    cdef str serial

    serial = f'f:{column}:'
    return serial.encode()


cpdef bytes flat_data_db_key_from_names(str column, sample):
    cdef str serial

    if isinstance(sample, int):
        serial = f'f:{column}:#{sample}'
    else:
        serial = f'f:{column}:{sample}'
    return serial.encode()


cpdef FlatColumnDataKey flat_data_record_from_db_key(bytes raw):
    cdef str serial, column, sample

    serial = raw.decode()
    _, column, sample = serial.split(':')
    return FlatColumnDataKey(column, sample)
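
# Round-trip sketch of the flat-layout key encoding above (column and sample
# names illustrative only): integer sample keys are prefixed with ``#`` so
# they can be distinguished from string keys when parsed back out.
#
#     >>> flat_data_db_key_from_names('foo', 1)
#     b'f:foo:#1'
#     >>> flat_data_db_key_from_names('foo', 'train_0')
#     b'f:foo:train_0'
#     >>> flat_data_column_record_start_range_key('foo')
#     b'f:foo:'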


# -------------------------- nested parser ------------------------------------


cpdef bytes nested_data_column_record_start_range_key(str column):
    cdef str serial

    serial = f'n:{column}:'
    return serial.encode()


cpdef bytes nested_data_db_key_from_names(str column, sample, subsample):
    cdef str serial

    if isinstance(sample, int):
        sample = f'#{sample}'
    if isinstance(subsample, int):
        subsample = f'#{subsample}'
    serial = f'n:{column}:{sample}:{subsample}'
    return serial.encode()


cpdef NestedColumnDataKey nested_data_record_from_db_key(bytes raw):
    cdef str serial, column, sample, subsample

    serial = raw.decode()
    _, column, sample, subsample = serial.split(':')
    return NestedColumnDataKey(column, sample, subsample)


# ----------------------- dynamic parser selection ----------------------------


cpdef object dynamic_layout_data_record_from_db_key(bytes raw):
    if raw[0:2] == b'f:':
        res = flat_data_record_from_db_key(raw)
    elif raw[0:2] == b'n:':
        res = nested_data_record_from_db_key(raw)
    elif raw[0:2] == b's:':
        res = schema_column_record_from_db_key(raw)
    else:
        raise ValueError(raw)
    return res


cpdef bytes dynamic_layout_data_record_db_start_range_key(ColumnSchemaKey column_record):
    cdef bytes res

    if column_record.layout == 'flat':
        res = flat_data_column_record_start_range_key(column_record.column)
    elif column_record.layout == 'nested':
        res = nested_data_column_record_start_range_key(column_record.column)
    else:
        raise ValueError(column_record)
    return res


def dynamic_layout_data_record_db_key_from_names(layout, column, *sample):
    if layout == 'flat':
        db_key = flat_data_db_key_from_names(column, sample[0])
    elif layout == 'nested':
        db_key = nested_data_db_key_from_names(column, sample[0], sample[1])
    else:
        raise ValueError(layout)
    return db_key
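
# Dispatch sketch for the dynamic helper above (column and sample names are
# illustrative): the ``layout`` string selects the flat or nested encoder.
#
#     >>> dynamic_layout_data_record_db_key_from_names('flat', 'foo', '0')
#     b'f:foo:0'
#     >>> dynamic_layout_data_record_db_key_from_names('nested', 'bar', '0', '3')
#     b'n:bar:0:3'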



#
# Data Hash parsing functions used to convert db key/val to raw python obj
# -----------------------------------------------------------------------


cpdef bytes hash_record_count_start_range_key():
    return 'h:'.encode()


cpdef bytes hash_schema_db_key_from_raw_key(str schema_hash):
    return f's:{schema_hash}'.encode()


cpdef bytes hash_data_db_key_from_raw_key(str data_hash):
    return f'h:{data_hash}'.encode()


cpdef str hash_schema_raw_key_from_db_key(bytes db_key):
    return db_key[2:].decode()


cpdef str hash_data_raw_key_from_db_key(bytes db_key):
    return db_key[2:].decode()


================================================
FILE: src/hangar/records/commiting.py
================================================
import configparser
import os
import shutil
import tempfile
import time
from contextlib import contextmanager, closing
from pathlib import Path

import lmdb

from .heads import (
    get_branch_head_commit,
    get_staging_branch_head,
    set_branch_head_commit,
    set_staging_branch_head,
)
from .parsing import (
    cmt_final_digest,
    commit_parent_db_key_from_raw_key,
    commit_parent_db_val_from_raw_val,
    commit_parent_raw_key_from_db_key,
    commit_parent_raw_val_from_db_val,
    commit_ref_db_key_from_raw_key,
    commit_ref_db_val_from_raw_val,
    commit_ref_raw_val_from_db_val,
    commit_spec_db_key_from_raw_key,
    commit_spec_db_val_from_raw_val,
    commit_spec_raw_val_from_db_val,
    DigestAndBytes,
)
from ..constants import (
    CONFIG_USER_NAME,
    DIR_DATA_REMOTE,
    DIR_DATA_STAGE,
    DIR_DATA_STORE,
    LMDB_SETTINGS,
    SEP_KEY,
)
from ..txnctx import TxnRegister

"""
Reading commit specifications and parents.
------------------------------------------
"""


def expand_short_commit_digest(refenv: lmdb.Environment, commit_hash: str) -> str:
    """Find the a full commit hash from a short version provided by the user

    Parameters
    ----------
    refenv : lmdb.Environment
        db where the commit references are stored
    commit_hash : str
        short commit hash to search for in the repository

    Returns
    -------
    str
        full commit hash if short maps to a unique digest in the repo history

    Raises
    ------
    KeyError
        If the short commit hash can reference two full commit digests
    KeyError
        if no expanded commit digest is found starting with the short version.
    """
    reftxn = TxnRegister().begin_reader_txn(refenv)
    commitParentStart = commit_parent_db_key_from_raw_key(commit_hash)
    with reftxn.cursor() as cursor:
        shortHashExists = cursor.set_range(commitParentStart)
        if shortHashExists is True:
            commitKey = cursor.key()
            commit_key = commit_parent_raw_key_from_db_key(commitKey)
            cursor.next()
            cursor.next()
            nextHashExist = cursor.next()
            if nextHashExist is False:
                return commit_key
            nextCommitKey = cursor.key()
            next_commit_key = commit_parent_raw_key_from_db_key(nextCommitKey)
            if next_commit_key.startswith(commit_hash) is True:
                raise KeyError(f'Non unique short commit hash: {commit_hash}')
            else:
                return commit_key
        else:
            raise KeyError(f'No matching commit hash found starting with: {commit_hash}')


def check_commit_hash_in_history(refenv, commit_hash):
    """Check if a commit hash exists in the repository history

    Parameters
    ----------
    refenv : lmdb.Environment
        refenv where the commit history is stored
    commit_hash : str
        hash of the commit to check for existence

    Returns
    -------
    bool
        True if exists, otherwise False
    """
    reftxn = TxnRegister().begin_reader_txn(refenv)
    try:
        commitParentKey = commit_parent_db_key_from_raw_key(commit_hash)
        commitParentVal = reftxn.get(commitParentKey, default=False)
        isCommitInHistory = True if commitParentVal is not False else False
    finally:
        TxnRegister().abort_reader_txn(refenv)
    return isCommitInHistory


def get_commit_spec(refenv, commit_hash):
    """Get the commit specifications of a particular hash.

    Parameters
    ----------
    refenv : lmdb.Environment
        refenv where the specs are stored
    commit_hash : str
        commit hash to query

    Returns
    -------
    namedtuple
        named tuple with all the commit specs included

    Raises
    ------
    ValueError
        if no commit exists with the provided hash
    """
    reftxn = TxnRegister().begin_reader_txn(refenv)
    try:
        parentCommitSpecKey = commit_spec_db_key_from_raw_key(commit_hash)
        parentCommitSpecVal = reftxn.get(parentCommitSpecKey, default=False)
    finally:
        TxnRegister().abort_reader_txn(refenv)

    if parentCommitSpecVal is False:
        raise ValueError(f'No commit exists with the hash: {commit_hash}')

    parentCommitSpec = commit_spec_raw_val_from_db_val(parentCommitSpecVal)
    return parentCommitSpec.user_spec


def get_commit_ancestors(refenv, commit_hash):
    """find the ancestors of a particular commit hash.

    Parameters
    ----------
    refenv : lmdb.Environment
        lmdb environment where the commit refs are stored
    commit_hash : string
        commit hash to find the ancestors for

    Returns
    -------
    namedtuple
        Namedtuple describing is_merge_commit, master_ancestor, &
        dev_ancestor (in the event of a merge commit)

    Raises
    ------
    ValueError
        if no commit exists with the provided hash
    """

    reftxn = TxnRegister().begin_reader_txn(refenv)
    try:
        parentCommitKey = commit_parent_db_key_from_raw_key(commit_hash)
        parentCommitVal = reftxn.get(parentCommitKey, default=False)
    finally:
        TxnRegister().abort_reader_txn(refenv)

    if parentCommitVal is False:
        raise ValueError(f'No commit exists with the hash: {commit_hash}')

    parentCommitAncestors = commit_parent_raw_val_from_db_val(parentCommitVal)
    return parentCommitAncestors.ancestor_spec


def get_commit_ancestors_graph(refenv, starting_commit):
    """returns a DAG of all commits starting at some hash pointing to the repo root.

    Parameters
    ----------
    refenv : lmdb.Environment
        lmdb environment where the commit refs are stored
    starting_commit : string
        commit hash to start creating the DAG from

    Returns
    -------
    dict
        a dictionary where each key is a commit hash encountered along the way,
        and its value is a list containing either one or two elements which
        identify the ancestor (parent) commits of that hash.
    """
    parent_commit = starting_commit
    commit_graph = {}
    seen = {starting_commit}
    more_work = []
    end_commit = False

    if parent_commit == '':
        end_commit = True

    while end_commit is not True:
        childCommit = get_commit_ancestors(refenv, parent_commit)

        if (childCommit.master_ancestor == '') or (childCommit.master_ancestor in seen):
            end_commit = True
            commit_graph[parent_commit] = [childCommit.master_ancestor]
            if len(more_work) != 0:
                master_commit = more_work.pop(0)
                end_commit = False
            else:
                continue

        elif childCommit.is_merge_commit is True:
            master_commit = childCommit.master_ancestor
            dev_commit = childCommit.dev_ancestor
            more_work.append(dev_commit)
            commit_graph[parent_commit] = [master_commit, dev_commit]
            seen.add(master_commit)
            seen.add(dev_commit)

        else:
            master_commit = childCommit.master_ancestor
            commit_graph[parent_commit] = [master_commit]
            seen.add(master_commit)

        parent_commit = master_commit

    return commit_graph
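
# Shape sketch of the returned mapping (hashes shortened for illustration, not
# real digests): a linear history ``c3 -> c2 -> c1 -> root`` would produce
#
#     {'c3': ['c2'], 'c2': ['c1'], 'c1': ['']}
#
# while a merge commit maps to a two-element list of [master_ancestor,
# dev_ancestor], whose branches are each walked back to the root as well.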


"""
Methods for reading packed commit data and reconstructing an unpacked format.
-----------------------------------------------------------------------------
"""


def get_commit_ref(refenv, commit_hash):
    """Read the commit data record references from a specific commit.

    This only returns a list of tuples with binary encoded key/value pairs.

    Parameters
    ----------
    refenv : lmdb.Environment
        lmdb environment where the references are stored
    commit_hash : string
        hash of the commit to retrieve.

    Returns
    -------
    tuple
        tuple of tuples containing encoded key/value pairs of the data
        records

    Raises
    ------
    ValueError
        if no commit exists with the provided hash
    """
    reftxn = TxnRegister().begin_reader_txn(refenv)
    try:
        cmtRefKey = commit_ref_db_key_from_raw_key(commit_hash)
        cmtSpecKey = commit_spec_db_key_from_raw_key(commit_hash)
        cmtParentKey = commit_parent_db_key_from_raw_key(commit_hash)

        cmtRefVal = reftxn.get(cmtRefKey, default=False)
        cmtSpecVal = reftxn.get(cmtSpecKey, default=False)
        cmtParentVal = reftxn.get(cmtParentKey, default=False)
    except lmdb.BadValsizeError:
        raise ValueError(f'No commit exists with the hash: {commit_hash}')
    finally:
        TxnRegister().abort_reader_txn(refenv)

    if (cmtRefVal is False) or (cmtSpecVal is False) or (cmtParentVal is False):
        raise ValueError(f'No commit exists with the hash: {commit_hash}')

    commitRefs = commit_ref_raw_val_from_db_val(cmtRefVal)
    commitSpecs = commit_spec_raw_val_from_db_val(cmtSpecVal)
    commitParent = commit_parent_raw_val_from_db_val(cmtParentVal)

    calculatedDigest = cmt_final_digest(
        parent_digest=commitParent.digest,
        spec_digest=commitSpecs.digest,
        refs_digest=commitRefs.digest)

    if calculatedDigest != commit_hash:
        raise IOError(
            f'Data Corruption Detected. On retrieval of stored references for '
            f'commit_hash: {commit_hash} validation of commit record/contents '
            f'integrity failed. Calculated digest: {calculatedDigest} != '
            f'expected: {commit_hash}. Please alert the Hangar development team to '
            f'this error if possible.')

    return commitRefs.db_kvs


def unpack_commit_ref(refenv, cmtrefenv, commit_hash):
    """unpack a commit record ref into a new key/val db for reader checkouts.

    This method also validates that the record data (parent, spec, and refs)
    have not been corrupted on disk (i.e. the recalculated commit digest must
    match the requested ``commit_hash``).

    Parameters
    ----------
    refenv : lmdb.Environment
        environment handle open for reading in the refenv
    cmtrefenv : lmdb.Environment
        environment handle open for writing on disk. this db must be empty.
    commit_hash : str
        hash of the commit to read in from refs and unpack in a checkout.
    """

    commitRefs = get_commit_ref(refenv=refenv, commit_hash=commit_hash)
    cmttxn = TxnRegister().begin_writer_txn(cmtrefenv)
    try:
        with cmttxn.cursor() as cursor:
            cursor.first()
            cursor.putmulti(commitRefs, append=True)
        try:
            cursor.close()
        except Exception as e:
            msg = f'could not close cursor cmttxn {cmttxn} commit_hash {commit_hash}'
            e.args = (*e.args, msg)
            raise e
    finally:
        TxnRegister().commit_writer_txn(cmtrefenv)

    return


@contextmanager
def tmp_cmt_env(refenv: lmdb.Environment, commit_hash: str):
    """create temporary unpacked lmdb environment from compressed structure

    Parameters
    ----------
    refenv : lmdb.Environment
        lmdb environment where the commit refs are stored
    commit_hash : str
        hash of the commit to get the contents of

    Returns
    -------
    lmdb.Environment
        environment with all db contents from ``commit`` unpacked
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpDF = os.path.join(tmpdir, 'test.lmdb')
        with closing(
                lmdb.open(tmpDF, sync=False, writemap=True, **LMDB_SETTINGS)
        ) as tmpDB:
            unpack_commit_ref(refenv, tmpDB, commit_hash)
            yield tmpDB


"""
Methods to write new commits
----------------------------

The functions below act to:
    - Read and format all record data from the staging area.
    - Determine the ancestor(s) of the new commit
    - Specify commit details (message, time, committer-info, etc.)
    - Coordinate record hashing
    - Write the commit record
    - Update the branch head to point to the new commit hash
"""

# ---------------- Functions to format the written values of a commit --------------------


def _commit_ancestors(branchenv: lmdb.Environment,
                      *,
                      is_merge_commit: bool = False,
                      master_branch: str = '',
                      dev_branch: str = '') -> DigestAndBytes:
    """Format the commit parent db value, finding HEAD commits automatically.

    This method handles formatting for both regular & merge commits through
    the keyword-only arguments.

    Parameters
    ----------
    branchenv : lmdb.Environment
        Lmdb environment where branch data is located. If not merge commit, head
        branch and commit will be found.
    is_merge_commit : bool, optional
        If this is a merge commit or not, defaults to False
    master_branch : str, optional
        If merge commit, the master branch name must be specified, and the
        branch HEAD commit hash will be determined automatically, defaults to ''
    dev_branch : str, optional
        If merge commit, the dev branch name must be specified, and the branch
        HEAD commit hash will be determined automatically, defaults to ''

    Returns
    -------
    DigestAndBytes
        Commit parent db value and digest of commit parent val formatted
        appropriately based on the repo state and any specified arguments.
    """
    if not is_merge_commit:
        masterBranch = get_staging_branch_head(branchenv)
        master_ancestor = get_branch_head_commit(branchenv, masterBranch)
        dev_ancestor = ''
    else:
        master_ancestor = get_branch_head_commit(branchenv, master_branch)
        dev_ancestor = get_branch_head_commit(branchenv, dev_branch)

    commitParentVal = commit_parent_db_val_from_raw_val(
        master_ancestor=master_ancestor,
        dev_ancestor=dev_ancestor,
        is_merge_commit=is_merge_commit)

    return commitParentVal


def _commit_spec(message: str, user: str, email: str) -> DigestAndBytes:
    """Format the commit specification according to the supplied username and email.

    This method currently only acts as a pass through to the parsing options
    (with time filled in).

    Parameters
    ----------
    message : string
        Commit message sent in by the user.
    user : str, optional
        Name of the committer
    email : str, optional
        Email of the committer

    Returns
    -------
    DigestAndBytes
        Formatted value for the specification field of the commit and digest of
        spec.
    """
    spec_db = commit_spec_db_val_from_raw_val(commit_time=time.time(),
                                              commit_message=message,
                                              commit_user=user,
                                              commit_email=email)
    return spec_db


def _commit_ref(stageenv: lmdb.Environment) -> DigestAndBytes:
    """Query and format all staged data records, and format it for ref storage.

    Parameters
    ----------
    stageenv : lmdb.Environment
        lmdb environment where the staged record data is actually stored.

    Returns
    -------
    DigestAndBytes
        Serialized and compressed version of all staged record data along with
        digest of commit refs.
    """
    from .queries import RecordQuery  # needed to avoid cyclic import

    querys = RecordQuery(dataenv=stageenv)
    allRecords = tuple(querys._traverse_all_records())
    res = commit_ref_db_val_from_raw_val(allRecords)
    return res


# -------------------- Format ref k/v pairs and write the commit to disk ----------------


def commit_records(message, branchenv, stageenv, refenv, repo_path: Path,
                   *, is_merge_commit=False, merge_master=None, merge_dev=None):
    """Commit all staged records to the repository, updating branch HEAD as needed.

    This method is intended to work for both merge commits as well as regular
    ancestor commits.

    Parameters
    ----------
    message : string
        Message the user associates with what has been added, removed, or
        changed in this commit. Must not be empty.
    branchenv : lmdb.Environment
        lmdb environment where branch records are stored.
    stageenv : lmdb.Environment
        lmdb environment where the staged data records are stored in
        uncompressed format.
    refenv : lmdb.Environment
        lmdb environment where the commit ref records are stored.
    repo_path : Path
        path to the hangar repository on disk
    is_merge_commit : bool, optional
        Is the commit a merge commit or not? defaults to False
    merge_master : string, optional
        If merge commit, specify the name of the master branch, defaults to None
    merge_dev : string, optional
        If merge commit, specify the name of the dev branch, defaults to None

    Returns
    -------
    string
        Commit hash of the newly added commit
    """
    cmtParent = _commit_ancestors(branchenv=branchenv,
                                  is_merge_commit=is_merge_commit,
                                  master_branch=merge_master,
                                  dev_branch=merge_dev)

    user_info_pth = Path(repo_path, CONFIG_USER_NAME)
    CFG = configparser.ConfigParser()
    CFG.read(user_info_pth)

    USER_NAME = CFG.get('USER', 'name', fallback=None)
    USER_EMAIL = CFG.get('USER', 'email', fallback=None)
    if (USER_NAME is None) or (USER_EMAIL is None):
        raise RuntimeError('Username and Email are required. Please configure.')

    cmtSpec = _commit_spec(message=message, user=USER_NAME, email=USER_EMAIL)
    cmtRefs = _commit_ref(stageenv=stageenv)

    commit_hash = cmt_final_digest(parent_digest=cmtParent.digest,
                                   spec_digest=cmtSpec.digest,
                                   refs_digest=cmtRefs.digest)

    commitSpecKey = commit_spec_db_key_from_raw_key(commit_hash)
    commitParentKey = commit_parent_db_key_from_raw_key(commit_hash)
    commitRefKey = commit_ref_db_key_from_raw_key(commit_hash)

    reftxn = TxnRegister().begin_writer_txn(refenv)
    try:
        reftxn.put(commitSpecKey, cmtSpec.raw, overwrite=False)
        reftxn.put(commitParentKey, cmtParent.raw, overwrite=False)
        reftxn.put(commitRefKey, cmtRefs.raw, overwrite=False)
    finally:
        TxnRegister().commit_writer_txn(refenv)

    # possible separate function
    move_process_data_to_store(repo_path)
    if is_merge_commit is False:
        headBranchName = get_staging_branch_head(branchenv)
        set_branch_head_commit(branchenv, headBranchName, commit_hash)
    else:
        set_staging_branch_head(branchenv=branchenv, branch_name=merge_master)
        set_branch_head_commit(branchenv, merge_master, commit_hash)

    return commit_hash


# --------------------- staging setup, may need to move this elsewhere ------------------


def replace_staging_area_with_commit(refenv, stageenv, commit_hash):
    """DANGER ZONE: Delete the stage db and replace it with a copy of a commit environment.

    .. warning::

        In the current implementation, this method will not validate that it is safe
        to do this operation. All validation logic must be handled upstream.

    Parameters
    ----------
    refenv : lmdb.Environment
        lmdb environment opened to the long term storage commit env
    stageenv : lmdb.Environment
        lmdb environment opened to the staging area.
    commit_hash : str
        commit hash to read from the refenv and replace the stage contents with.
    """
    stagetxn = TxnRegister().begin_writer_txn(stageenv)
    with stagetxn.cursor() as cursor:
        positionExists = cursor.first()
        while positionExists:
            positionExists = cursor.delete()
    cursor.close()
    TxnRegister().commit_writer_txn(stageenv)

    unpack_commit_ref(refenv=refenv, cmtrefenv=stageenv, commit_hash=commit_hash)
    return


def replace_staging_area_with_refs(stageenv, sorted_content):
    """DANGER ZONE: Delete all stage db records and replace it with specified data.

    .. warning::

        In the current implementation, this method will not validate that it is safe
        to do this operation. All validation logic must be handled upstream.

    Parameters
    ----------
    stageenv : lmdb.Environment
        staging area db to replace all data in.
    sorted_content : iterable of tuple
        iterable containing two-tuples of byte encoded record data to place in
        the stageenv db; index 0 -> db key, index 1 -> db val. It is assumed
        that the tuples are lexicographically sorted by their index 0 values;
        if not, this will result in undefined behavior.
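
    Examples
    --------
    Shape of the expected input (keys and values below are placeholders, not
    the actual record layout)::

        sorted_content = [
            (b'key-0001', b'value-0001'),
            (b'key-0002', b'value-0002'),
        ]
        replace_staging_area_with_refs(stageenv, sorted_content)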
    """
    stagetxn = TxnRegister().begin_writer_txn(stageenv)
    with stagetxn.cursor() as cursor:
        positionExists = cursor.first()
        while positionExists:
            positionExists = cursor.delete()
    cursor.close()
    TxnRegister().commit_writer_txn(stageenv)

    cmttxn = TxnRegister().begin_writer_txn(stageenv)
    try:
        with cmttxn.cursor() as cursor:
            cursor.first()
            cursor.putmulti(sorted_content, append=True)
        cursor.close()
    finally:
        TxnRegister().commit_writer_txn(stageenv)


def move_process_data_to_store(repo_path: Path, *, remote_operation: bool = False):
    """Move symlinks to hdf5 files from process directory to store directory

    In-process writes never directly access files in the data directory.
    Instead, when a file is created it is symlinked into either the remote data
    or stage data directory. All access is handled through this intermediate
    symlink in order to prevent any ability to overwrite (even if there are
    major errors in the hash records). Once the write operation is packed in
    the staging or remote area, this method is called to move the symlinks from
    the write enabled directory to the (read only, fully-committed) storage
    dir.

    Parameters
    ----------
    repo_path : Path
        path to the repository on disk
    remote_operation : bool, optional
        If this operation is occurring from a remote fetch operation. (the
        default is False, which means that all changes will occur in the
        staging area)

    """
    store_dir = Path(repo_path, DIR_DATA_STORE)

    type_dir = DIR_DATA_REMOTE if remote_operation else DIR_DATA_STAGE
    process_dir = Path(repo_path, type_dir)

    store_fps = []
    for be_pth in process_dir.iterdir():
        if be_pth.is_dir():
            for fpth in be_pth.iterdir():
                if fpth.is_file() and not fpth.stem.startswith('.'):
                    store_fps.append(store_dir.joinpath(be_pth.name, fpth.name))

    for fpth in store_fps:
        if not fpth.parent.is_dir():
            fpth.parent.mkdir()
        fpth.touch()

    # reset before releasing control.
    shutil.rmtree(process_dir)
    process_dir.mkdir(exist_ok=False)


def list_all_commits(refenv):
    """returns a list of all commits stored in the repository

    Parameters
    ----------
    refenv : lmdb.Environment
        db where all commit data is stored

    Returns
    -------
    list
        list of all commit digests.
    """
    refTxn = TxnRegister().begin_reader_txn(refenv)
    try:
        commits = set()
        with refTxn.cursor() as cursor:
            cursor.first()
            for k in cursor.iternext(keys=True, values=False):
                commitKey, *_ = k.decode().split(SEP_KEY)
                commits.add(commitKey)
            cursor.close()
    finally:
        TxnRegister().abort_reader_txn(refenv)

    return list(commits)


def number_commits_recorded(refenv) -> int:
    """Returns the total number of commits made across all history.
    """
    return len(list_all_commits(refenv))



================================================
FILE: src/hangar/records/hashmachine.pyx
================================================
import array
from cpython cimport array

import numpy as np
from hashlib import blake2b


cpdef str hash_type_code_from_digest(str digest):
    return digest[0]


cpdef object hash_func_from_tcode(str tcode):
    if tcode == '0':
        return ndarray_hasher_tcode_0
    elif tcode == '1':
        return schema_hasher_tcode_1
    elif tcode == '2':
        return pystr_hasher_tcode_2
    elif tcode == '3':
        return pybytes_hasher_tcode_3
    else:
        raise ValueError(f'unknown hash function type code. tcode: {tcode}')


# ---------------------------- numpy ndarray data ------------------------------


cdef bytes ser_int_list(list lst):
    cdef Py_ssize_t n=len(lst)
    cdef array.array res=array.array('i')

    array.resize(res, n)  #preallocate memory
    for i in range(n):
        # lst.__getitem__() needs a Python integer, so let i
        # be a Python integer (not a cdef typed variable)
        res.data.as_ints[i] = lst[i]
    return res.data.as_chars[:n*sizeof(int)]


def ndarray_hasher_tcode_0(array not None):
    """Generate the hex digest of some array data.

    This method hashes the concatenation of both array data bytes as well as a
    binary struct with the array shape and dtype num packed in. This is in
    order to avoid hash collisions where an array can have the same bytes, but
    different shape. An example of such a collision is np.zeros((10, 10, 10))
    and np.zeros((1000,)).

    Parameters
    ----------
    array : np.ndarray
        array data to take the hash of

    Returns
    -------
    str
        hex digest of the array data with typecode prepended by '{tcode}='.
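
    Examples
    --------
    Arrays with identical bytes but different shapes hash differently
    (illustrative only; exact digest values are elided)::

        >>> d1 = ndarray_hasher_tcode_0(np.zeros((10, 10, 10)))
        >>> d2 = ndarray_hasher_tcode_0(np.zeros((1000,)))
        >>> d1.startswith('0=') and d2.startswith('0=')
        True
        >>> d1 == d2
        False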
    """
    cdef str digest
    cdef bytes other_info
    cdef list shape = []

    shape = list(array.shape)
    shape.append(array.dtype.num)
    other_info = ser_int_list(shape)

    hasher = blake2b(array, digest_size=20)
    hasher.update(other_info)
    digest = hasher.hexdigest()
    return f'0={digest}'


# ------------------------------ Schema ---------------------------------------


def _make_hashable(o):
    """Sort container object and deterministically output frozen representation
    """
    if isinstance(o, (tuple, list)):
        return tuple((_make_hashable(e) for e in o))

    if isinstance(o, dict):
        return tuple(sorted((k, _make_hashable(v)) for k, v in o.items()))

    if isinstance(o, (set, frozenset)):
        return tuple(sorted(_make_hashable(e) for e in o))
    return o


cpdef str schema_hasher_tcode_1(dict schema):
    """Generate the schema hash for some schema specification

    Parameters
    ----------
    schema : dict
        dict representation of the schema spec.

    Returns
    -------
    str
        hex digest of this information with typecode prepended by '{tcode}='.
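
    Examples
    --------
    Key ordering of the input dict does not affect the digest (field names
    below are placeholders, not an actual schema spec)::

        >>> a = schema_hasher_tcode_1({'dtype': 7, 'shape': (5, 5)})
        >>> b = schema_hasher_tcode_1({'shape': (5, 5), 'dtype': 7})
        >>> a == b
        True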
    """
    cdef bytes serialized
    cdef str digest, res

    frozenschema = _make_hashable(schema)
    serialized = repr(frozenschema).encode()
    digest = blake2b(serialized, digest_size=6).hexdigest()
    res = f'1={digest}'
    return res


# --------------------------- string type data ----------------------------------------


cpdef str pystr_hasher_tcode_2(str value):
    """Generate the hash digest of some str value

    Parameters
    ----------
    value : str
        data value to hash

    Returns
    -------
    str
        hex digest of the data value with typecode prepended by '{tcode}='.
    """
    cdef bytes raw
    cdef str digest, res

    raw = value.encode()
    digest = blake2b(raw, digest_size=20).hexdigest()
    res = f'2={digest}'
    return res



# --------------------------- bytes type data ----------------------------------------


cpdef str pybytes_hasher_tcode_3(bytes value):
    """Generate the hash digest of some bytes value

    Parameters
    ----------
    value : bytes
        data value to hash

    Returns
    -------
    str
        hex digest of the data value with typecode prepended by '{tcode}='.
    """
    cdef str digest, res

    digest = blake2b(value, digest_size=20).hexdigest()
    res = f'3={digest}'
    return res


================================================
FILE: src/hangar/records/hashs.py
================================================
from pathlib import Path
from typing import Iterable, List, Tuple, Union, Set

import lmdb

from .column_parsers import (
    hash_record_count_start_range_key,
    hash_schema_raw_key_from_db_key,
    hash_data_raw_key_from_db_key,
    schema_hash_db_key_from_digest,
    schema_spec_from_db_val,
    schema_record_count_start_range_key
)
from ..backends import BACKEND_ACCESSOR_MAP, backend_decoder
from ..txnctx import TxnRegister
from ..mixins import CursorRangeIterator
from ..utils import ilen


class HashQuery(CursorRangeIterator):
    """Traverse and query contents contained in ``hashenv`` db

    These methods operate on the database which stores the mapping of some data
    digest to its location on disk (or value in the case of metadata and
    schemas). These databases are not specific to a particular commit; the
    records are for every piece of data stored in every commit across history.

    There are relatively few procedures which require traversal and mapping
    across data records in this manner. The two most notable use cases are:

        1. Remote client-server negotiation operations
        2. Verifying the integrity of a repository's historical provenance,
           commit contents, and data stored on disk.
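
    Illustrative usage sketch (``hashenv`` is a hypothetical, already-open
    ``lmdb.Environment`` handle)::

        hq = HashQuery(hashenv)
        n_data = hq.num_data_records()
        n_schemas = hq.num_schema_records()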
    """

    def __init__(self, hashenv: lmdb.Environment):
        self._hashenv = hashenv

    # ------------------ traversing the unpacked records ----------------------

    def _traverse_all_hash_records(self, keys: bool = True, values: bool = True
                                   ) -> Iterable[Union[bytes, Tuple[bytes, bytes]]]:
        """PUll out all binary encoded data hash records.

        Parameters
        ----------
        keys : bool, optional
            if True, returns keys, by default True
        values : bool, optional
            if True, return values, by default True

        Yields
        -------
        Union[bytes, Tuple[bytes, bytes]]
            Iterable of data hash record keys, values, or items tuple
        """
        startHashRangeKey = hash_record_count_start_range_key()
        try:
            hashtxn = TxnRegister().begin_reader_txn(self._hashenv)
            yield from self.cursor_range_iterator(hashtxn, startHashRangeKey, keys, values)
        finally:
            TxnRegister().abort_reader_txn(self._hashenv)

    def _traverse_all_schema_records(self, keys: bool = True, values: bool = True
                                     ) -> Iterable[Union[bytes, Tuple[bytes, bytes]]]:
        """Pull out all binary encoded schema hash records.

        Parameters
        ----------
        keys : bool, optional
            if True, returns keys, by default True
        values : bool, optional
            if True, return values, by default True

        Yields
        -------
        Union[bytes, Tuple[bytes, bytes]]
            Iterable of schema record keys, values, or items tuple
        """
        startSchemaRangeKey = schema_record_count_start_range_key()
        try:
            hashtxn = TxnRegister().begin_reader_txn(self._hashenv)
            yield from self.cursor_range_iterator(hashtxn, startSchemaRangeKey, keys, values)
        finally:
            TxnRegister().abort_reader_txn(self._hashenv)

    def list_all_hash_keys_raw(self) -> List[str]:
        recs = self._traverse_all_hash_records(keys=True, values=False)
        return list(map(hash_data_raw_key_from_db_key, recs))

    def gen_all_hash_keys_db(self) -> Iterable[bytes]:
        return self._traverse_all_hash_records(keys=True, values=False)

    def intersect_keys_db(self, other: Set[bytes]):
        """Set intersection of provided keys and those contained in the database.

        Parameters
        ----------
        other: Set[bytes]
            Set of db formatted keys to intersect with keys of the lmdb environment.

        Returns
        -------
        Set[bytes]
            intersection of input with the keys existing in the lmdb environment.
        """
        res = []
        hashtxn = TxnRegister().begin_reader_txn(self._hashenv)
        try:
            with hashtxn.cursor() as cur:
                # sort input sequence to reduce time spent moving cursor.
                for key in sorted(other):
                    if cur.set_key(key):
                        res.append(key)
        finally:
            TxnRegister().abort_reader_txn(self._hashenv)
        return set(res)

    def list_all_schema_digests(self) -> List[str]:
        recs = self._traverse_all_schema_records(keys=True, values=False)
        return list(map(hash_schema_raw_key_from_db_key, recs))

    def gen_all_schema_keys_db(self) -> Iterable[bytes]:
        return self._traverse_all_schema_records(keys=True, values=False)

    def num_data_records(self) -> int:
        """Total count of all data digests / backends specs stored over all repo history.
        """
        num_total = self._hashenv.stat()['entries']
        remaining = num_total - self.num_schema_records()
        return remaining

    def num_schema_records(self) -> int:
        """Total count of schema digests / spec defs stored over all repo history.
        """
        return ilen(self._traverse_all_schema_records(keys=True, values=False))

    def gen_all_data_digests_and_parsed_backend_specs(self):
        for dbk, dbv in self._traverse_all_hash_records(keys=True, values=True):
            rawk = hash_data_raw_key_from_db_key(dbk)
            rawv = backend_decoder(dbv)
            yield (rawk, rawv)

    def gen_all_schema_digests_and_parsed_specs(self) -> Iterable[Tuple[str, dict]]:
        for dbk, dbv in self._traverse_all_schema_records(keys=True, values=True):
            rawk = hash_schema_raw_key_from_db_key(dbk)
            rawv = schema_spec_from_db_val(dbv)
            yield (rawk, rawv)

    def get_schema_digest_spec(self, digest) -> dict:
        schemaHashKey = schema_hash_db_key_from_digest(digest)
        try:
            hashtxn = TxnRegister().begin_reader_txn(self._hashenv)
            schemaSpecVal = hashtxn.get(schemaHashKey)
        finally:
            TxnRegister().abort_reader_txn(self._hashenv)

        schema_spec = schema_spec_from_db_val(schemaSpecVal)
        return schema_spec


def backends_remove_in_process_data(repo_path: Path, *, remote_operation: bool = False):
    """DANGER! Permanently delete uncommitted data files/links for stage or remote area.

    This searches each backend accessor's staged (or remote) folder structure
    for files; if any are present, the symlinks in the stage dir and the
    backing data files in the data dir are removed.

    Parameters
    ----------
    repo_path : Path
        path to the repository on disk
    remote_operation : optional, kwarg only, bool
        If true, modify contents of the remote_dir, if false (default) modify
        contents of the staging directory.
    """
    for backend, accessor in BACKEND_ACCESSOR_MAP.items():
        if accessor is not None:
            accessor.delete_in_process_data(repo_path=repo_path,
                                             remote_operation=remote_operation)


def clear_stage_hash_records(stagehashenv):
    """Drop all records in the stagehashenv db

    This operation should be performed anytime a reset of the staging area is
    performed (including for commits, merges, and checkouts)

    Parameters
    ----------
    stagehashenv : lmdb.Environment
        db where staged data hash additions are recorded
    """
    stagehashtxn = TxnRegister().begin_writer_txn(stagehashenv)
    with stagehashtxn.cursor() as cursor:
        positionExists = cursor.first()
        while positionExists:
            positionExists = cursor.delete()
    cursor.close()
    TxnRegister().commit_writer_txn(stagehashenv)


def remove_stage_hash_records_from_hashenv(hashenv, stagehashenv):
    """Remove references to data additions during a hard reset

    For every hash record in stagehashenv, remove the corresponding k/v pair
    from the hashenv db. This is a dangerous operation if the stagehashenv was
    not appropriately constructed!!!

    Parameters
    ----------
    hashenv : lmdb.Environment
        db where all the permanent hash records are stored
    stagehashenv : lmdb.Environment
        db where all the staged hash records to be removed are stored.
    """
    stageHashKeys = HashQuery(stagehashenv).gen_all_hash_keys_db()
    hashtxn = TxnRegister().begin_writer_txn(hashenv)
    for hashKey in stageHashKeys:
        hashtxn.delete(hashKey)
    TxnRegister().commit_writer_txn(hashenv)


================================================
FILE: src/hangar/records/heads.py
================================================
import warnings
from collections import defaultdict
from typing import NamedTuple

import lmdb

from .parsing import (
    remote_db_key_from_raw_key,
    remote_db_val_from_raw_val,
    remote_raw_key_from_db_key,
    remote_raw_val_from_db_val,
    repo_branch_head_db_key_from_raw_key,
    repo_branch_head_db_val_from_raw_val,
    repo_branch_head_raw_key_from_db_key,
    repo_branch_head_raw_val_from_db_val,
    repo_head_db_key,
    repo_head_db_val_from_raw_val,
    repo_head_raw_val_from_db_val,
    repo_writer_lock_db_key,
    repo_writer_lock_db_val_from_raw_val,
    repo_writer_lock_force_release_sentinal,
    repo_writer_lock_sentinal_db_val,
)
from ..constants import K_REMOTES, K_BRANCH
from ..txnctx import TxnRegister


class BranchHead(NamedTuple):
    name: str
    digest: str


"""
Write operation enabled lock methods
------------------------------------

Any operation which wants to interact with the main storage services in a
write-enabled way must acquire a lock to perform the operation. See docstrings
below for more info.
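
A typical lock lifecycle looks like the sketch below (illustrative only;
``branchenv`` is the already-open branch db handle and the uuid value shown is
a hypothetical placeholder generated by the write-enabled checkout instance)::

    writer_uuid = 'hypothetical-writer-uuid'
    acquire_writer_lock(branchenv, writer_uuid)   # raises PermissionError if already held
    # ... perform write-enabled operations ...
    release_writer_lock(branchenv, writer_uuid)   # returns the lock to its sentinel value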
"""


def writer_lock_held(branchenv):
    """Check to see if the writer lock is free before attempting to acquire it.

    Parameters
    ----------
    branchenv : lmdb.Environment
        lmdb environment where the writer lock is stored

    Returns
    -------
    bool
        True if the lock is available to take, False if it is currently held.
    """
    writerLockKey = repo_writer_lock_db_key()
    writerLockSentinalVal = repo_writer_lock_sentinal_db_val()
    branchtxn = TxnRegister().begin_reader_txn(branchenv)
    try:
        currentWriterLockVal = branchtxn.get(writerLockKey)
        if currentWriterLockVal == writerLockSentinalVal:
            lockAvailable = True
        elif currentWriterLockVal is None:
            # on first initialization, writer lock key/val is not set.
            lockAvailable = True
        else:
            lockAvailable = False
    finally:
        TxnRegister().abort_reader_txn(branchenv)
    return lockAvailable


def acquire_writer_lock(branchenv, writer_uuid):
    """Attempt to acquire the writer lock for a write-enabled checkout object.

    If the writer_uuid matches the recorded value, or the lock is available (or
    uninitialized entirely in the case of a brand-new repository), the lock will
    be updated with the requested uuid, and no other write-enabled checkout can
    be started until it is either released, or a force reset is performed (in
    the event of a system crash or user error.)

    Parameters
    ----------
    branchenv : lmdb.Environment
        lmdb environment where the writer lock is stored
    writer_uuid : str
        uuid generated when a write enabled checkout instance starts

    Returns
    -------
    bool
        success of the operation. This is validated by the writer class as a
        safety net in the event some upstream user code tries to catch the
        exception.

    Raises
    ------
    PermissionError
        If the lock can not be acquired

    """
    writerLockKey = repo_writer_lock_db_key()
    writerLockSentinalVal = repo_writer_lock_sentinal_db_val()
    requestWriterLockVal = repo_writer_lock_db_val_from_raw_val(writer_uuid)

    branchtxn = TxnRegister().begin_writer_txn(branchenv)
    try:
        currentWriterLockVal = branchtxn.get(writerLockKey)
        if currentWriterLockVal == requestWriterLockVal:
            success = True
        elif currentWriterLockVal == writerLockSentinalVal:
            branchtxn.put(writerLockKey, requestWriterLockVal)
            success = True
        elif currentWriterLockVal is None:
            # on first initialization, writer lock key/val is not set.
            branchtxn.put(writerLockKey, requestWriterLockVal)
            success = True
        else:
            err = 'Cannot acquire the writer lock. Only one instance of a writer checkout '\
                  'can be active at a time. If the last checkout of this repository did '\
                  'not properly close, or a crash occurred, the lock must be manually freed '\
                  'before another writer can be instantiated.'
            raise PermissionError(err)
    finally:
        TxnRegister().commit_writer_txn(branchenv)

    return success


def release_writer_lock(branchenv, writer_uuid):
    """Internal method to release a writer lock held by a specified uuid.

    This method also accepts the force-release sentinel passed by a caller in
    the writer_uuid field. If the writer_uuid does not match the lock value
    (and the force sentinel is not used), then a RuntimeError will be raised
    and no operation performed.

    Parameters
    ----------
    branchenv : lmdb.Environment
        lmdb environment where the lock key/val lives
    writer_uuid : str
        uuid of the requested releaser

    Returns
    -------
    bool
        if the operation was successful or not

    Raises
    ------
    RuntimeError
        if the requesting uuid does not match the lock value.
    """
    writerLockKey = repo_writer_lock_db_key()
    forceReleaseSentinal = repo_writer_lock_force_release_sentinal()
    lockSentinalVal = repo_writer_lock_sentinal_db_val()
    requestWriterLockVal = repo_writer_lock_db_val_from_raw_val(writer_uuid)

    txn = TxnRegister().begin_writer_txn(branchenv)
    try:
        currentLockVal = txn.get(writerLockKey)
        if writer_uuid == forceReleaseSentinal:
            warnings.warn('Writer lock successfully force released.', ResourceWarning)
            txn.put(writerLockKey, lockSentinalVal)
            success = True
        elif currentLockVal == requestWriterLockVal:
            txn.put(writerLockKey, lockSentinalVal)
            success = True
        elif currentLockVal == lockSentinalVal:
            warnings.warn('The lock is already available, no release is necessary.', UserWarning)
            success = True
        else:
            err = f'FATAL ERROR Requested release of writer lock: {currentLockVal} by '\
                  f'non-valid requestor: {requestWriterLockVal} -- How did this happen?'
            raise RuntimeError(err)
    finally:
        TxnRegister().commit_writer_txn(branchenv)

    return success


"""
Methods to interact with the branch head records
------------------------------------------------

.. todo::
   Need a delete branch operation.
"""

# ---------------- branch creation and deletion operations ------------------------------


def create_branch(branchenv, name, base_commit) -> BranchHead:
    """Internal operations used to create a branch.

    Parameters
    ----------
    branchenv : lmdb.Environment
        lmdb environment of the branch db
    name : str
        Name of the branch to create. If a branch with this name already
        exists, no operation will occur and a `ValueError` will be thrown.
    base_commit : str
        The commit to start this branch from.

    Returns
    -------
    BranchHead
        NamedTuple[str, str] with fields for `name` and `digest` of the branch
        created (if the operation was successful)

    Raises
    ------
    ValueError
        If the branch already exists, no-op and raise this.
    RuntimeError
        If the repository does not have at-least one commit on the `default`
        (ie. `master`) branch.
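
    Examples
    --------
    Illustrative only (``branchenv`` is a hypothetical, already-open
    ``lmdb.Environment`` of a repository containing at least one commit)::

        head = create_branch(branchenv, 'new-branch', base_commit=None)
        assert head.name == 'new-branch'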
    """
    if base_commit is None:
        headBranch = get_staging_branch_head(branchenv)
        base_commit = get_branch_head_commit(branchenv, headBranch)
        if (headBranch == 'master') and (base_commit == ''):
            msg = 'At least one commit must be made in the repository on the `default` '\
                  '(`master`) branch before new branches can be created'
            raise RuntimeError(msg)

    branchHeadKey = repo_branch_head_db_key_from_raw_key(name)
    branchHeadVal = repo_branch_head_db_val_from_raw_val(base_commit)

    branchtxn = TxnRegister().begin_writer_txn(branchenv)
    try:
        success = branchtxn.put(branchHeadKey, branchHeadVal, overwrite=False)
        if success is False:
            err = f'A branch with the name {name} already exists, please specify '\
                  f'a different name or delete the branch.'
            raise ValueError(err)
    finally:
        TxnRegister().commit_writer_txn(branchenv)

    return BranchHead(name=name, digest=base_commit)


def remove_branch(branchenv: lmdb.Environment,
                  refenv: lmdb.Environment,
                  name: str,
                  *,
                  force_delete: bool = False) -> BranchHead:
    """Remove a branch head pointer after verifying validity and safety

    Parameters
    ----------
    branchenv : lmdb.Environment
        db containing the branch head specs
    refenv : lmdb.Environment
        db containing the commit refs
    name : str
        name of the branch which should be deleted.
    force_delete : bool, optional
        If True, remove the branch pointer even if the changes are unmerged
        into other branch histories. By default False.

    Returns
    -------
    BranchHead
        NamedTuple[str, str] with fields for `name` and `digest` of the branch
        pointer deleted.

    Raises
    ------
    ValueError
        If a branch with the provided name does not exist locally
    PermissionError
        If removal of the branch would result in a repository with zero local
        branches.
    PermissionError
        If a write enabled checkout is holding the writer-lock at time of this
        call.
    PermissionError
        If the branch to be removed was the last used in a write-enabled
        checkout, and whose contents form the base of the staging area.
    RuntimeError
        If the branch has not been fully merged into other branch histories,
        and ``force_delete`` option is not ``True``.
    """
    from .commiting import get_commit_ancestors_graph

    all_branches = get_branch_names(branchenv)
    alive_branches = [x for x in all_branches if '/' not in x]  # exclude remotes
    if name not in alive_branches:
        raise ValueError(f'Branch: {name} does not exist')

    alive_branches.remove(name)
    if len(alive_branches) == 0:
        msg = f'Not allowed to remove all branches from a repository! '\
              f'Operation aborted without completing removal of branch: {name}'
        raise PermissionError(msg)

    if writer_lock_held(branchenv) is False:
        msg = f'Cannot remove branch when a `write-enabled` checkout is active. '\
              f're-run after committing/closing the writer.'
        raise PermissionError(msg)

    staging_branch = get_staging_branch_head(branchenv)
    if staging_branch == name:
        msg = f'Branch: {name} cannot be deleted while acting as the base for '\
              f'contents of the staging area. re-run after checking out a '\
              f'different branch in `write` mode.'
        raise PermissionError(msg)

    HEAD = get_branch_head_commit(branchenv, name)
    if not force_delete:
        for branch in alive_branches:
            b_head = get_branch_head_commit(branchenv, branch)
            b_ancestors = get_commit_ancestors_graph(refenv, starting_commit=b_head)
            if HEAD in b_ancestors:
                break
        else:  # N.B. for-else conditional (ie. "no break")
            msg = f'The branch {name} is not fully merged. If you are sure '\
                  f'you want to delete it, re-run with force-remove parameter set'
            raise RuntimeError(msg)

    branchtxn = TxnRegister().begin_writer_txn(branchenv)
    try:
        branchHeadKey = repo_branch_head_db_key_from_raw_key(name)
        branchtxn.delete(branchHeadKey)
    finally:
        TxnRegister().commit_writer_txn(branchenv)

    return BranchHead(name=name, digest=HEAD)


# ------------- set and get with staging area HEAD branch name --------------------------


def get_staging_branch_head(branchenv):
    """Get the name of the current staging area HEAD branch

    Parameters
    ----------
    branchenv : lmdb.Environment
        lmdb environment for the branch references

    Returns
    -------
    str
        name of the staging HEAD branch
    """
    headKey = repo_head_db_key()
    txn = TxnRegister().begin_reader_txn(branchenv)
    try:
        headBranchVal = txn.get(headKey)
    finally:
        TxnRegister().abort_reader_txn(branchenv)
    headBranch = repo_head_raw_val_from_db_val(headBranchVal)
    return headBranch


def set_staging_branch_head(branchenv, branch_name):
    """Set the writer HEAD to a branch name. Does not modify staging area contents.

    A writer-checkout must specify a branch name to use as its ancestor. We do
    not allow a writer (or staging area) to exist in a "Detached HEAD" state. In
    order to make modifications starting from a specific commit, the user must
    create a branch with that commit hash as the specified "base".

    Parameters
    ----------
    branchenv : lmdb.Environment
        lmdb environment of the branch db.
    branch_name : str
        name of the branch to checkout.

    Returns
    -------
    bool
        if the operation was successful.

    Raises
    ------
    ValueError
        If the specified branch name does not exist.
    """
    headKey = repo_head_db_key()
    requestedHeadVal = repo_head_db_val_from_raw_val(branch_name)
    requestedBranchKey = repo_branch_head_db_key_from_raw_key(branch_name)

    branchtxn = TxnRegister().begin_writer_txn(branchenv)
    try:
        branchNameExists = branchtxn.get(requestedBranchKey, default=False)
        if branchNameExists is False:
            err = f'No branch with the name: {branch_name} exists, no-op performed'
            raise ValueError(err)
        else:
            branchtxn.put(headKey, requestedHeadVal)
            success = True
    finally:
        TxnRegister().commit_writer_txn(branchenv)

    return success


# ------------- get and set a named branch HEAD commit hash --------------------------===


def get_branch_head_commit(branchenv, branch_name):
    """Find the commit hash which corresponds to the HEAD of a particular branch.

    Parameters
    ----------
    branchenv: lmdb.Environment
        lmdb environment for the branch spec
    branch_name: str
        name of the branch to find the head commit hash for

    Returns
    -------
    str
        the commit hash of the branch head

    Raises
    ------
    ValueError
        if `branch_name` does not exist in the repository
    """
    requestedBranchKey = repo_branch_head_db_key_from_raw_key(branch_name)
    branchtxn = TxnRegister().begin_reader_txn(branchenv)
    try:
        branchNameVal = branchtxn.get(requestedBranchKey, default=False)
        if branchNameVal is False:
            err = f'branch with name: {branch_name} does not exist. cannot get head.'
            raise ValueError(err)
    finally:
        TxnRegister().abort_reader_txn(branchenv)

    commit_hash = repo_branch_head_raw_val_from_db_val(branchNameVal)
    return commit_hash


def set_branch_head_commit(branchenv, branch_name, commit_hash):
    """Update an existing branch HEAD to point to a new commit hash.

    Does not update stage or refenv contents. If the current HEAD of the branch
    == the new commit hash, no operation will occur and an exception will be
    thrown.

    Parameters
    ----------
    branchenv : lmdb.Environment
        lmdb environment where the branch records are kept
    branch_name : string
        Name of the branch to update the HEAD commit of
    commit_hash : string
        Commit hash to update the branch HEAD to point to.

    Returns
    -------
    string
        Commit hash of the new branch head if the operation was successful.

    Raises
    ------
    ValueError
        If the current HEAD is the same as the new commit hash.
    """
    currentHeadCommit = get_branch_head_commit(branchenv=branchenv, branch_name=branch_name)
    if currentHeadCommit == commit_hash:
        err = f'Current branch: {branch_name} HEAD: {currentHeadCommit} is same as the '\
              f'requested updated HEAD: {commit_hash}, no-op performed'
        raise ValueError(err)

    branchtxn = TxnRegister().begin_writer_txn(branchenv)
    try:
        branchHeadKey = repo_branch_head_db_key_from_raw_key(branch_name)
        branchHeadVal = repo_branch_head_db_val_from_raw_val(commit_hash)
        branchtxn.put(branchHeadKey, branchHeadVal)
    finally:
        TxnRegister().commit_writer_txn(branchenv)

    return commit_hash


def get_branch_names(branchenv):
    """get a list of all branches in the repository.

    Parameters
    ----------
    branchenv : lmdb.Environment
        lmdb environment storing the branch records.

    Returns
    -------
    list of str
        list of branch names active in the repository.
    """
    branchStartKey = K_BRANCH.encode()  # TODO: This is odd, why??
    branchNames = []
    branchTxn = TxnRegister().begin_reader_txn(branchenv)
    try:
        with branchTxn.cursor() as cursor:
            cursor.first()
            branchRangeExists = cursor.set_range(branchStartKey)
            while branchRangeExists:
                branchKey = cursor.key()
                if branchKey.startswith(branchStartKey):
                    name = repo_branch_head_raw_key_from_db_key(branchKey)
                    branchNames.append(name)
                    branchRangeExists = cursor.next()
                else:
                    branchRangeExists = False
        cursor.close()
    finally:
        TxnRegister().abort_reader_txn(branchenv)

    return branchNames


def commit_hash_to_branch_name_map(branchenv: lmdb.Environment) -> dict:
    """Determine branch names which map to commit hashs

    Parameters
    ----------
    branchenv : lmdb.Environment
        db where the branch references are stored

    Returns
    -------
    dict
        keys are commit hash strings, values are list of branch names (strings)
        whose HEAD are at the key commit
    """
    outMap = defaultdict(list)
    branchNames = get_branch_names(branchenv=branchenv)
    for branchName in branchNames:
        branchHEAD = get_branch_head_commit(branchenv=branchenv, branch_name=branchName)
        outMap[branchHEAD].append(branchName)

    return outMap


# ----------------------------- Remotes ---------------------------------------


def add_remote(branchenv: lmdb.Environment, name: str, address: str) -> bool:
    """add a remote server reference to the repository.

    This method does not check that the remote is actually accessible, rather it
    just records the reference. If a remote with the same name already exists,
    no change will occur.

    Parameters
    ----------
    branchenv : lmdb.Environment
        db where the branch (and remote) references are stored.
    name : str
        name of the remote to add the address for
    address : str
        IP:PORT where the remote server can be accessed

    Returns
    -------
    bool
        True if the new reference was saved, False if not.
    """
    dbKey = remote_db_key_from_raw_key(name)
    dbVal = remote_db_val_from_raw_val(address)

    branchTxn = TxnRegister().begin_writer_txn(branchenv)
    try:
        succ = branchTxn.put(dbKey, dbVal, overwrite=False)
    finally:
        TxnRegister().commit_writer_txn(branchenv)

    return succ


def get_remote_address(branchenv: lmdb.Environment, name: str) -> str:
    """Retrieve the IO:PORT of the remote server for a given name

    Parameters
    ----------
    branchenv : lmdb.Environment
        db where the branch (and remote) references are stored
    name : str
        name of the remote to fetch

    Raises
    ------
    KeyError
        if a remote with the provided name does not exist

    Returns
    -------
    str
        IP:PORT of the recorded remote server.
    """
    dbKey = remote_db_key_from_raw_key(name)
    branchTxn = TxnRegister().begin_reader_txn(branchenv)
    try:
        dbVal = branchTxn.get(dbKey, default=False)
    finally:
        TxnRegister().abort_reader_txn(branchenv)

    if dbVal is False:
        msg = f'No remote with the name: {name} exists in the repo.'
        raise KeyError(msg)
    else:
        remote_address = remote_raw_val_from_db_val(dbVal)
        return remote_address


def remove_remote(branchenv: lmdb.Environment, name: str) -> str:
    """remove a remote reference with the provided name.

    Parameters
    ----------
    branchenv : lmdb.Environment
        db where the branch (and remote) records are stored.
    name : str
        name of the remote to remove from the repo

    Raises
    ------
    ValueError
        if a remote with the provided name does not exist

    Returns
    -------
    str
        IP:PORT of the remote with provided name (which was removed)
    """
    dbKey = remote_db_key_from_raw_key(name)
    branchTxn = TxnRegister().begin_writer_txn(branchenv)
    try:
        dbVal = branchTxn.pop(dbKey)
    finally:
        TxnRegister().commit_writer_txn(branchenv)

    if dbVal is None:
        msg = f'No remote with the name: {name} exists in the repo.'
        raise ValueError(msg)

    remote_address = remote_raw_val_from_db_val(dbVal)
    return remote_address


def get_remote_names(branchenv):
    """get a list of all remotes in the repository.

    Parameters
    ----------
    branchenv : lmdb.Environment
        lmdb environment storing the branch records.

    Returns
    -------
    list of str
        list of remote names active in the repository.
    """
    remoteStartKey = K_REMOTES.encode()
    remoteNames = []
    branchTxn = TxnRegister().begin_reader_txn(branchenv)
    try:
        with branchTxn.cursor() as cursor:
            cursor.first()
            remoteRangeExists = cursor.set_range(remoteStartKey)
            while remoteRangeExists:
                remoteKey = cursor.key()
                if remoteKey.startswith(remoteStartKey):
                    name = remote_raw_key_from_db_key(remoteKey)
                    remoteNames.append(name)
                    remoteRangeExists = cursor.next()
                else:
                    remoteRangeExists = False
        cursor.close()
    finally:
        TxnRegister().abort_reader_txn(branchenv)

    return remoteNames


================================================
FILE: src/hangar/records/parsing.py
================================================
import json
from hashlib import blake2b
from itertools import cycle
from random import randint
from time import perf_counter, sleep
from typing import Union, NamedTuple, Tuple, Iterable

import blosc

from ..constants import (
    CMT_DIGEST_JOIN_KEY,
    CMT_KV_JOIN_KEY,
    CMT_REC_JOIN_KEY,
    K_BRANCH,
    K_HEAD,
    K_REMOTES,
    K_VERSION,
    K_WLOCK,
    SEP_CMT,
    SEP_KEY,
    WLOCK_SENTINAL,
)
from .._version import parse as version_parse
from .._version import Version


cycle_list = (str(c).rjust(4, '0') for c in range(9_999))
NAME_CYCLER = cycle(cycle_list)
RANDOM_NAME_SEED = str(randint(0, 999_999_999)).rjust(9, '0')
perf_counter()  # call to init monotonic start point


def generate_sample_name() -> str:
    ncycle = next(NAME_CYCLER)
    if ncycle == '0000':
        sleep(0.001)

    sec, subsec = str(perf_counter()).split('.')
    name = f'{RANDOM_NAME_SEED}{sec.rjust(6, "0")}{subsec.ljust(9, "0")}{ncycle}'
    return name


"""
Parsing functions used to deal with repository state. The parsers defined in
this section handle repo/branch records.

Methods working with repository version specifiers
--------------------------------------------------
"""


def repo_version_raw_spec_from_raw_string(v_str: str) -> Version:
    """Convert from user facing string representation to Version object
    """
    return version_parse(v_str)


# ------------------------- db version key is fixed -----------------


def repo_version_db_key() -> bytes:
    """The db formated key which version information can be accessed at

    Returns
    -------
    bytes
        db formatted key to use to get/set the repository software version.
    """
    return K_VERSION.encode()


# ------------------------ raw -> db --------------------------------


def repo_version_db_val_from_raw_val(v_spec: Version) -> bytes:
    """determine repository version db specifier from version spec.

    Parameters
    ----------
    v_spec : Version
        This class abstracts handling of a project’s versions. A Version
        instance is comparison aware and can be compared and sorted using the
        standard Python interfaces.

    Returns
    -------
    bytes
        db formatted specification of version
    """
    return str(v_spec).encode()


# ---------------------------- db -> raw ----------------------------


def repo_version_raw_val_from_db_val(db_val: bytes) -> Version:
    """determine software version of hangar repository is written for.

    Parameters
    ----------
    db_val : bytes
        db formatted specification of version string

    Returns
    -------
    Version
        This class abstracts handling of a project’s versions. A Version
        instance is comparison aware and can be compared and sorted using the
        standard Python interfaces.
    """
    db_str = db_val.decode()
    return version_parse(db_str)


"""
Methods working with writer HEAD branch name
--------------------------------------------
"""

# --------------------- db HEAD key is fixed ------------------------


def repo_head_db_key() -> bytes:
    """db_key of the head staging branch name.

    Returns
    -------
    bytestring
        lmdb key to query while looking up the head staging branch name
    """
    return K_HEAD.encode()


# --------------------- raw -> db -----------------------------------


def repo_head_db_val_from_raw_val(branch_name: str) -> bytes:
    return f'{K_BRANCH}{branch_name}'.encode()


# --------------------- db -> raw -----------------------------------

def repo_head_raw_val_from_db_val(db_val: bytes) -> str:
    return db_val.decode()[len(K_BRANCH):]


"""
Methods working with branch names / head commit values
------------------------------------------------------
"""

# ---------------------- raw -> db --------------------------------


def repo_branch_head_db_key_from_raw_key(branch_name: str) -> bytes:
    return f'{K_BRANCH}{branch_name}'.encode()


def repo_branch_head_db_val_from_raw_val(commit_hash: str) -> bytes:
    return f'{commit_hash}'.encode()


# ---------------- db -> raw -----------------------------------


def repo_branch_head_raw_key_from_db_key(db_key: bytes) -> str:
    return db_key.decode()[len(K_BRANCH):]


def repo_branch_head_raw_val_from_db_val(db_val: bytes) -> str:
    try:
        commit_hash = db_val.decode()
    except AttributeError:
        commit_hash = ''
    return commit_hash


"""
Methods working with writer lock key/values
-------------------------------------------
"""

# ------------------- db key for lock is fixed -------------------


def repo_writer_lock_db_key() -> bytes:
    return K_WLOCK.encode()


def repo_writer_lock_sentinal_db_val() -> bytes:
    return WLOCK_SENTINAL.encode()


def repo_writer_lock_force_release_sentinal() -> str:
    return 'FORCE_RELEASE'


# ------------------------- raw -> db ------------------------------


def repo_writer_lock_db_val_from_raw_val(lock_uuid: str) -> bytes:
    return f'{lock_uuid}'.encode()


# -------------------------- db -> raw ------------------------------


def repo_writer_lock_raw_val_from_db_val(db_val: bytes) -> str:
    return db_val.decode()


# -------------------- Remote Work --------------------------------------------


def remote_db_key_from_raw_key(remote_name: str) -> bytes:
    """Get the remote db key val for a remote name

    Parameters
    ----------
    remote_name : str
        name of the remote location

    Returns
    -------
    bytes
        db key allowing access to address value at the name of the remote
    """
    return f'{K_REMOTES}{remote_name}'.encode()


def remote_raw_key_from_db_key(db_key: bytes, *, _SPLT=len(K_REMOTES)) -> str:
    """Get the remote name from a remote db key

    Parameters
    ----------
    db_key : bytes
        db key of the remote

    Returns
    -------
    str
        name of the remote
    """
    return db_key.decode()[_SPLT:]


def remote_db_val_from_raw_val(grpc_address: str) -> bytes:
    """Format a remote db value from it's grpc address string

    Parameters
    ----------
    grpc_address : str
        IP:PORT where the grpc server can be accessed

    Returns
    -------
    bytes
        formatted representation of the grpc address suitable for storage in lmdb.
    """
    return grpc_address.encode()


def remote_raw_val_from_db_val(db_val: bytes) -> str:
    """Retrieve the address where a grpc server is running from a remote db value


    Parameters
    ----------
    db_val : bytes
        db value assigned to the desired remote name

    Returns
    -------
    str
        IP:PORT where the grpc server can be accessed.
    """
    return db_val.decode()


"""
Commit Parsing Methods
-----------------------

The parsers defined in this section handle commit (ref) records
"""


class CommitAncestorSpec(NamedTuple):
    is_merge_commit: bool
    master_ancestor: str
    dev_ancestor: str


class CommitUserSpec(NamedTuple):
    commit_time: float
    commit_message: str
    commit_user: str
    commit_email: str


class DigestAndUserSpec(NamedTuple):
    digest: str
    user_spec: CommitUserSpec


class DigestAndAncestorSpec(NamedTuple):
    digest: str
    ancestor_spec: CommitAncestorSpec


class DigestAndBytes(NamedTuple):
    digest: str
    raw: bytes


class DigestAndDbRefs(NamedTuple):
    digest: str
    db_kvs: Union[Tuple, Tuple[Tuple[bytes, bytes]]]


def _hash_func(recs: bytes) -> str:
    """hash a tuple of db formatted k, v pairs.

    Parameters
    ----------
    recs : bytes
        tuple to calculate the joined digest of

    Returns
    -------
    str
        hexdigest of the joined tuple data
    """
    return blake2b(recs, digest_size=20).hexdigest()


def cmt_final_digest(parent_digest: str, spec_digest: str, refs_digest: str,
                     *, tcode: str = 'a') -> str:
    """Determine digest of commit based on digests of its parent, specs, and refs.

    Parameters
    ----------
    parent_digest : str
        digest of the parent value
    spec_digest : str
        digest of the user spec value
    refs_digest : str
        digest of the data record values
    tcode : str, optional, kwarg-only
        hash calculation type code. Included to allow future updates to change
        the hashing algorithm. By default 'a'.

    Returns
    -------
    str
        digest of the commit with typecode prepended by '{tcode}='.
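
    Examples
    --------
    Illustrative only; the component digests below are shortened placeholders::

        >>> res = cmt_final_digest('a=parent', 'a=spec', 'a=refs')
        >>> res.startswith('a=')
        True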
    """
    if tcode == 'a':
        sorted_digests = sorted([parent_digest, spec_digest, refs_digest])
        joined_bytes = CMT_DIGEST_JOIN_KEY.join(sorted_digests).encode()
        rawDigest = _hash_func(joined_bytes)
        digest = f'a={rawDigest}'
    else:
        raise ValueError(
            f'Invalid commit reference type code {tcode}. If encountered during '
            f'normal operation, please report to hangar development team.')
    return digest


"""
Commit Parent (ancestor) Lookup methods
---------------------------------------
"""

# ------------------------- raw -> db -----------------------------------------


def commit_parent_db_key_from_raw_key(commit_hash: str) -> bytes:
    return f'{commit_hash}'.encode()


def commit_parent_db_val_from_raw_val(master_ancestor: str,
                                      dev_ancestor: str = '',
                                      is_merge_commit: bool = False) -> DigestAndBytes:
    if is_merge_commit:
        str_val = f'{master_ancestor}{SEP_CMT}{dev_ancestor}'
    else:
        str_val = f'{master_ancestor}'
    db_val = str_val.encode()
    digest = _hash_func(db_val)
    return DigestAndBytes(digest=digest, raw=db_val)


# ------------------------------- db -> raw -----------------------------------


def commit_parent_raw_key_from_db_key(db_key: bytes) -> str:
    return db_key.decode()


def commit_parent_raw_val_from_db_val(db_val: bytes) -> DigestAndAncestorSpec:
    """Parse the value of a commit's parent value to find it's ancestors

    Parameters
    ----------
    db_val : bytes
        Lmdb value of the commit parent field.

    Returns
    -------
    DigestAndAncestorSpec
        `digest` of data written to disk and `ancestor_spec`, Namedtuple
        containing fields for `is_merge_commit`, `master_ancestor`, and
        `dev_ancestor`
    """
    parentValDigest = _hash_func(db_val)

    commit_str = db_val.decode()
    commit_ancestors = commit_str.split(SEP_CMT)
    if len(commit_ancestors) == 1:
        is_merge_commit = False
        master_ancestor = commit_ancestors[0]
        dev_ancestor = ''
    else:
        is_merge_commit = True
        master_ancestor = commit_ancestors[0]
        dev_ancestor = commit_ancestors[1]

    ancestorSpec = CommitAncestorSpec(is_merge_commit, master_ancestor, dev_ancestor)
    return DigestAndAncestorSpec(digest=parentValDigest, ancestor_spec=ancestorSpec)


"""
Commit reference key and values.
--------------------------------
"""


def commit_ref_db_key_from_raw_key(commit_hash: str) -> bytes:
    return f'{commit_hash}{SEP_KEY}ref'.encode()


def _commit_ref_joined_kv_digest(joined_db_kvs: Iterable[bytes]) -> str:
    """reproducibly calculate digest from iterable of joined record k/v pairs.

    First calculate the digest of each element in the input iterable. As these
    elements contain the record type (meta key, column name, sample key) as
    well as the data hash digest, any modification of any reference record will
    result in a different digest for that element. Then join all elements into
    single serialized bytestring.

    The output of this method is the hash digest of the serialized bytestring.

    Parameters
    ----------
    joined_db_kvs : Iterable[bytes]
        list or tuple of bytes where each element is the joining of kv pairs
        from the full commit references

    Returns
    -------
    str
        calculated digest of the commit ref record component
    """
    kv_digests = map(_hash_func, joined_db_kvs)
    joined_digests = CMT_DIGEST_JOIN_KEY.join(kv_digests).encode()
    res = _hash_func(joined_digests)
    return res


def commit_ref_db_val_from_raw_val(db_kvs: Iterable[Tuple[bytes, bytes]]) -> DigestAndBytes:
    """serialize and compress a list of db_key/db_value pairs for commit storage

    Parameters
    ----------
    db_kvs : Iterable[Tuple[bytes, bytes]]
        Iterable collection of binary encoded db_key/db_val pairs.

    Returns
    -------
    DigestAndBytes
        `raw` serialized and compressed representation of the object. `digest`
        digest of the joined db kvs.
    """
    joined = tuple(map(CMT_KV_JOIN_KEY.join, db_kvs))
    refDigest = _commit_ref_joined_kv_digest(joined)
    pck = CMT_REC_JOIN_KEY.join(joined)
    raw = blosc.compress(pck, typesize=1, clevel=8, shuffle=blosc.NOSHUFFLE, cname='zstd')
    return DigestAndBytes(digest=refDigest, raw=raw)


def commit_ref_raw_val_from_db_val(commit_db_val: bytes) -> DigestAndDbRefs:
    """Load and decompress a commit ref db_val into python object memory.

    Parameters
    ----------
    commit_db_val : bytes
        Serialized and compressed representation of commit refs.

    Returns
    -------
    DigestAndDbRefs
        `digest` of the unpacked commit refs if desired for verification. `db_kvs`
        Iterable of binary encoded key/value pairs making up the repo state at the
        time of that commit. key/value pairs are already in sorted order.
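
    Examples
    --------
    The digest recomputed on unpacking matches the digest produced by
    ``commit_ref_db_val_from_raw_val`` (keys and values below are
    placeholders, not the actual record layout)::

        >>> packed = commit_ref_db_val_from_raw_val(((b'akey', b'avalue'), (b'bkey', b'bvalue')))
        >>> unpacked = commit_ref_raw_val_from_db_val(packed.raw)
        >>> unpacked.digest == packed.digest
        True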
    """
    uncomp_db_raw = blosc.decompress(commit_db_val)
    # If a commit has nothing in it (completely empty), the return from the query is ().
    # The stored data is b'', from which the hash is calculated. We manually set these
    # values since the normal unpacking routine will not work correctly in that case.
    if uncomp_db_raw == b'':
        refsDigest = _hash_func(b'')
        raw_db_kv_list = ()
    else:
        raw_joined_kvs_list = uncomp_db_raw.split(CMT_REC_JOIN_KEY)
        refsDigest = _commit_ref_joined_kv_digest(raw_joined_kvs_list)
        raw_db_kv_list = tuple(map(tuple, map(bytes.split, raw_joined_kvs_list)))

    return DigestAndDbRefs(digest=refsDigest, db_kvs=raw_db_kv_list)


"""
Commit spec reference keys and values
-------------------------------------
"""


def commit_spec_db_key_from_raw_key(commit_hash: str) -> bytes:
    return f'{commit_hash}{SEP_KEY}spec'.encode()


def commit_spec_db_val_from_raw_val(commit_time: float, commit_message: str,
                                    commit_user: str,
                                    commit_email: str) -> DigestAndBytes:
    """Serialize a commit specification from user values to a db store value

    Parameters
    ----------
    commit_time : float
        time since unix epoch that the commit was made
    commit_message : str
        user specified commit message to attach to the record
    commit_user : str
        globally configured user name of the repository committer
    commit_email : str
        globally configured user email of the repository committer

    Returns
    -------
    DigestAndBytes
        Two tuple containing ``digest`` and ``raw`` compressed binary encoded
        serialization of commit spec
    """
    spec_dict = {
        'commit_time': commit_time,
        'commit_message': commit_message,
        'commit_user': commit_user,
        'commit_email': commit_email,
    }

    db_spec_val = json.dumps(spec_dict, separators=(',', ':')).encode()
    digest = _hash_func(db_spec_val)
    comp_raw = blosc.compress(
        db_spec_val, typesize=8, clevel=9, shuffle=blosc.SHUFFLE, cname='zlib')
    return DigestAndBytes(digest=digest, raw=comp_raw)


def commit_spec_raw_val_from_db_val(db_val: bytes) -> DigestAndUserSpec:
    uncompressed_db_val = blosc.decompress(db_val)
    digest = _hash_func(uncompressed_db_val)
    commit_spec = json.loads(uncompressed_db_val)
    user_spec = CommitUserSpec(**commit_spec)
    return DigestAndUserSpec(digest=digest, user_spec=user_spec)
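

# Illustrative sketch (not part of the original module): a commit spec is JSON
# serialized, hashed, and compressed on the way in; decompressing it recovers
# the same digest. All field values below are hypothetical.
def _example_commit_spec_round_trip():
    packed = commit_spec_db_val_from_raw_val(
        commit_time=1577836800.0,
        commit_message='add training images',
        commit_user='A. Developer',
        commit_email='dev@example.com')
    unpacked = commit_spec_raw_val_from_db_val(packed.raw)
    assert unpacked.digest == packed.digest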


================================================
FILE: src/hangar/records/queries.py
================================================
from typing import Dict, Iterable, Iterator, List, Set, Tuple, Union, Sequence

import lmdb

from .column_parsers import (
    data_record_digest_val_from_db_val,
    dynamic_layout_data_record_db_start_range_key,
    dynamic_layout_data_record_from_db_key,
    dynamic_layout_data_record_db_key_from_names,
    schema_column_record_from_db_key,
    schema_db_range_key_from_column_unknown_layout,
    schema_record_count_start_range_key,
)
from .recordstructs import (
    FlatColumnDataKey,
    NestedColumnDataKey,
    DataRecordVal,
)
from ..txnctx import TxnRegister
from ..utils import ilen
from ..mixins import CursorRangeIterator

RawDataTuple = Tuple[Union[FlatColumnDataKey, NestedColumnDataKey], DataRecordVal]
KeyType = Union[str, int]

class RecordQuery(CursorRangeIterator):

    def __init__(self, dataenv: lmdb.Environment):
        self._dataenv = dataenv

# ------------------ traversing the unpacked records ------------------------------------

    def _traverse_all_records(self) -> Iterator[Tuple[bytes, bytes]]:
        """Pull out all records in the database as a tuple of binary encoded

        Returns
        -------
        list of tuples of bytes
            list type stack of tuples with each db_key, db_val pair
        """
        try:
            datatxn = TxnRegister().begin_reader_txn(self._dataenv)
            with datatxn.cursor() as cursor:
                cursor.first()
                for db_kv in cursor.iternext(keys=True, values=True):
                    yield db_kv
        finally:
            TxnRegister().abort_reader_txn(self._dataenv)

    def _traverse_column_schema_records(self, keys: bool = True, values: bool = True
                                        ) -> Iterable[Union[Tuple[bytes], Tuple[bytes, bytes]]]:
        """Internal method to traverse all schema records and pull out k/v db pairs.

        Parameters
        ----------
        keys : bool, optional
            If True, yield schema record keys encountered; if False only values
            are returned. By default, True.
        values : bool, optional
            If True, yield schema record values encountered; if False only keys
            are returned. By default, True.

        Yields
        ------
        Iterable[Union[Tuple[bytes], Tuple[bytes, bytes]]]:
            db schema keys and db_values
        """
        startSchemaRangeKey = schema_record_count_start_range_key()
        try:
            datatxn = TxnRegister().begin_reader_txn(self._dataenv)
            yield from self.cursor_range_iterator(datatxn, startSchemaRangeKey, keys, values)
        finally:
            TxnRegister().abort_reader_txn(self._dataenv)

    def _traverse_column_data_records(self,
                                      column_name: str,
                                      *,
                                      keys: bool = True,
                                      values: bool = True) -> Iterable[Union[bytes, Tuple[bytes, bytes]]]:
        """Internal method to traverse column data records and get keys/db_values

        The column name is required because this method controls the cursor
        movement by first setting its position on the column record count
        key, reading its value "N", and then sequentially pulling records out of
        the db for N loops.

        Parameters
        ----------
        column_name : str
            name of the column to traverse records for.
        keys : bool, optional
            If True, yield data record keys encountered; if False only values
            are returned. By default, True.
        values : bool, optional
            If True, yield data record values encountered; if False only keys
            are returned. By default, True.

        Yields
        ------
        Union[bytes, Tuple[bytes, bytes]]
            db_key and/or db_val of each data record traversed

        Raises
        ------
        KeyError
            if no column exists with the requested name.
        """
        try:
            datatxn = TxnRegister().begin_reader_txn(self._dataenv)
            schemaColumnRangeKey = schema_db_range_key_from_column_unknown_layout(column_name)
            with datatxn.cursor() as cur:
                if not cur.set_range(schemaColumnRangeKey):
                    raise KeyError(f'Traversal of commit references failed. '
                                   f'No column named `{column_name}` exists.')
                schemaColumnKey = cur.key()
            column_record = schema_column_record_from_db_key(schemaColumnKey)
            startRangeKey = dynamic_layout_data_record_db_start_range_key(column_record)
            yield from self.cursor_range_iterator(datatxn, startRangeKey, keys, values)
        finally:
            TxnRegister().abort_reader_txn(self._dataenv)

# ------------------------- process columns --------------------------------------------

    def column_names(self) -> List[str]:
        """Find all named columns in the checkout

        Returns
        -------
        List[str]
            list of all column names
        """
        recs = self._traverse_column_schema_records(keys=True, values=False)
        column_recs = map(schema_column_record_from_db_key, recs)
        return [x.column for x in column_recs]

    def column_count(self) -> int:
        """Return number of columns/schemas in the commit

        Returns
        -------
        int
            len of columns
        """
        return ilen(self._traverse_column_schema_records(keys=True, values=False))

    def data_hashes(self) -> List[str]:
        """Find all data hashes contained within all columns

        Note: this method does not deduplicate values

        Returns
        -------
        List[str]
            all hash values for all data pieces in the commit
        """
        all_hashes = []
        columns = self.column_names()
        for column in columns:
            recs = self._traverse_column_data_records(column, keys=False, values=True)
            data_rec = map(data_record_digest_val_from_db_val, recs)
            data_val_rec = [x.digest for x in data_rec]
            all_hashes.extend(data_val_rec)
        return all_hashes

# ------------------------ process column data records ----------------------

    def column_data_records(self, column_name: str) -> Iterable[RawDataTuple]:
        """Returns the raw data record key and record values for a specific column.

        Parameters
        ----------
        column_name : str
            name of the column to pull records for

        Yields
        ------
        Iterable[RawDataTuple]
            generator of key and value data record specs
        """
        for data_key, data_val in self._traverse_column_data_records(column_name):
            data_rec_key = dynamic_layout_data_record_from_db_key(data_key)
            data_rec_val = data_record_digest_val_from_db_val(data_val)
            yield (data_rec_key, data_rec_val)

    def column_data_hashes(self, column_name: str) -> Set[DataRecordVal]:
        """Find all data hashes contained within a particular column

        Note: this method does not remove any duplicates which may be present;
        if dedup is required, process it downstream.

        Parameters
        ----------
        column_name : str
            name of the column to find the hashes contained in

        Returns
        -------
        Set[DataRecordVal]
            all hash values for all data pieces in the column
        """
        recs = self._traverse_column_data_records(column_name, keys=False, values=True)
        return set(map(data_record_digest_val_from_db_val, recs))

    def column_data_count(self, column_name: str) -> int:
        """Return the number of samples in an column with the provided name

        Parameters
        ----------
        column_name : str
            name of the column to query

        Returns
        -------
        int
            number of samples in the column with given name
        """
        recs = self._traverse_column_data_records(column_name, keys=True, values=False)
        return ilen(recs)  # regular len method not defined for generator iterable

# ------------------------- process schema ----------------------------------------------

    def schema_specs(self):
        """Return the all schema specs defined by all columns.

        Returns
        -------
        dict
            dict of column spec key and digest for each column schema
        """
        recs = {}
        for schema_key, schema_val in self._traverse_column_schema_records():
            schema_record = schema_column_record_from_db_key(schema_key)
            schema_val = data_record_digest_val_from_db_val(schema_val)
            recs[schema_record] = schema_val
        return recs

    def schema_hashes(self) -> List[str]:
        """Find all schema hashes inside of a commit

        Returns
        -------
        List[str]
            list of all schema hash digests in the commit
        """
        all_schema_hashes = []
        for schema_rec_val in self._traverse_column_schema_records(keys=False, values=True):
            digest = data_record_digest_val_from_db_val(schema_rec_val)
            all_schema_hashes.append(digest.digest)
        return all_schema_hashes

    def data_hash_to_schema_hash(self) -> Dict[str, str]:
        """For all hashes in the commit, map sample hash to schema hash.

        Returns
        -------
        Dict[str, str]
            mapping of sample hash to aset_schema_hash
        """
        odict = {}
        aset_names = self.column_names()
        aset_schema_specs = self.schema_specs()
        col_names_schema_digests = {k.column: v.digest for k, v in aset_schema_specs.items()}
        for asetn in aset_names:
            aset_hash_vals = self.column_data_hashes(asetn)
            aset_schema_hash = col_names_schema_digests[asetn]
            for aset_hash_val in aset_hash_vals:
                odict[aset_hash_val.digest] = aset_schema_hash
        return odict

    def column_schema_layout(self, column: str) -> str:
        """Return the column schema layout for a column name

        Parameters
        ----------
        column: str
            name of the column to query

        Returns
        -------
        str
            One of the valid column layout types (i.e. `flat`, `nested`, etc.)
        """
        for schema_key in self._traverse_column_schema_records(values=False):
            schema_record = schema_column_record_from_db_key(schema_key)
            if schema_record.column == column:
                return schema_record.layout
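

# Illustrative usage sketch (not part of the original module). `dataenv` stands
# in for an unpacked-commit lmdb.Environment, such as one produced by
# `commiting.tmp_cmt_env` or the staging record environment.
def _example_record_query_usage(dataenv: lmdb.Environment):
    q = RecordQuery(dataenv)
    for name in q.column_names():                  # every column in the commit
        layout = q.column_schema_layout(name)      # 'flat' or 'nested'
        n_samples = q.column_data_count(name)
        digests = {rec.digest for rec in q.column_data_hashes(name)}
        print(name, layout, n_samples, len(digests))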


================================================
FILE: src/hangar/records/recordstructs.pxd
================================================
# header file for record containers

cdef class CompatibleData:
    cdef readonly bint compatible
    cdef readonly str reason


cdef class ColumnSchemaKey:
    cdef readonly str column
    cdef readonly str layout


cdef class FlatColumnDataKey:
    cdef readonly str column
    cdef str _sample
    cdef bint _s_int


cdef class NestedColumnDataKey:
    cdef readonly str column
    cdef str _sample, _subsample
    cdef bint _s_int, _ss_int


cdef class DataRecordVal:
    cdef readonly str digest


================================================
FILE: src/hangar/records/recordstructs.pyx
================================================

cdef class CompatibleData:
    """Bool recording if data `compatible` and if False the rejection `reason`.
    """

    def __init__(self, bint compatible, str reason):
        self.compatible = compatible
        self.reason = reason

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'compatible={self.compatible}, '
                f'reason="{self.reason}")')

    def __iter__(self):
        for attr in ['compatible', 'reason']:
            yield getattr(self, attr)

    def __eq__(self, other):
        return (isinstance(other, self.__class__) and
                self.compatible == other.compatible and
                self.reason == other.reason)

    def __hash__(self):
        return hash((self.__class__, self.compatible, self.reason))


cdef class ColumnSchemaKey:
    """Record listing `column` name and `layout` type.
    """

    def __init__(self, str column, str layout):
        self.column = column
        self.layout = layout

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'column="{self.column}", '
                f'layout="{self.layout}")')

    def __iter__(self):
        for attr in ['column', 'layout']:
            yield getattr(self, attr)

    def __eq__(self, other):
        return (isinstance(other, self.__class__) and
                self.column == other.column and
                self.layout == other.layout)

    def __hash__(self):
        return hash((self.__class__, self.column, self.layout))


cdef class FlatColumnDataKey:
    """Record listing `column` & `sample` name along with `layout` property
    """

    def __init__(self, str column, str sample):
        self.column = column
        self._sample = sample
        self._s_int = True if sample[0] == '#' else False

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'column="{self.column}", '
                f'sample={f"{self.sample if self._s_int else repr(self.sample)}"})')

    def __iter__(self):
        for attr in ['column', 'sample']:
            yield getattr(self, attr)

    def __eq__(self, other):
        return (isinstance(other, self.__class__) and
                self.column == other.column and
                self.sample == other.sample)

    def __hash__(self):
        return hash((self.__class__, self.column, self.sample))

    @property
    def sample(self):
        if self._s_int:
            return int(self._sample[1:])
        else:
            return self._sample

    @property
    def layout(self):
        return 'flat'
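

# Illustrative sketch (not part of the original module): sample names stored
# with a leading '#' are exposed as integer keys by the `sample` property,
# everything else stays a string.
def _example_flat_key_sample_types():
    int_key = FlatColumnDataKey('images', '#42')
    str_key = FlatColumnDataKey('images', 'cat_photo')
    assert int_key.sample == 42 and int_key.layout == 'flat'
    assert str_key.sample == 'cat_photo'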


cdef class NestedColumnDataKey:
    """Record listing `column`, `sample`, & `subsample` name along with `layout` property
    """

    def __init__(self, str column, str sample, str subsample):
        self.column = column
        self._sample = sample
        self._subsample = subsample
        self._s_int = True if sample[0] == '#' else False
        self._ss_int = True if subsample[0] == '#' else False

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'column="{self.column}", '
                f'sample={f"{self.sample if self._s_int else repr(self.sample)}"}, '
                f'subsample={f"{self.subsample if self._ss_int else repr(self.subsample)}"})')

    def __iter__(self):
        for attr in ['column', 'sample', 'subsample']:
            yield getattr(self, attr)

    def __eq__(self, other):
        return (isinstance(other, self.__class__) and
                self.column == other.column and
                self.sample == other.sample and
                self.subsample == other.subsample)

    def __hash__(self):
        return hash((self.__class__, self.column, self.sample, self.subsample))

    @property
    def sample(self):
        if self._s_int:
            return int(self._sample[1:])
        else:
            return self._sample

    @property
    def subsample(self):
        if self._ss_int:
            return int(self._subsample[1:])
        else:
            return self._subsample

    @property
    def layout(self):
        return 'nested'


cdef class DataRecordVal:

    def __init__(self, str digest):
        self.digest = digest

    def __repr__(self):
        return (f'{self.__class__.__name__}('
                f'digest={repr(self.digest)})')

    def __iter__(self):
        for attr in ['digest']:
            yield getattr(self, attr)

    def __eq__(self, other):
        return (isinstance(other, self.__class__)
                and self.digest == other.digest)

    def __hash__(self):
        return hash((self.__class__, self.digest))


================================================
FILE: src/hangar/records/summarize.py
================================================
from pathlib import Path
import time
from io import StringIO

import lmdb

from .commiting import (
    get_commit_ancestors_graph,
    get_commit_spec,
    tmp_cmt_env,
)
from .heads import (
    get_staging_branch_head,
    get_branch_head_commit,
    commit_hash_to_branch_name_map,
)
from .queries import RecordQuery
from .hashs import HashQuery
from ..diff import DiffOut, Changes
from ..txnctx import TxnRegister
from ..utils import format_bytes, file_size, folder_size, unique_everseen
from ..diagnostics import graphing


def log(branchenv: lmdb.Environment,
        refenv: lmdb.Environment,
        branch: str = None,
        commit: str = None,
        *,
        return_contents: bool = False,
        show_time: bool = False,
        show_user: bool = False):
    """Displays a pretty printed commit log graph to the terminal.

    .. note::

        For programmatic access, the return_contents value can be set to True
        which will retrieve relevant commit specifications as dictionary
        elements.

    Parameters
    ----------
    branchenv : lmdb.Environment
        db storing information on named branch HEADS
    refenv : lmdb.Environment
        db storing full commit history refs (compressed).
    branch : str, optional
        The name of the branch to start the log process from. (Default value
        = None)
    commit : str, optional
        The commit hash to start the log process from. (Default value = None)
    return_contents : bool, optional, kwarg only
        If true, return the commit graph specifications in a dictionary
        suitable for programmatic access/evaluation.
    show_time : bool, optional, kwarg only
        If true and return_contents is False, show the time of each commit
        on the printed log graph
    show_user : bool, optional, kwarg only
        If true and return_contents is False, show the committer of each
        commit on the printed log graph

    Returns
    -------
    Optional[dict]
        Dict containing the commit ancestor graph, and all specifications.
    """
    res = list_history(
        refenv=refenv,
        branchenv=branchenv,
        branch_name=branch,
        commit_hash=commit)
    branchMap = dict(commit_hash_to_branch_name_map(branchenv=branchenv))

    if return_contents:
        for digest in list(branchMap.keys()):
            if digest not in res['order']:
                del branchMap[digest]
        res['branch_heads'] = branchMap
        return res
    else:
        g = graphing.Graph()
        g.show_nodes(dag=res['ancestors'],
                     spec=res['specs'],
                     branch=branchMap,
                     start=res['head'],
                     order=res['order'],
                     show_time=show_time,
                     show_user=show_user)


def list_history(refenv, branchenv, branch_name=None, commit_hash=None):
    """Traverse commit history to specifying ancestor DAG and all ancestor specs.

    Parameters
    ----------
    refenv : lmdb.Environment
        environment containing all repository commit data.
    branchenv : lmdb.Environment
        environment containing the current staging head branch and branch head
        commit hashes
    branch_name : string, optional
        if specified, get the history starting at the head commit of this named
        branch (the default is None, which will use the `commit_hash` arg if
        available, or staging area head)
    commit_hash : string, optional
        if specified, get the history starting at this specific commit,
        overrides branch name if both are specified (the default is `None`,
        which will use the branch_name arg if available, or staging area head)

    Returns
    -------
    dict
        dict containing information about the repo history. specifies fields for
        `head`, `ancestors` (DAG of commit), and `specs` of each commit, also `order`
        encountered.
    """

    if commit_hash is not None:
        head_commit = commit_hash
    elif branch_name is not None:
        head_commit = get_branch_head_commit(branchenv=branchenv, branch_name=branch_name)
    else:
        head_branch = get_staging_branch_head(branchenv)
        head_commit = get_branch_head_commit(branchenv, head_branch)

    ancestors = get_commit_ancestors_graph(
        refenv=refenv, starting_commit=head_commit)

    commitSpecs = {}
    for commit in ancestors.keys():
        commitSpecs[commit] = dict(get_commit_spec(refenv, commit_hash=commit)._asdict())

    cmtTimeSorter = [(k, v['commit_time']) for k, v in commitSpecs.items()]
    cmtTimeSorter.sort(key=lambda t: t[1], reverse=True)
    showparentsOrder = [x[0] for x in cmtTimeSorter]

    res = {
        'head': head_commit,
        'ancestors': ancestors,
        'specs': commitSpecs,
        'order': showparentsOrder,
    }
    return res
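

# Illustrative sketch (not part of the original module): programmatic access to
# the log contents. `branchenv` / `refenv` stand in for the repository's open
# lmdb environments; 'master' is a hypothetical branch name.
def _example_programmatic_log(branchenv, refenv):
    contents = log(branchenv, refenv, branch='master', return_contents=True)
    head = contents['head']                           # starting commit hash
    newest_to_oldest = contents['order']              # commits sorted by time
    head_message = contents['specs'][head]['commit_message']
    return head, newest_to_oldest, head_message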


def details(env: lmdb.Environment, line_limit=100, line_length=100) -> StringIO:  # pragma: no cover
    """Print the details of an lmdb environment to stdout

    Parameters
    ----------
    env : lmdb.Environment
        environment handle to print records of
    line_limit : int, optional
        limit to the amount of record lines printed, by default 100
    line_length : int, optional
        limit the amount of text printed per line, by default 100

    Returns
    -------
    StringIO
        buffer containing detail data.
    """
    buf = StringIO()
    buf.write('\n======================\n')
    buf.write(f'{Path(env.path()).name}\n')
    try:
        buf.write(f'File Size: {format_bytes(file_size(Path(env.path())))}\n')
    except FileNotFoundError:
        pass
    buf.write('======================\n\n')
    txn = TxnRegister().begin_reader_txn(env)
    entries = txn.stat()['entries'] - 10
    with txn.cursor() as cursor:
        count, once = 0, False
        for key, value in cursor:
            if (count >= line_limit) and (count < entries):
                count += 1
                if (once is False) and (count < entries):
                    once = True
                    buf.write('...\n...\n...\n')
                continue
            else:
                if len(value) >= line_length:
                    buf.write(f'{key} long binary\n')
                else:
                    buf.write(f'{key} {value}\n')
            count += 1
    cursor.close()
    TxnRegister().abort_reader_txn(env)
    return buf


def summary(env, *, branch='', commit='') -> StringIO:
    """Summary of data set stored in repository.

    Parameters
    ----------
    env : :class:`..context.Environments`
        class which contains all of the lmdb environments pre-initialized for use.
    commit : str
        commit hash to query. if left empty, HEAD commit is used (Default value = '')
    branch : str
        branch name to query, if left empty, HEAD will be used. (Default value = '')

    Returns
    -------
    StringIO:
        buffer formatting the contents of the commit ref at the queried commit.
    """
    if commit != '':
        cmt = commit
    elif branch != '':
        cmt = get_branch_head_commit(env.branchenv, branch)
    else:
        headBranch = get_staging_branch_head(env.branchenv)
        cmt = get_branch_head_commit(env.branchenv, headBranch)

    spec = get_commit_spec(env.refenv, cmt)._asdict()
    if cmt == '':
        buf = StringIO()
        buf.write('No commits made')
        return buf

    def _schema_digest_spec_dict(hashenv, digest):
        hq = HashQuery(hashenv)
        res = hq.get_schema_digest_spec(digest)
        return res

    with tmp_cmt_env(env.refenv, cmt) as cmtrefenv:
        query = RecordQuery(cmtrefenv)

        nbytes = folder_size(env.repo_path, recurse=True)
        humanBytes = format_bytes(nbytes)
        buf = StringIO()
        buf.write(f'Summary of Contents Contained in Data Repository \n')
        buf.write(f' \n')
        buf.write(f'================== \n')
        buf.write(f'| Repository Info \n')
        buf.write(f'|----------------- \n')
        buf.write(f'|  Base Directory: {str(env.repo_path.parent)} \n')
        buf.write(f'|  Disk Usage: {humanBytes} \n')
        buf.write(f' \n')

        buf.write(f'=================== \n')
        buf.write(f'| Commit Details \n')
        buf.write(f'------------------- \n')
        buf.write(f'|  Commit: {cmt} \n')
        buf.write(f'|  Created: {time.asctime(time.gmtime(spec["commit_time"]))} \n')
        buf.write(f'|  By: {spec["commit_user"]} \n')
        buf.write(f'|  Email: {spec["commit_email"]} \n')
        buf.write(f'|  Message: {spec["commit_message"]} \n')
        buf.write(f' \n')
        buf.write(f'================== \n')
        buf.write(f'| DataSets \n')
        buf.write(f'|----------------- \n')

        buf.write(f'|  Number of Named Columns: {query.column_count()} \n')
        for asetn, asetnSchema in query.schema_specs().items():
            buf.write(f'|\n')
            buf.write(f'|  * Column Name: {asetn} \n')
            buf.write(f'|    Num Data Pieces: {query.column_data_count(asetn.column)} \n')

            buf.write(f'|    Details: \n')
            schema_dict = _schema_digest_spec_dict(env.hashenv, asetnSchema.digest)
            for k, v in schema_dict.items():
                buf.write(f'|    - {k}: {v} \n')

    return buf


def status(hashenv: lmdb.Environment, branch_name: str, diff: DiffOut) -> StringIO:
    """Format human readable string buffer of changes in a staging area

    Parameters
    ----------
    hashenv : lmdb.Environment
        hashenv to pull useful schema spec info from.
    branch_name : str
        Name of the branch the diff is from.
    diff : DiffOut
        diff struct tuple returned from standard diff tool.

    Returns
    -------
    StringIO
        Buffer containing human readable printable string of change summary
    """
    def _schema_digest_spec_dict(digest):
        hq = HashQuery(hashenv)
        res = hq.get_schema_digest_spec(digest)
        return res

    def _diff_info(df: Changes) -> StringIO:
        """Format buffer for each of `ADDED`, `DELETED`, `MUTATED` changes
        """
        buf = StringIO()
        buf.write(f'|---------- \n')
        buf.write(f'| Schema: {len(df.schema)} \n')
        for k, v in df.schema.items():
            digest = v.digest
            buf.write(f'|  - "{k.column}": \n')
            buf.write(f'|       digest="{digest}" \n')
            schema_spec = _schema_digest_spec_dict(digest)
            for schema_key, schema_val in schema_spec.items():
                buf.write(f'|       {schema_key}: {schema_val} \n')

        buf.write('|---------- \n')
        buf.write(f'| Samples: {len(df.samples)} \n')
        unique = unique_everseen(df.samples, lambda x: x.column)
        for u in unique:
            un = u.column
            count = sum((1 for k in df.samples if k.column == un))
            buf.write(f'|  - "{un}": {count} \n')
        buf.write(' \n')
        return buf

    buf = StringIO()
    buf.write('============ \n')
    buf.write(f'| Branch: {branch_name} \n')
    buf.write(' \n')
    for changes, changeType in zip(diff, diff.__annotations__.keys()):
        buf.write('============ \n')
        buf.write(f'| {changeType.upper()} \n')
        change_buf = _diff_info(changes)
        buf.write(change_buf.getvalue())
    return buf


================================================
FILE: src/hangar/records/vcompat.py
================================================
from pathlib import Path

import lmdb

from .parsing import (
    repo_version_db_key,
    repo_version_db_val_from_raw_val,
    repo_version_raw_spec_from_raw_string,
    repo_version_raw_val_from_db_val,
)
from .._version import Version
from ..constants import LMDB_SETTINGS, LMDB_BRANCH_NAME
from ..txnctx import TxnRegister
from ..utils import pairwise


def set_repository_software_version(branchenv: lmdb.Environment,
                                    ver_str: str,
                                    *,
                                    overwrite: bool = False) -> bool:
    """Write the repository software version to a particular value

    Parameters
    ----------
    branchenv : lmdb.Environment
        db where the head, branch, and version specs are stored
    ver_str : str
        semantic version style string representing version (ie. "0.1.0",
        "1.2.1", etc)
    overwrite : bool, optional
        If True, replace current value with new value; If False, do not
        overwrite if this key exists, by default False

    Returns
    -------
    bool
        True if successful, False otherwise
    """
    versionKey = repo_version_db_key()
    ver_spec = repo_version_raw_spec_from_raw_string(v_str=ver_str)
    versionVal = repo_version_db_val_from_raw_val(v_spec=ver_spec)
    branchTxn = TxnRegister().begin_writer_txn(branchenv)
    try:
        success = branchTxn.put(versionKey, versionVal, overwrite=overwrite)
    finally:
        TxnRegister().commit_writer_txn(branchenv)
    return success


def get_repository_software_version_spec(branchenv: lmdb.Environment) -> Version:
    """Get the repository version specification tuple.

    Parameters
    ----------
    branchenv : lmdb.Environment
        db where the head, branch, and version specs are stored

    Returns
    -------
    Version
        This class abstracts handling of a project’s versions. A Version
        instance is comparison aware and can be compared and sorted using the
        standard Python interfaces.

    Raises
    ------
    KeyError
        If no version key is set for the repository
    """
    versionKey = repo_version_db_key()
    branchTxn = TxnRegister().begin_reader_txn(branchenv)
    try:
        versionVal = branchTxn.get(versionKey, default=False)
    finally:
        TxnRegister().abort_reader_txn(branchenv)

    if versionVal is False:
        raise KeyError('No version string is set for the repository')
    else:
        version_val = repo_version_raw_val_from_db_val(versionVal)
        return version_val


"""
Initial checking of repository versions
---------------------------------------
"""


def startup_check_repo_version(repo_path: Path) -> Version:
    """Determine repo version without having to have Environments ctx opened.

    Parameters
    ----------
    repo_path : Path
        path to the repository directory on disk

    Returns
    -------
    Version
        This class abstracts handling of a project’s versions. A Version
        instance is comparison aware and can be compared and sorted using the
        standard Python interfaces.

    Raises
    ------
    RuntimeError
        If for whatever reason, the branch file does not exist on disk.
        Execution should not reach this point.
    """
    brch_fp = repo_path.joinpath(LMDB_BRANCH_NAME)
    if not brch_fp.is_file():
        msg = f'Hangar Internal Error, startup_check_repo_version did not find '\
              f'brch db at: {brch_fp}. Execution should never reach this point. '\
              f'Please report this error to Hangar developers.'
        raise RuntimeError(msg)

    branchenv = lmdb.open(path=str(brch_fp), readonly=True, create=False, **LMDB_SETTINGS)
    spec = get_repository_software_version_spec(branchenv=branchenv)
    branchenv.close()
    return spec


incompatible_changes_after = [
    Version('0.2.0'),
    Version('0.3.0'),
    Version('0.4.0'),
    Version('0.5.0.dev0'),
    Version('0.5.0.dev1'),
]


def is_repo_software_version_compatible(repo_v: Version, curr_v: Version) -> bool:
    """Determine if the repo on disk and the current Hangar versions iscompatible.

    Parameters
    ----------
    repo_v : Version
        version of the software which wrote the repository on disk.
    curr_v : Version
        currently active software version specification

    Returns
    -------
    bool
        True if compatible, False if not.
    """
    for start, end in pairwise(incompatible_changes_after):
        if (repo_v >= start) and (repo_v < end):
            if (curr_v < start) or (curr_v >= end):
                return False
            elif (curr_v >= start) and (curr_v < end):
                return True
    if (repo_v >= end) and (curr_v < end):
        return False
    return True
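

# Illustrative sketch (not part of the original module): two versions falling
# inside the same interval of `incompatible_changes_after` are compatible,
# while versions straddling a boundary are not.
def _example_version_compatibility():
    assert is_repo_software_version_compatible(Version('0.4.1'), Version('0.4.2'))
    assert not is_repo_software_version_compatible(Version('0.3.1'), Version('0.4.0'))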


================================================
FILE: src/hangar/remote/__init__.py
================================================


================================================
FILE: src/hangar/remote/chunks.py
================================================
import math
import struct
from io import BytesIO
from typing import NamedTuple, List, Union, Tuple, Iterable

import blosc
import numpy as np

from . import hangar_service_pb2
from ..utils import set_blosc_nthreads

set_blosc_nthreads()


def chunk_bytes(bytesData, *, chunkSize: int = 32_000) -> Iterable[bytes]:
    """Slice a bytestring into subelements and store the data in a list

    Parameters
    ----------
    bytesData : bytes
        bytestring buffer of the array data
    chunkSize : int, optional, kwarg-only
        number of bytes which each chunk should be split into.

    Yields
    ------
    bytes
        data split into chunks of at most ``chunkSize`` bytes (32 kB by default).
    """
    numIters = math.ceil(len(bytesData) / chunkSize)
    currentStart = 0
    currentEnd = chunkSize
    for i in range(numIters):
        yield bytesData[currentStart:currentEnd]
        currentStart += chunkSize
        currentEnd += chunkSize
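

# Illustrative sketch (not part of the original module): a 70 kB payload split
# with the default 32 kB chunk size yields chunks of 32_000, 32_000 and 6_000
# bytes, which concatenate back to the original buffer.
def _example_chunk_bytes_sizes():
    payload = b'\x01' * 70_000
    pieces = list(chunk_bytes(payload))
    assert [len(p) for p in pieces] == [32_000, 32_000, 6_000]
    assert b''.join(pieces) == payload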


def clientCommitChunkedIterator(commit: str, parentVal: bytes, specVal: bytes,
                                refVal: bytes) -> hangar_service_pb2.PushCommitRequest:
    """Generator splitting commit specs into chunks sent from client to server

    Parameters
    ----------
    commit : str
        commit hash which is being sent
    parentVal : bytes
        bytes representing the commits immediate parents
    specVal : bytes
        bytes representing the commit message/user specifications
    refVal : bytes
        bytes containing all records stored in the repository

    Yields
    ------
    hangar_service_pb2.PushCommitRequest
        Chunked generator of the PushCommitRequest protobuf.
    """
    commit_proto = hangar_service_pb2.CommitRecord(
        parent=parentVal,
        spec=specVal)
    byteSize = len(refVal)
    chunkIterator = chunk_bytes(refVal)
    for refChunk in chunkIterator:
        commit_proto.ref = refChunk
        request = hangar_service_pb2.PushCommitRequest(
            commit=commit,
            total_byte_size=byteSize,
            record=commit_proto)
        yield request
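

# Illustrative sketch (not part of the original module): every yielded request
# carries one chunk of `refVal`, so concatenating the chunks on the receiving
# side reconstructs the full commit ref blob. All byte values are hypothetical.
def _example_commit_chunk_reassembly():
    refVal = b'\x00' * 70_000
    requests = clientCommitChunkedIterator(
        commit='a1b2c3', parentVal=b'parent', specVal=b'spec', refVal=refVal)
    assert b''.join(req.record.ref for req in requests) == refVal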


def tensorChunkedIterator(buf, uncomp_nbytes, pb2_request,
                          *,
                          err=None, chunkSize: int = 32_000):

    compBytes = blosc.compress(
        buf, clevel=3, cname='blosclz', shuffle=blosc.NOSHUFFLE)

    request = pb2_request(
        comp_nbytes=len(compBytes),
        uncomp_nbytes=uncomp_nbytes,
        error=err)

    chunkIterator = chunk_bytes(compBytes, chunkSize=chunkSize)
    for dchunk in chunkIterator:
        request.raw_data = dchunk
        yield request


def missingHashIterator(commit, hash_bytes, err, pb2_func):
    comp_bytes = blosc.compress(
        hash_bytes, cname='zlib', clevel=3, typesize=1, shuffle=blosc.SHUFFLE)

    rpc_method = pb2_func(
        commit=commit,
        total_byte_size=len(comp_bytes),
        error=err)

    chunkIterator = chunk_bytes(comp_bytes)
    for bchunk in chunkIterator:
        rpc_method.hashs = bchunk
        yield rpc_method


def missingHashRequestIterator(commit, hash_bytes, pb2_func):
    comp_bytes = blosc.compress(
        hash_bytes, cname='zlib', clevel=3, typesize=1, shuffle=blosc.SHUFFLE)

    rpc_method = pb2_func(
        commit=commit,
        total_byte_size=len(comp_bytes))

    chunkIterator = chunk_bytes(comp_bytes)
    for bchunk in chunkIterator:
        rpc_method.hashs = bchunk
        yield rpc_method


# ------------------------ serialization formats -------------------------


class DataIdent(NamedTuple):
    digest: str
    schema: str


class DataRecord(NamedTuple):
    data: Union[np.ndarray, str, bytes]
    digest: str
    schema: str


def _serialize_arr(arr: np.ndarray) -> bytes:
    """
    dtype_num ndim dim1_size dim2_size ... dimN_size array_bytes
    """
    buf = BytesIO()
    np.save(buf, arr, allow_pickle=False, fix_imports=False)
    raw = buf.getvalue()
    return raw


def _deserialize_arr(raw: bytes) -> np.ndarray:
    buf = BytesIO(initial_bytes=raw)
    buf.seek(0)
    arr = np.load(buf, allow_pickle=False, fix_imports=False)
    return arr


def _serialize_str(data: str) -> bytes:
    """
    data_bytes
    """
    return data.encode()


def _deserialize_str(raw: bytes) -> str:
    return raw.decode()


def _serialize_bytes(data: bytes) -> bytes:
    """
    data_bytes
    """
    return data


def _deserialize_bytes(data: bytes) -> bytes:
    return data


def serialize_ident(digest: str, schema: str) -> bytes:
    """
    len_digest len_schema digest_str schema_str
    """
    raw = struct.pack(
        f'<QQ{len(digest)}s{len(schema)}s',
        len(digest), len(schema), digest.encode(), schema.encode())
    return raw


def deserialize_ident(raw: bytes) -> DataIdent:
    digestLen, schemaLen = struct.unpack('<QQ', raw[:16])
    digest, schema = struct.unpack(f'<{digestLen}s{schemaLen}s', raw[16:])
    return DataIdent(digest=digest.decode(), schema=schema.decode())


def serialize_data(data: Union[np.ndarray, str, bytes]) -> Tuple[int, bytes]:
    if isinstance(data, np.ndarray):
        return (0, _serialize_arr(data))
    elif isinstance(data, str):
        return (2, _serialize_str(data))
    elif isinstance(data, bytes):
        return (3, _serialize_bytes(data))
    else:
        raise TypeError(type(data))


def deserialize_data(dtype_code: int, raw_data: bytes) -> Union[np.ndarray, str, bytes]:
    if dtype_code == 0:
        return _deserialize_arr(raw_data)
    elif dtype_code == 2:
        return _deserialize_str(raw_data)
    elif dtype_code == 3:
        return _deserialize_bytes(raw_data)
    else:
        raise ValueError(f'dtype_code unknown {dtype_code}')


def serialize_record(data: Union[np.ndarray, str, bytes], digest: str, schema: str) -> bytes:
    """
    dtype_code len_raw_ident len_raw_data raw_ident, raw_data
    """
    dtype_code, raw_data = serialize_data(data)
    raw_ident = serialize_ident(digest, schema)
    raw = struct.pack(
        f'<BQQ{len(raw_ident)}s{len(raw_data)}s',
        dtype_code, len(raw_ident), len(raw_data), raw_ident, raw_data)
    return raw


def deserialize_record(raw: bytes) -> DataRecord:
    identStart = 17  # 1 + 2 * 8 bytes
    dtype_code, identLen, dataLen = struct.unpack(f'<BQQ', raw[:identStart])
    identEnd = identStart + identLen
    ident = deserialize_ident(raw[identStart:identEnd])
    data = deserialize_data(dtype_code, raw[identEnd:identEnd + dataLen])
    return DataRecord(data=data, digest=ident.digest, schema=ident.schema)


def serialize_record_pack(records: List[bytes]) -> bytes:
    """
    num_records len_rec1 raw_rec1 len_rec2 raw_rec2 ... len_recN raw_recN
    """
    raw_num_records = struct.pack(f'<Q', len(records))
    raw_records = b''.join(
        [struct.pack(f'<Q{len(rec)}s', len(rec), rec) for rec in records])
    return raw_num_records + raw_records


def deserialize_record_pack(raw: bytes) -> List[bytes]:
    numRecords = struct.unpack(f'<Q', raw[:8])[0]
    recs, offset = [], 8
    for _ in range(numRecords):
        (recLen,) = struct.unpack(f'<Q', raw[offset:offset + 8])
        offset += 8
        recs.append(bytes(raw[offset:offset + recLen]))
        offset += recLen
    return recs


================================================
FILE: src/hangar/remote/client.py
================================================
    def ping_pong(self) -> str:
        """Ping server to ensure that connection is working

        Returns
        -------
        str
            Should be value 'PONG'
        """
        request = hangar_service_pb2.PingRequest()
        response: hangar_service_pb2.PingReply = self.stub.PING(request)
        return response.result

    def push_branch_record(self, name: str, head: str
                           ) -> hangar_service_pb2.PushBranchRecordReply:
        """Create a branch (if new) or update the server branch HEAD to new commit.

        Parameters
        ----------
        name : str
            branch name to be pushed
        head : str
            commit hash to update the server head to

        Returns
        -------
        hangar_service_pb2.PushBranchRecordReply
            code indicating success, message with human readable info
        """
        rec = hangar_service_pb2.BranchRecord(name=name, commit=head)
        request = hangar_service_pb2.PushBranchRecordRequest(rec=rec)
        response = self.stub.PushBranchRecord(request)
        return response

    def fetch_branch_record(self, name: str
                            ) -> hangar_service_pb2.FetchBranchRecordReply:
        """Get the latest head commit the server knows about for a given branch

        Parameters
        ----------
        name : str
            name of the branch to query on the server

        Returns
        -------
        hangar_service_pb2.FetchBranchRecordReply
            rec containing name and head commit if branch exists, along with
            standard error proto if it does not exist on the server.
        """
        rec = hangar_service_pb2.BranchRecord(name=name)
        request = hangar_service_pb2.FetchBranchRecordRequest(rec=rec)
        response = self.stub.FetchBranchRecord(request)
        return response

    def push_commit_record(self, commit: str, parentVal: bytes, specVal: bytes,
                           refVal: bytes
                           ) -> hangar_service_pb2.PushBranchRecordReply:
        """Push a new commit reference to the server.

        Parameters
        ----------
        commit : str
            hash digest of the commit to send
        parentVal : bytes
            lmdb ref parentVal of the commit
        specVal : bytes
            lmdb ref specVal of the commit
        refVal : bytes
            lmdb ref refVal of the commit

        Returns
        -------
        hangar_service_pb2.PushBranchRecordReply
            standard error proto
        """
        cIter = chunks.clientCommitChunkedIterator(commit=commit,
                                                   parentVal=parentVal,
                                                   specVal=specVal,
                                                   refVal=refVal)
        response = self.stub.PushCommit(cIter)
        return response

    def fetch_commit_record(self, commit: str) -> Tuple[str, bytes, bytes, bytes]:
        """get the refs for a commit digest

        Parameters
        ----------
        commit : str
            digest of the commit to retrieve the references for

        Returns
        -------
        Tuple[str, bytes, bytes, bytes]
            ['commit hash', 'parentVal', 'specVal', 'refVal']
        """
        request = hangar_service_pb2.FetchCommitRequest(commit=commit)
        replies = self.stub.FetchCommit(request)
        for idx, reply in enumerate(replies):
            if idx == 0:
                refVal = bytearray(reply.total_byte_size)
                specVal = reply.record.spec
                parentVal = reply.record.parent
                offset = 0
            size = len(reply.record.ref)
            refVal[offset: offset + size] = reply.record.ref
            offset += size

        if reply.error.code != 0:
            logger.error(reply.error)
            return False
        return (commit, parentVal, specVal, refVal)

    def fetch_schema(self, schema_hash: str) -> Tuple[str, bytes]:
        """get the schema specification for a schema hash

        Parameters
        ----------
        schema_hash : str
            schema hash to retrieve from the server

        Returns
        -------
        Tuple[str, bytes]
            ['schema hash', 'schemaVal']
        """
        schema_rec = hangar_service_pb2.SchemaRecord(digest=schema_hash)
        request = hangar_service_pb2.FetchSchemaRequest(rec=schema_rec)
        reply = self.stub.FetchSchema(request)
        if reply.error.code != 0:
            logger.error(reply.error)
            return False

        schemaVal = reply.rec.blob
        return (schema_hash, schemaVal)

    def push_schema(self, schema_hash: str,
                    schemaVal: bytes) -> hangar_service_pb2.PushSchemaReply:
        """push a schema hash record to the remote server

        Parameters
        ----------
        schema_hash : str
            hash digest of the schema being sent
        schemaVal : bytes
            ref value of the schema representation

        Returns
        -------
        hangar_service_pb2.PushSchemaReply
            standard error proto indicating success
        """
        rec = hangar_service_pb2.SchemaRecord(digest=schema_hash,
                                              blob=schemaVal)
        request = hangar_service_pb2.PushSchemaRequest(rec=rec)
        response = self.stub.PushSchema(request)
        return response

    def fetch_data(
            self,
            origins: Sequence[hangar_service_pb2.DataOriginReply],
            datawriter_cm: 'DataWriter',
            schema: str,
            pbar: 'tqdm'
    ) -> Sequence[str]:
        """Fetch data hash digests for a particular schema.

        As the total size of the data to be transferred isn't known before this
        operation occurs, if more tensor data digests are requested than the
        Client is configured to allow in memory at a time, only a portion of the
        requested digests will actually be materialized. The received digests
        are listed as the return value of this function; be sure to check that
        all requested digests have been received!

        Parameters
        ----------
        origins : Sequence[hangar_service_pb2.DataOriginReply]
            origin replies describing the uri and expected digest of each piece
            of data to fetch
        datawriter_cm : 'DataWriter'
            open data writer context manager that received data is written through
        schema : str
            schema hash the requested digests are stored under
        pbar : 'tqdm'
            progress bar instance updated as each digest is received

        Returns
        -------
        Sequence[str]
            digests which were actually received and written during this call

        Raises
        ------
        RuntimeError
            if received digest != requested or what was reported to be sent.

        Notes
        -----
        Internally, every returned piece of data is written through the provided
        data writer::

            client.fetch_data(origins, DW_CM, schema, pbar)
            # for each piece received:
            #   DW_CM.data(schema, data_digest=returned_digest, data=returned_data)
        """

        def fetch_write_data_parallel(
                pb: 'hangar_service_pb2.DataOriginReply',
                dw_cm: 'DataWriter',
                schema: str,
                lock: 'Lock'
        ) -> str:
            requested_uri = pb.uri
            request = hangar_service_pb2.FetchDataRequest(uri=requested_uri)
            replies = self.stub.FetchData(request)
            for idx, reply in enumerate(replies):
                if idx == 0:
                    dBytes = bytearray(reply.nbytes)
                    offset = 0
                    if reply.uri != requested_uri:
                        raise ValueError(f'requested uri: {requested_uri}, returned: {reply.uri}')
                size = len(reply.raw_data)
                if size > 0:
                    dBytes[offset:offset + size] = reply.raw_data
                    offset += size

            if pb.compression is True:
                codex = pb.compression_opts['id']
                if codex == 'blosc':
                    returned_raw = blosc.decompress(dBytes)
                else:
                    raise ValueError(f'compression id: {codex}')
            else:
                returned_raw = dBytes

            dtype_code = pb.data_type
            returned_data = chunks.deserialize_data(dtype_code, returned_raw)
            hash_func = hash_func_from_tcode(str(dtype_code))
            received_hash = hash_func(returned_data)
            if received_hash != pb.digest:
                raise RuntimeError(f'MANGLED! got: {received_hash} != requested: {pb.digest}')
            with lock:
                written_digest = dw_cm.data(
                    schema, data_digest=received_hash, data=returned_data)
            return written_digest

        saved_digests = []
        nWorkers = calc_num_threadpool_workers()
        with concurrent.futures.ThreadPoolExecutor(max_workers=nWorkers) as executor:
            futures = [executor.submit(fetch_write_data_parallel,
                pb, datawriter_cm, schema, self.data_writer_lock) for pb in origins]
            for future in concurrent.futures.as_completed(futures):
                saved_digests.append(future.result())
                pbar.update(1)
        return saved_digests

    def fetch_data_origin(self, digests: Sequence[str]) -> List[hangar_service_pb2.DataOriginReply]:

        def origin_request_iter(digests: Sequence[str]):
            for digest in digests:
                yield hangar_service_pb2.DataOriginRequest(digest=digest)

        requestIter = origin_request_iter(digests)
        replies = self.stub.FetchFindDataOrigin(requestIter)

        output = []
        for reply in replies:
            output.append(reply)
        return output

    def push_find_data_origin(self, digests):
        try:
            specs = []
            hashTxn = TxnRegister().begin_reader_txn(self.env.hashenv)
            for digest in digests:
                hashKey = hash_data_db_key_from_raw_key(digest)
                hashVal = hashTxn.get(hashKey, default=False)
                if not hashVal:
                    raise KeyError(f'No hash record with key: {hashKey}')
                be_loc = backend_decoder(hashVal)
                specs.append((digest, be_loc))
        finally:
            TxnRegister().abort_reader_txn(self.env.hashenv)

    def push_data_begin_context(self):
        request = hangar_service_pb2.PushBeginContextRequest()
        reply = self.stub.PushBeginContext(request)
        return reply

    def push_data_end_context(self):
        request = hangar_service_pb2.PushEndContextRequest()
        reply = self.stub.PushEndContext(request)
        return reply

    def push_data(self, schema_hash: str, digests: Sequence[str],
                  pbar: tqdm = None) -> hangar_service_pb2.PushDataReply:
        """Given a schema and digest list, read the data and send to the server

        Parameters
        ----------
        schema_hash : str
            hash of the digest schemas
        digests : Sequence[str]
            iterable of digests to be read in and sent to the server
        pbar : tqdm, optional
            progress bar instance to be updated as the operation occurs, by default None

        Returns
        -------
        hangar_service_pb2.PushDataReply
            standard error proto indicating success

        Raises
        ------
        KeyError
            if one of the input digests does not exist on the client
        rpc_error
            if the server received corrupt data
        """
        CONFIG_COMPRESSION_IS_DESIRED = True
        try:
            specs = {}
            request_stack = []
            hashTxn = TxnRegister().begin_reader_txn(self.env.hashenv)
            for digest in digests:
                hashKey = hash_data_db_key_from_raw_key(digest)
                hashVal = hashTxn.get(hashKey, default=False)
                if not hashVal:
                    raise KeyError(f'No hash record with key: {hashKey}')

                be_loc = backend_decoder(hashVal)
                specs[digest] = be_loc  # saving for later so no recompute cost

                if be_loc.backend in ['01', '00', '10']:
                    dtype = hangar_service_pb2.DataType.NP_ARRAY
                elif be_loc.backend == '30':
                    dtype = hangar_service_pb2.DataType.STR
                elif be_loc.backend == '31':
                    dtype = hangar_service_pb2.DataType.BYTES
                else:
                    raise TypeError(be_loc)

                _request = hangar_service_pb2.PushFindDataOriginRequest(
                    data_type=dtype,
                    digest=digest,
                    compression_is_desired=CONFIG_COMPRESSION_IS_DESIRED)
                request_stack.append(_request)
        finally:
            TxnRegister().abort_reader_txn(self.env.hashenv)

        def request_stack_iterator(request_stack):
            for request in request_stack:
                yield request

        requestIter = request_stack_iterator(request_stack)
        replies: Iterable[hangar_service_pb2.PushFindDataOriginReply]
        replies = self.stub.PushFindDataOrigin(requestIter)

        try:
            for k in self._rFs.keys():
                self._rFs[k].__enter__()

            def push_request_iterator(raw, uri, data_type, schema_hash):
                push_request = hangar_service_pb2.PushDataRequest(
                    uri=uri,
                    nbytes=len(raw),
                    data_type=data_type,
                    schema_hash=schema_hash)
                for raw_chunk in chunks.chunk_bytes(raw):
                    push_request.raw_data = raw_chunk
                    yield push_request

            def push_data_parallel(reply):
                be_loc = specs[reply.digest]
                data = self._rFs[be_loc.backend].read_data(be_loc)
                _, raw_data = chunks.serialize_data(data)

                if reply.compression_expected is True:
                    compressed_record = blosc.compress(
                        raw_data, clevel=3, cname='blosclz', shuffle=blosc.NOSHUFFLE)
                else:
                    compressed_record = raw_data

                if be_loc.backend in ['01', '00', '10']:
                    dtype = hangar_service_pb2.DataType.NP_ARRAY
                elif be_loc.backend == '30':
                    dtype = hangar_service_pb2.DataType.STR
                elif be_loc.backend == '31':
                    dtype = hangar_service_pb2.DataType.BYTES
                else:
                    raise TypeError(be_loc)

                pushDataIter = push_request_iterator(compressed_record, reply.uri, dtype, schema_hash)
                push_data_response = self.stub.PushData(pushDataIter)
                return push_data_response

            nWorkers = calc_num_threadpool_workers()
            with concurrent.futures.ThreadPoolExecutor(max_workers=nWorkers) as executor:
                push_futures = tuple((executor.submit(push_data_parallel, reply) for reply in replies))
                for future in concurrent.futures.as_completed(push_futures):
                    _ = future.result()
                    pbar.update(1)

        except grpc.RpcError as rpc_error:
            logger.error(rpc_error)
            raise rpc_error

        finally:
            for k in self._rFs.keys():
                self._rFs[k].__exit__()

    def fetch_find_missing_commits(self, branch_name):

        c_commits = commiting.list_all_commits(self.env.refenv)
        branch_rec = hangar_service_pb2.BranchRecord(name=branch_name)
        request = hangar_service_pb2.FindMissingCommitsRequest()
        request.commits.extend(c_commits)
        request.branch.CopyFrom(branch_rec)
        reply = self.stub.FetchFindMissingCommits(request)
        return reply

    def push_find_missing_commits(self, branch_name):
        branch_commits = summarize.list_history(
            refenv=self.env.refenv,
            branchenv=self.env.branchenv,
            branch_name=branch_name)
        branch_rec = hangar_service_pb2.BranchRecord(
            name=branch_name, commit=branch_commits['head'])

        request = hangar_service_pb2.FindMissingCommitsRequest()
        request.commits.extend(branch_commits['order'])
        request.branch.CopyFrom(branch_rec)
        reply = self.stub.PushFindMissingCommits(request)
        return reply

    def fetch_find_missing_hash_records(self, commit):

        all_hashs = hashs.HashQuery(self.env.hashenv).list_all_hash_keys_raw()
        all_hashs_raw = [chunks.serialize_ident(digest, '') for digest in all_hashs]
        raw_pack = chunks.serialize_record_pack(all_hashs_raw)
        pb2_func = hangar_service_pb2.FindMissingHashRecordsRequest
        cIter = chunks.missingHashRequestIterator(commit, raw_pack, pb2_func)
        responses = self.stub.FetchFindMissingHashRecords(cIter)
        for idx, response in enumerate(responses):
            if idx == 0:
                hBytes, offset = bytearray(response.total_byte_size), 0
            size = len(response.hashs)
            hBytes[offset: offset + size] = response.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        raw_idents = chunks.deserialize_record_pack(uncompBytes)
        idents = [chunks.deserialize_ident(raw) for raw in raw_idents]
        return idents

    def push_find_missing_hash_records(self, commit, tmpDB: lmdb.Environment = None):

        if tmpDB is None:
            with tempfile.TemporaryDirectory() as tempD:
                tmpDF = os.path.join(tempD, 'test.lmdb')
                tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
                commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
                c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
                c_hashes = list(set(c_hashs_schemas.keys()))
                tmpDB.close()
        else:
            c_hashs_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
            c_hashes = list(set(c_hashs_schemas.keys()))

        c_hashs_raw = [chunks.serialize_ident(digest, '') for digest in c_hashes]
        raw_pack = chunks.serialize_record_pack(c_hashs_raw)
        pb2_func = hangar_service_pb2.FindMissingHashRecordsRequest
        cIter = chunks.missingHashRequestIterator(commit, raw_pack, pb2_func)

        responses = self.stub.PushFindMissingHashRecords(cIter)
        # same streamed-reply reassembly as in fetch_find_missing_hash_records above
        for idx, response in enumerate(responses):
            if idx == 0:
                hBytes, offset = bytearray(response.total_byte_size), 0
            size = len(response.hashs)
            hBytes[offset: offset + size] = response.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        s_missing_raw = chunks.deserialize_record_pack(uncompBytes)
        s_mis_hsh = [chunks.deserialize_ident(raw).digest for raw in s_missing_raw]
        s_mis_hsh_sch = [(s_hsh, c_hashs_schemas[s_hsh]) for s_hsh in s_mis_hsh]
        return s_mis_hsh_sch

    def fetch_find_missing_schemas(self, commit):
        c_schemaset = set(hashs.HashQuery(self.env.hashenv).list_all_schema_digests())
        c_schemas = list(c_schemaset)

        request = hangar_service_pb2.FindMissingSchemasRequest()
        request.commit = commit
        request.schema_digests.extend(c_schemas)

        response = self.stub.FetchFindMissingSchemas(request)
        return response

    def push_find_missing_schemas(self, commit, tmpDB: lmdb.Environment = None):

        if tmpDB is None:
            with tempfile.TemporaryDirectory() as tempD:
                tmpDF = os.path.join(tempD, 'test.lmdb')
                tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
                commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
                c_schemaset = set(queries.RecordQuery(tmpDB).schema_hashes())
                c_schemas = list(c_schemaset)
                tmpDB.close()
        else:
            c_schemaset = set(queries.RecordQuery(tmpDB).schema_hashes())
            c_schemas = list(c_schemaset)

        request = hangar_service_pb2.FindMissingSchemasRequest()
        request.commit = commit
        request.schema_digests.extend(c_schemas)

        response = self.stub.PushFindMissingSchemas(request)
        return response


================================================
FILE: src/hangar/remote/config_server.ini
================================================
[SERVER_GRPC]
channel_address = [::]:50051
max_thread_pool_workers = 200
max_concurrent_rpcs = 100
enable_compression = NoCompression
optimization_target = blend
fetch_max_nbytes = 500_000_000

[SERVER_ADMIN]
restrict_push = 0
username = --none--
password = --none--

[CLIENT_GRPC]
enable_compression = NoCompression
optimization_target = blend
push_max_nbytes = 600_000_000
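
# A minimal sketch (kept as comments so this file remains valid INI) of how
# these values could be read with Python's standard-library configparser; the
# loader Hangar actually uses internally is not shown here:
#
#     from configparser import ConfigParser
#     cfg = ConfigParser()
#     cfg.read('config_server.ini')
#     address = cfg['SERVER_GRPC']['channel_address']        # '[::]:50051'
#     push_max = int(cfg['CLIENT_GRPC']['push_max_nbytes'])  # int() accepts the underscores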


================================================
FILE: src/hangar/remote/content.py
================================================
from typing import NamedTuple, Union, Optional

import numpy as np

from ..columns.constructors import open_file_handles, column_type_object_from_schema
from ..context import Environments
from ..records import (
    parsing,
    schema_spec_from_db_val,
    hash_schema_db_key_from_raw_key,
    hash_data_db_key_from_raw_key
)
from ..txnctx import TxnRegister


class ContentWriter(object):
    """Common methods to client & server which write content received.

    These are special methods configured especially for remote operations.
    They do not honor the public facing API or data write/read conventions
    established for users or the rest of Hangar internals.

    Parameters
    ----------
    envs
        main hangar environment context object.
    """

    def __init__(self, envs: Environments):

        self.env: Environments = envs
        self.txnctx: TxnRegister = TxnRegister()

    def commit(self, commit: str, parentVal: bytes, specVal: bytes,
               refVal: bytes) -> Union[str, bool]:
        """Write a commit record to the ref db

        Parameters
        ----------
        commit
            commit hash to write
        parentVal
            db formatted representation of commit parents
        specVal
            db formatted representation of the commit specs
        refVal
            db formatted representation of commit record contents

        Returns
        -------
        str or False
            Commit hash if operation was successful.

            False if the commit hash existed in the db previously and
            no records were written.
        """
        commitSpecKey = parsing.commit_spec_db_key_from_raw_key(commit)
        commitParentKey = parsing.commit_parent_db_key_from_raw_key(commit)
        commitRefKey = parsing.commit_ref_db_key_from_raw_key(commit)
        refTxn = self.txnctx.begin_writer_txn(self.env.refenv)
        try:
            cmtParExists = refTxn.put(commitParentKey, parentVal, overwrite=False)
            cmtRefExists = refTxn.put(commitRefKey, refVal, overwrite=False)
            cmtSpcExists = refTxn.put(commitSpecKey, specVal, overwrite=False)
        finally:
            self.txnctx.commit_writer_txn(self.env.refenv)

        ret = False if not all([cmtParExists, cmtRefExists, cmtSpcExists]) else commit
        return ret

    def schema(self, schema_hash: str, schemaVal: bytes) -> Union[str, bool]:
        """Write a column schema hash specification record to the db

        Parameters
        ----------
        schema_hash
            schema hash being written
        schemaVal
            db formatted representation of schema specification

        Returns
        -------
        str or False
            schema_hash written if operation was successful.

            False if the schema_hash existed in db and no records written.
        """
        schemaKey = hash_schema_db_key_from_raw_key(schema_hash)
        hashTxn = self.txnctx.begin_writer_txn(self.env.hashenv)
        try:
            schemaExists = hashTxn.put(schemaKey, schemaVal, overwrite=False)
        finally:
            self.txnctx.commit_writer_txn(self.env.hashenv)

        ret = False if not schemaExists else schema_hash
        return ret
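
# A minimal usage sketch for ContentWriter; ``cmt_digest`` and the ``*_bytes``
# values below are hypothetical placeholders, not names defined in this module.
# Each record is written at most once, and a ``False`` return signals that it
# already existed locally:
#
#     writer = ContentWriter(envs)
#     res = writer.commit(cmt_digest, parent_bytes, spec_bytes, ref_bytes)
#     if res is False:
#         pass  # commit already present; nothing was written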


class DataWriter:

    def __init__(self, envs):

        self.env: Environments = envs
        self.txnctx: TxnRegister = TxnRegister()

        self._schema_hash_be_accessors = {}
        self._schema_hash_objects = {}
        self._is_cm = False

    def __enter__(self):
        self._is_cm = True
        self.hashTxn = self.txnctx.begin_writer_txn(self.env.hashenv)
        return self

    def __exit__(self, *exc):
        for be in self._schema_hash_be_accessors.values():
            be.close()
        self.txnctx.commit_writer_txn(self.env.hashenv)
        self._schema_hash_be_accessors.clear()
        self._schema_hash_objects.clear()
        self._is_cm = False
        self.hashTxn = None

    @property
    def is_cm(self):
        return self._is_cm

    def _open_new_backend(self, schema):
        be_accessor = open_file_handles(backends=[schema.backend],
                                        path=self.env.repo_path,
                                        mode='a',
                                        schema=schema,
                                        remote_operation=True)[schema.backend]
        self._schema_hash_be_accessors[schema.schema_hash_digest()] = be_accessor

    def _get_schema_object(self, schema_hash):
        schemaKey = hash_schema_db_key_from_raw_key(schema_hash)
        schemaVal = self.hashTxn.get(schemaKey)

        schema_val = schema_spec_from_db_val(schemaVal)
        schema = column_type_object_from_schema(schema_val)

        if schema_hash != schema.schema_hash_digest():
            raise RuntimeError(schema.__dict__)

        self._schema_hash_objects[schema_hash] = schema
        return schema

    def _get_changed_schema_object(self, schema_hash, backend, backend_options):
        import copy
        if schema_hash in self._schema_hash_objects:
            base_schema = copy.deepcopy(self._schema_hash_objects[schema_hash])
        else:
            base_schema = copy.deepcopy(self._get_schema_object(schema_hash))

        base_schema.change_backend(backend, backend_options=backend_options)
        changed_schema = self._schema_hash_objects.setdefault(base_schema.schema_hash_digest(), base_schema)
        return changed_schema

    def data(self,
             schema_hash: str,
             data_digest: str,
             data: Union[str, int, np.ndarray],
             backend: Optional[str] = None,
             backend_options: Optional[dict] = None) -> str:
        """Write data content to the hash records database

        Parameters
        ----------
        schema_hash
            schema_hash currently being written
        data_digest
            digest to write
        data
            actual piece of data to write
        backend
            Manually specified backend code which will be used to record the
            data. If not specified (``None``), the default backend recorded
            in the schema spec will be used.
        backend_options
            dict specifying backend options to use

        Returns
        -------
        str
            data digest written by this method.
        """
        if schema_hash not in self._schema_hash_objects:
            self._get_schema_object(schema_hash)
        schema = self._schema_hash_objects[schema_hash]
        if (backend is not None) and ((backend != schema.backend) or (backend_options is not None)):
            schema = self._get_changed_schema_object(schema_hash, backend, backend_options)

        # Needed because changing the backend (or its options) also changes
        # the schema hash digest.
        final_schema_hash = schema.schema_hash_digest()
        if final_schema_hash not in self._schema_hash_be_accessors:
            self._open_new_backend(schema)

        be_accessor = self._schema_hash_be_accessors[final_schema_hash]
        hashVal = be_accessor.write_data(data, remote_operation=True)
        hashKey = hash_data_db_key_from_raw_key(data_digest)
        self.hashTxn.put(hashKey, hashVal)
        return data_digest
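
# A minimal usage sketch for DataWriter (``received_tensors`` is a hypothetical
# placeholder): the context manager opens a single writer transaction on the
# hash db and keeps backend file handles open across many ``data()`` calls,
# closing them all on exit.
#
#     with DataWriter(envs) as dw:
#         for digest, arr in received_tensors.items():
#             dw.data(schema_hash, digest, arr)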


RawCommitContent = NamedTuple('RawCommitContent', [('commit', str),
                                                   ('cmtParentVal', bytes),
                                                   ('cmtSpecVal', bytes),
                                                   ('cmtRefVal', bytes)])


class ContentReader(object):
    """Common methods to client & server which read content.

    These are special methods configured especially for remote operations.
    They do not honor the public facing API or data write/read conventions
    established for users or the rest of Hangar internals.

    Parameters
    ----------
    envs : context.Environments
        main hangar environment context object.
    """
    def __init__(self, envs):

        self.env: Environments = envs
        self.txnctx: TxnRegister = TxnRegister()

    def commit(self, commit: str) -> Union[RawCommitContent, bool]:
        """Read a commit with a given hash and get db formatted content

        Parameters
        ----------
        commit
            commit hash to read from the ref db

        Returns
        -------
        namedtuple or False
            namedtuple with typename = RawCommitContent and field_names = ('commit',
            'cmtParentVal', 'cmtSpecVal', 'cmtRefVal') if operation successful.

            False if commit does not exist with provided digest.
        """
        cmtRefKey = parsing.commit_ref_db_key_from_raw_key(commit)
        cmtParentKey = parsing.commit_parent_db_key_from_raw_key(commit)
        cmtSpecKey = parsing.commit_spec_db_key_from_raw_key(commit)

        reftxn = self.txnctx.begin_reader_txn(self.env.refenv)
        try:
            cmtRefVal = reftxn.get(cmtRefKey, default=False)
            cmtParentVal = reftxn.get(cmtParentKey, default=False)
            cmtSpecVal = reftxn.get(cmtSpecKey, default=False)
        finally:
            self.txnctx.abort_reader_txn(self.env.refenv)

        ret = RawCommitContent(commit, cmtParentVal, cmtSpecVal, cmtRefVal)

        # a parent value may legitimately be empty yet still stored as bytes,
        # so only report a miss when the lookup returned False instead of bytes
        if not all(ret) and not isinstance(ret.cmtParentVal, bytes):
            return False
        else:
            return ret

    def schema(self, schema_hash: str) -> Union[bytes, bool]:
        """Read db formatted schema val for a schema hash

        Parameters
        ----------
        schema_hash
            schema hash to look up

        Returns
        -------
        bytes or False
            db formatted representation of schema bytes if schema_hash exists

            False if the schema_hash does not exist in the db.
        """
        schemaKey = hash_schema_db_key_from_raw_key(schema_hash)
        hashTxn = self.txnctx.begin_reader_txn(self.env.hashenv)
        try:
            schemaVal = hashTxn.get(schemaKey, default=False)
        finally:
            self.txnctx.abort_reader_txn(self.env.hashenv)

        ret = False if not schemaVal else schemaVal
        return ret
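
# A minimal usage sketch for ContentReader (``cmt_digest`` is a hypothetical
# placeholder): both commit() and schema() hand back db-formatted values ready
# to be shipped over the wire, with ``False`` signalling a miss.
#
#     reader = ContentReader(envs)
#     raw = reader.commit(cmt_digest)
#     if raw is not False:
#         parent, spec, ref = raw.cmtParentVal, raw.cmtSpecVal, raw.cmtRefVal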


================================================
FILE: src/hangar/remote/hangar_service.proto
================================================
syntax = "proto3";

package hangar;
option optimize_for = SPEED;


service HangarService {

    rpc PING (PingRequest) returns (PingReply) {}
    rpc GetClientConfig (GetClientConfigRequest) returns (GetClientConfigReply) {}

    rpc FetchBranchRecord (FetchBranchRecordRequest) returns (FetchBranchRecordReply) {}
    rpc FetchData (FetchDataRequest) returns (stream FetchDataReply) {}
    rpc FetchCommit (FetchCommitRequest) returns (stream FetchCommitReply) {}
    rpc FetchSchema (FetchSchemaRequest) returns (FetchSchemaReply) {}

    rpc PushBranchRecord (PushBranchRecordRequest) returns (PushBranchRecordReply) {}
    rpc PushData (stream PushDataRequest) returns (PushDataReply) {}
    rpc PushCommit (stream PushCommitRequest) returns (PushCommitReply) {}
    rpc PushSchema (PushSchemaRequest) returns (PushSchemaReply) {}

    rpc FetchFindMissingCommits (FindMissingCommitsRequest) returns (FindMissingCommitsReply) {}
    rpc FetchFindMissingHashRecords (stream FindMissingHashRecordsRequest) returns (stream FindMissingHashRecordsReply) {}
    rpc FetchFindMissingSchemas (FindMissingSchemasRequest) returns (FindMissingSchemasReply) {}

    rpc PushFindMissingCommits (FindMissingCommitsRequest) returns (FindMissingCommitsReply) {}
    rpc PushFindMissingHashRecords (stream FindMissingHashRecordsRequest) returns (stream FindMissingHashRecordsReply) {}
    rpc PushFindMissingSchemas (FindMissingSchemasRequest) returns (FindMissingSchemasReply) {}

    rpc FetchFindDataOrigin (stream DataOriginRequest) returns (stream DataOriginReply) {}
    rpc PushFindDataOrigin (stream PushFindDataOriginRequest) returns (stream PushFindDataOriginReply) {}
    rpc PushBeginContext (PushBeginContextRequest) returns (PushBeginContextReply) {}
    rpc PushEndContext (PushEndContextRequest) returns (PushEndContextReply) {}
}
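
// A minimal connectivity sketch using the Python stubs generated from this
// file.  Module paths and the port mirror the repository layout and the
// default in config_server.ini, but the exact import location of the
// generated modules is an assumption here:
//
//     import grpc
//     from hangar.remote import hangar_service_pb2, hangar_service_pb2_grpc
//
//     channel = grpc.insecure_channel('localhost:50051')
//     stub = hangar_service_pb2_grpc.HangarServiceStub(channel)
//     print(stub.PING(hangar_service_pb2.PingRequest()).result)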


/*
-------------------------------------------------------------------------------
| Common Formats for Data and Records
-------------------------------------------------------------------------------
*/


message PushBeginContextRequest {
    // TODO: make this field actually do something
    string client_uuid = 1;
}
message PushBeginContextReply {
    ErrorProto err = 1;
}


message PushEndContextRequest {
    // TODO: make this field actually do something
    string client_uuid = 1;
}
message PushEndContextReply {
    ErrorProto err = 1;
}



message ErrorProto {
    // binary indicator of success. 1: success, 0: failed
    int64 code = 1;
    // string response indicating success. 'OK': success, 'ERROR': failed
    string message = 2;
  }


message BranchRecord {
    // name of the branch
    string name = 1;
    // branch head commit hash
    string commit = 2;
}

message HashRecord {
    // specific hash algorithm used to calculate the digest
    string type = 1;
    // (hex)digest of the hash record
    string digest = 2;
}

message CommitRecord {
    // parent hash(es) of the commit, in the same format as the local store
    bytes parent = 1;
    // compressed record reference contents of the commit
    bytes ref = 2;
    // metadata attached to the commit record (username, email, message, time, etc.)
    bytes spec = 3;
}


message SchemaRecord {
    // hash of the schema def
    string digest = 1;
    // encoded schema val to be sent
    bytes blob = 2;
}

message DataOriginRequest {
    string digest = 1;
}


enum DataLocation {
    // Server Side Local Disk
    REMOTE_SERVER = 0;
    // Minio Instance
    MINIO = 1;
    // AWS S3
    S3 = 2;
    // Google Cloud Store
    GCS = 3;
    // Azure Blob Store
    ABS = 4;
}

enum DataType {
    NP_ARRAY = 0;
    SCHEMA = 1;  // Not yet decided whether schema content will be shared via this type.
    STR = 2;
    BYTES = 3;
}


message DataOriginReply {
    DataLocation location = 1;
    DataType data_type = 2;
    string digest = 3;
    string uri = 4;
    bool compression = 5;
    map<string, string> compression_opts = 6;
}


message PushFindDataOriginRequest {
    DataType data_type = 1;
    string digest = 2;
    bool compression_is_desired = 3;
}

message PushFindDataOriginReply {
    string digest = 1;
    DataLocation location = 2;
    string uri = 3;
    bool compression_expected = 5;
    map<string, string> compression_opts_expected = 6;
}



/*
-------------------------------------------------------------------------------
| Client Config from Server
-------------------------------------------------------------------------------
*/

message PingRequest {}

message PingReply {
    string result = 1;
}


message GetClientConfigRequest {}

message GetClientConfigReply {
    // dictionary style map of the config options
    map<string, string> config = 1;
    // success or not
    ErrorProto error = 2;
}


/*
-------------------------------------------------------------------------------
| Fetching Data and Records
-------------------------------------------------------------------------------
*/


message FetchBranchRecordRequest {
    // name of the branch to fetch
    BranchRecord rec = 1;
}
message FetchBranchRecordReply {
    // record result of the branch
    BranchRecord rec = 1;
    // success or not
    ErrorProto error = 2;
}


message FetchDataRequest {
    string uri = 1;
//    bytes raw_data = 1;
//    // total size of the split tensorprotos
//    int64 comp_nbytes = 2;
//    // total size of the uncompressed raw data
//    int64 uncomp_nbytes = 3;
//    // string schema_hash = 4;
//    ErrorProto error = 4;
}


message FetchDataReply {
    string uri = 1;
    // data container for the tensor
    bytes raw_data = 2;
    // total number of bytes
    int64 nbytes = 3;
//    // total size of the split tensorprotos
//    int64 comp_nbytes = 2;
//    // total size of the uncompressed raw data
//    int64 uncomp_nbytes = 3;
    // success or not
    ErrorProto error = 4;
}



message FetchCommitRequest {
    // (hex)digest of the commit to fetch references to
    string commit = 1;
}
message FetchCommitReply {
    // (hex)digest hash of the commit record
    string commit = 1;
    // total size of bytes
    int64 total_byte_size = 2;
    // data
    CommitRecord record = 3;
    // success or not
    ErrorProto error = 4;
}


message FetchSchemaRequest {
    // schema record spec with hash specified
    SchemaRecord rec = 1;
}
message FetchSchemaReply {
    // schema record spec
    SchemaRecord rec = 1;
    // success or not
    ErrorProto error = 2;
}


/*
-------------------------------------------------------------------------------
| Pushing Data and Records
-------------------------------------------------------------------------------
*/


message PushBranchRecordRequest {
    // branch record to push
    BranchRecord rec = 1;
}
message PushBranchRecordReply {
    // success or not
    ErrorProto error = 1;
}

message PushDataRequest {
    string uri = 1;
    // data container for the tensor
    bytes raw_data = 2;
    // total number of bytes
    int64 nbytes = 3;
    // data type of the contents
    DataType data_type = 4;
    // TODO: Remove need for schema hash
    string schema_hash = 5;
}
message PushDataReply {
    // success or not
    ErrorProto error = 1;
}


message PushCommitRequest {
    // (hex)digest hash of the commit record
    string commit = 1;
    // total size of bytes
    int64 total_byte_size = 2;
    // data
    CommitRecord record = 3;
}
message PushCommitReply {
    // success or not
    ErrorProto error = 1;
}

message PushSchemaRequest {
    SchemaRecord rec = 1;
}
message PushSchemaReply {
    // SchemaRecord rec = 1;
    ErrorProto error = 1;
}



/*
-------------------------------------------------------------------------------
| Fetch Finding outdated records
-------------------------------------------------------------------------------
*/

message FindMissingCommitsRequest {
    // list of commits existing on one side
    repeated string commits = 1;
    // branch to query
    BranchRecord branch = 2;
}
message FindMissingCommitsReply {
    // list of commits existing on one side but not the other in requested branch.
    repeated string commits = 1;
    // branch to query
    BranchRecord branch = 2;
    // success or not
    ErrorProto error = 3;
}


message FindMissingHashRecordsRequest {
    // commit hash to check hash records for
    string commit = 1;
    // all hashes existing on a side
    bytes hashs = 2;
    // total byte size
    int64 total_byte_size = 3;
}
message FindMissingHashRecordsReply {
    // commit hash specified
    string commit = 1;
    // all hashes existing on a side
    bytes hashs = 2;
    // total byte size of the packed hash records
    int64 total_byte_size = 3;
    // success or not
    ErrorProto error = 4;
}



message FindMissingSchemasRequest {
    // commit hash specified
    string commit = 1;
    // schema records on that side
    repeated string schema_digests = 2;
}
message FindMissingSchemasReply {
    // commit hash specified
    string commit = 1;
    // schema records on that side
    repeated string schema_digests = 2;
    // success or not
    ErrorProto error = 3;
}


================================================
FILE: src/hangar/remote/hangar_service_pb2.py
================================================
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: hangar_service.proto

from google.protobuf.internal import enum_type_wrapper
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()




DESCRIPTOR = _descriptor.FileDescriptor(
  name='hangar_service.proto',
  package='hangar',
  syntax='proto3',
  serialized_options=b'H\001',
  serialized_pb=b'\n\x14hangar_service.proto\x12\x06hangar\".\n\x17PushBeginContextRequest\x12\x13\n\x0b\x63lient_uuid\x18\x01 \x01(\t\"8\n\x15PushBeginContextReply\x12\x1f\n\x03\x65rr\x18\x01 \x01(\x0b\x32\x12.hangar.ErrorProto\",\n\x15PushEndContextRequest\x12\x13\n\x0b\x63lient_uuid\x18\x01 \x01(\t\"6\n\x13PushEndContextReply\x12\x1f\n\x03\x65rr\x18\x01 \x01(\x0b\x32\x12.hangar.ErrorProto\"+\n\nErrorProto\x12\x0c\n\x04\x63ode\x18\x01 \x01(\x03\x12\x0f\n\x07message\x18\x02 \x01(\t\",\n\x0c\x42ranchRecord\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06\x63ommit\x18\x02 \x01(\t\"*\n\nHashRecord\x12\x0c\n\x04type\x18\x01 \x01(\t\x12\x0e\n\x06\x64igest\x18\x02 \x01(\t\"9\n\x0c\x43ommitRecord\x12\x0e\n\x06parent\x18\x01 \x01(\x0c\x12\x0b\n\x03ref\x18\x02 \x01(\x0c\x12\x0c\n\x04spec\x18\x03 \x01(\x0c\",\n\x0cSchemaRecord\x12\x0e\n\x06\x64igest\x18\x01 \x01(\t\x12\x0c\n\x04\x62lob\x18\x02 \x01(\x0c\"#\n\x11\x44\x61taOriginRequest\x12\x0e\n\x06\x64igest\x18\x01 \x01(\t\"\x90\x02\n\x0f\x44\x61taOriginReply\x12&\n\x08location\x18\x01 \x01(\x0e\x32\x14.hangar.DataLocation\x12#\n\tdata_type\x18\x02 \x01(\x0e\x32\x10.hangar.DataType\x12\x0e\n\x06\x64igest\x18\x03 \x01(\t\x12\x0b\n\x03uri\x18\x04 \x01(\t\x12\x13\n\x0b\x63ompression\x18\x05 \x01(\x08\x12\x46\n\x10\x63ompression_opts\x18\x06 \x03(\x0b\x32,.hangar.DataOriginReply.CompressionOptsEntry\x1a\x36\n\x14\x43ompressionOptsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"p\n\x19PushFindDataOriginRequest\x12#\n\tdata_type\x18\x01 \x01(\x0e\x32\x10.hangar.DataType\x12\x0e\n\x06\x64igest\x18\x02 \x01(\t\x12\x1e\n\x16\x63ompression_is_desired\x18\x03 \x01(\x08\"\x9d\x02\n\x17PushFindDataOriginReply\x12\x0e\n\x06\x64igest\x18\x01 \x01(\t\x12&\n\x08location\x18\x02 \x01(\x0e\x32\x14.hangar.DataLocation\x12\x0b\n\x03uri\x18\x03 \x01(\t\x12\x1c\n\x14\x63ompression_expected\x18\x05 \x01(\x08\x12_\n\x19\x63ompression_opts_expected\x18\x06 \x03(\x0b\x32<.hangar.PushFindDataOriginReply.CompressionOptsExpectedEntry\x1a>\n\x1c\x43ompressionOptsExpectedEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\r\n\x0bPingRequest\"\x1b\n\tPingReply\x12\x0e\n\x06result\x18\x01 \x01(\t\"\x18\n\x16GetClientConfigRequest\"\xa2\x01\n\x14GetClientConfigReply\x12\x38\n\x06\x63onfig\x18\x01 \x03(\x0b\x32(.hangar.GetClientConfigReply.ConfigEntry\x12!\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x12.hangar.ErrorProto\x1a-\n\x0b\x43onfigEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"=\n\x18\x46\x65tchBranchRecordRequest\x12!\n\x03rec\x18\x01 \x01(\x0b\x32\x14.hangar.BranchRecord\"^\n\x16\x46\x65tchBranchRecordReply\x12!\n\x03rec\x18\x01 \x01(\x0b\x32\x14.hangar.BranchRecord\x12!\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x12.hangar.ErrorProto\"\x1f\n\x10\x46\x65tchDataRequest\x12\x0b\n\x03uri\x18\x01 \x01(\t\"b\n\x0e\x46\x65tchDataReply\x12\x0b\n\x03uri\x18\x01 \x01(\t\x12\x10\n\x08raw_data\x18\x02 \x01(\x0c\x12\x0e\n\x06nbytes\x18\x03 \x01(\x03\x12!\n\x05\x65rror\x18\x04 \x01(\x0b\x32\x12.hangar.ErrorProto\"$\n\x12\x46\x65tchCommitRequest\x12\x0e\n\x06\x63ommit\x18\x01 \x01(\t\"\x84\x01\n\x10\x46\x65tchCommitReply\x12\x0e\n\x06\x63ommit\x18\x01 \x01(\t\x12\x17\n\x0ftotal_byte_size\x18\x02 \x01(\x03\x12$\n\x06record\x18\x03 \x01(\x0b\x32\x14.hangar.CommitRecord\x12!\n\x05\x65rror\x18\x04 \x01(\x0b\x32\x12.hangar.ErrorProto\"7\n\x12\x46\x65tchSchemaRequest\x12!\n\x03rec\x18\x01 \x01(\x0b\x32\x14.hangar.SchemaRecord\"X\n\x10\x46\x65tchSchemaReply\x12!\n\x03rec\x18\x01 
\x01(\x0b\x32\x14.hangar.SchemaRecord\x12!\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x12.hangar.ErrorProto\"<\n\x17PushBranchRecordRequest\x12!\n\x03rec\x18\x01 \x01(\x0b\x32\x14.hangar.BranchRecord\":\n\x15PushBranchRecordReply\x12!\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x12.hangar.ErrorProto\"z\n\x0fPushDataRequest\x12\x0b\n\x03uri\x18\x01 \x01(\t\x12\x10\n\x08raw_data\x18\x02 \x01(\x0c\x12\x0e\n\x06nbytes\x18\x03 \x01(\x03\x12#\n\tdata_type\x18\x04 \x01(\x0e\x32\x10.hangar.DataType\x12\x13\n\x0bschema_hash\x18\x05 \x01(\t\"2\n\rPushDataReply\x12!\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x12.hangar.ErrorProto\"b\n\x11PushCommitRequest\x12\x0e\n\x06\x63ommit\x18\x01 \x01(\t\x12\x17\n\x0ftotal_byte_size\x18\x02 \x01(\x03\x12$\n\x06record\x18\x03 \x01(\x0b\x32\x14.hangar.CommitRecord\"4\n\x0fPushCommitReply\x12!\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x12.hangar.ErrorProto\"6\n\x11PushSchemaRequest\x12!\n\x03rec\x18\x01 \x01(\x0b\x32\x14.hangar.SchemaRecord\"4\n\x0fPushSchemaReply\x12!\n\x05\x65rror\x18\x01 \x01(\x0b\x32\x12.hangar.ErrorProto\"R\n\x19\x46indMissingCommitsRequest\x12\x0f\n\x07\x63ommits\x18\x01 \x03(\t\x12$\n\x06\x62ranch\x18\x02 \x01(\x0b\x32\x14.hangar.BranchRecord\"s\n\x17\x46indMissingCommitsReply\x12\x0f\n\x07\x63ommits\x18\x01 \x03(\t\x12$\n\x06\x62ranch\x18\x02 \x01(\x0b\x32\x14.hangar.BranchRecord\x12!\n\x05\x65rror\x18\x03 \x01(\x0b\x32\x12.hangar.ErrorProto\"W\n\x1d\x46indMissingHashRecordsRequest\x12\x0e\n\x06\x63ommit\x18\x01 \x01(\t\x12\r\n\x05hashs\x18\x02 \x01(\x0c\x12\x17\n\x0ftotal_byte_size\x18\x03 \x01(\x03\"x\n\x1b\x46indMissingHashRecordsReply\x12\x0e\n\x06\x63ommit\x18\x01 \x01(\t\x12\r\n\x05hashs\x18\x02 \x01(\x0c\x12\x17\n\x0ftotal_byte_size\x18\x03 \x01(\x03\x12!\n\x05\x65rror\x18\x04 \x01(\x0b\x32\x12.hangar.ErrorProto\"C\n\x19\x46indMissingSchemasRequest\x12\x0e\n\x06\x63ommit\x18\x01 \x01(\t\x12\x16\n\x0eschema_digests\x18\x02 \x03(\t\"d\n\x17\x46indMissingSchemasReply\x12\x0e\n\x06\x63ommit\x18\x01 \x01(\t\x12\x16\n\x0eschema_digests\x18\x02 \x03(\t\x12!\n\x05\x65rror\x18\x03 \x01(\x0b\x32\x12.hangar.ErrorProto*F\n\x0c\x44\x61taLocation\x12\x11\n\rREMOTE_SERVER\x10\x00\x12\t\n\x05MINIO\x10\x01\x12\x06\n\x02S3\x10\x02\x12\x07\n\x03GCS\x10\x03\x12\x07\n\x03\x41\x42S\x10\x04*8\n\x08\x44\x61taType\x12\x0c\n\x08NP_ARRAY\x10\x00\x12\n\n\x06SCHEMA\x10\x01\x12\x07\n\x03STR\x10\x02\x12\t\n\x05\x42YTES\x10\x03\x32\x9a\r\n\rHangarService\x12\x30\n\x04PING\x12\x13.hangar.PingRequest\x1a\x11.hangar.PingReply\"\x00\x12Q\n\x0fGetClientConfig\x12\x1e.hangar.GetClientConfigRequest\x1a\x1c.hangar.GetClientConfigReply\"\x00\x12W\n\x11\x46\x65tchBranchRecord\x12 
.hangar.FetchBranchRecordRequest\x1a\x1e.hangar.FetchBranchRecordReply\"\x00\x12\x41\n\tFetchData\x12\x18.hangar.FetchDataRequest\x1a\x16.hangar.FetchDataReply\"\x00\x30\x01\x12G\n\x0b\x46\x65tchCommit\x12\x1a.hangar.FetchCommitRequest\x1a\x18.hangar.FetchCommitReply\"\x00\x30\x01\x12\x45\n\x0b\x46\x65tchSchema\x12\x1a.hangar.FetchSchemaRequest\x1a\x18.hangar.FetchSchemaReply\"\x00\x12T\n\x10PushBranchRecord\x12\x1f.hangar.PushBranchRecordRequest\x1a\x1d.hangar.PushBranchRecordReply\"\x00\x12>\n\x08PushData\x12\x17.hangar.PushDataRequest\x1a\x15.hangar.PushDataReply\"\x00(\x01\x12\x44\n\nPushCommit\x12\x19.hangar.PushCommitRequest\x1a\x17.hangar.PushCommitReply\"\x00(\x01\x12\x42\n\nPushSchema\x12\x19.hangar.PushSchemaRequest\x1a\x17.hangar.PushSchemaReply\"\x00\x12_\n\x17\x46\x65tchFindMissingCommits\x12!.hangar.FindMissingCommitsRequest\x1a\x1f.hangar.FindMissingCommitsReply\"\x00\x12o\n\x1b\x46\x65tchFindMissingHashRecords\x12%.hangar.FindMissingHashRecordsRequest\x1a#.hangar.FindMissingHashRecordsReply\"\x00(\x01\x30\x01\x12_\n\x17\x46\x65tchFindMissingSchemas\x12!.hangar.FindMissingSchemasRequest\x1a\x1f.hangar.FindMissingSchemasReply\"\x00\x12^\n\x16PushFindMissingCommits\x12!.hangar.FindMissingCommitsRequest\x1a\x1f.hangar.FindMissingCommitsReply\"\x00\x12n\n\x1aPushFindMissingHashRecords\x12%.hangar.FindMissingHashRecordsRequest\x1a#.hangar.FindMissingHashRecordsReply\"\x00(\x01\x30\x01\x12^\n\x16PushFindMissingSchemas\x12!.hangar.FindMissingSchemasRequest\x1a\x1f.hangar.FindMissingSchemasReply\"\x00\x12O\n\x13\x46\x65tchFindDataOrigin\x12\x19.hangar.DataOriginRequest\x1a\x17.hangar.DataOriginReply\"\x00(\x01\x30\x01\x12^\n\x12PushFindDataOrigin\x12!.hangar.PushFindDataOriginRequest\x1a\x1f.hangar.PushFindDataOriginReply\"\x00(\x01\x30\x01\x12T\n\x10PushBeginContext\x12\x1f.hangar.PushBeginContextRequest\x1a\x1d.hangar.PushBeginContextReply\"\x00\x12N\n\x0ePushEndContext\x12\x1d.hangar.PushEndContextRequest\x1a\x1b.hangar.PushEndContextReply\"\x00\x42\x02H\x01\x62\x06proto3'
)

_DATALOCATION = _descriptor.EnumDescriptor(
  name='DataLocation',
  full_name='hangar.DataLocation',
  filename=None,
  file=DESCRIPTOR,
  values=[
    _descriptor.EnumValueDescriptor(
      name='REMOTE_SERVER', index=0, number=0,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='MINIO', index=1, number=1,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='S3', index=2, number=2,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='GCS', index=3, number=3,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='ABS', index=4, number=4,
      serialized_options=None,
      type=None),
  ],
  containing_type=None,
  serialized_options=None,
  serialized_start=3186,
  serialized_end=3256,
)
_sym_db.RegisterEnumDescriptor(_DATALOCATION)

DataLocation = enum_type_wrapper.EnumTypeWrapper(_DATALOCATION)
_DATATYPE = _descriptor.EnumDescriptor(
  name='DataType',
  full_name='hangar.DataType',
  filename=None,
  file=DESCRIPTOR,
  values=[
    _descriptor.EnumValueDescriptor(
      name='NP_ARRAY', index=0, number=0,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='SCHEMA', index=1, number=1,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='STR', index=2, number=2,
      serialized_options=None,
      type=None),
    _descriptor.EnumValueDescriptor(
      name='BYTES', index=3, number=3,
      serialized_options=None,
      type=None),
  ],
  containing_type=None,
  serialized_options=None,
  serialized_start=3258,
  serialized_end=3314,
)
_sym_db.RegisterEnumDescriptor(_DATATYPE)

DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE)
REMOTE_SERVER = 0
MINIO = 1
S3 = 2
GCS = 3
ABS = 4
NP_ARRAY = 0
SCHEMA = 1
STR = 2
BYTES = 3



_PUSHBEGINCONTEXTREQUEST = _descriptor.Descriptor(
  name='PushBeginContextRequest',
  full_name='hangar.PushBeginContextRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='client_uuid', full_name='hangar.PushBeginContextRequest.client_uuid', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=32,
  serialized_end=78,
)


_PUSHBEGINCONTEXTREPLY = _descriptor.Descriptor(
  name='PushBeginContextReply',
  full_name='hangar.PushBeginContextReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='err', full_name='hangar.PushBeginContextReply.err', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=80,
  serialized_end=136,
)


_PUSHENDCONTEXTREQUEST = _descriptor.Descriptor(
  name='PushEndContextRequest',
  full_name='hangar.PushEndContextRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='client_uuid', full_name='hangar.PushEndContextRequest.client_uuid', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=138,
  serialized_end=182,
)


_PUSHENDCONTEXTREPLY = _descriptor.Descriptor(
  name='PushEndContextReply',
  full_name='hangar.PushEndContextReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='err', full_name='hangar.PushEndContextReply.err', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=184,
  serialized_end=238,
)


_ERRORPROTO = _descriptor.Descriptor(
  name='ErrorProto',
  full_name='hangar.ErrorProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='code', full_name='hangar.ErrorProto.code', index=0,
      number=1, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='message', full_name='hangar.ErrorProto.message', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=240,
  serialized_end=283,
)


_BRANCHRECORD = _descriptor.Descriptor(
  name='BranchRecord',
  full_name='hangar.BranchRecord',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='name', full_name='hangar.BranchRecord.name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='commit', full_name='hangar.BranchRecord.commit', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=285,
  serialized_end=329,
)


_HASHRECORD = _descriptor.Descriptor(
  name='HashRecord',
  full_name='hangar.HashRecord',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='type', full_name='hangar.HashRecord.type', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='digest', full_name='hangar.HashRecord.digest', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=331,
  serialized_end=373,
)


_COMMITRECORD = _descriptor.Descriptor(
  name='CommitRecord',
  full_name='hangar.CommitRecord',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='parent', full_name='hangar.CommitRecord.parent', index=0,
      number=1, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=b"",
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='ref', full_name='hangar.CommitRecord.ref', index=1,
      number=2, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=b"",
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='spec', full_name='hangar.CommitRecord.spec', index=2,
      number=3, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=b"",
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=375,
  serialized_end=432,
)


_SCHEMARECORD = _descriptor.Descriptor(
  name='SchemaRecord',
  full_name='hangar.SchemaRecord',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='digest', full_name='hangar.SchemaRecord.digest', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='blob', full_name='hangar.SchemaRecord.blob', index=1,
      number=2, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=b"",
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=434,
  serialized_end=478,
)


_DATAORIGINREQUEST = _descriptor.Descriptor(
  name='DataOriginRequest',
  full_name='hangar.DataOriginRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='digest', full_name='hangar.DataOriginRequest.digest', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=480,
  serialized_end=515,
)


_DATAORIGINREPLY_COMPRESSIONOPTSENTRY = _descriptor.Descriptor(
  name='CompressionOptsEntry',
  full_name='hangar.DataOriginReply.CompressionOptsEntry',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='key', full_name='hangar.DataOriginReply.CompressionOptsEntry.key', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='value', full_name='hangar.DataOriginReply.CompressionOptsEntry.value', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=b'8\001',
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=736,
  serialized_end=790,
)

_DATAORIGINREPLY = _descriptor.Descriptor(
  name='DataOriginReply',
  full_name='hangar.DataOriginReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='location', full_name='hangar.DataOriginReply.location', index=0,
      number=1, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='data_type', full_name='hangar.DataOriginReply.data_type', index=1,
      number=2, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='digest', full_name='hangar.DataOriginReply.digest', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='uri', full_name='hangar.DataOriginReply.uri', index=3,
      number=4, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='compression', full_name='hangar.DataOriginReply.compression', index=4,
      number=5, type=8, cpp_type=7, label=1,
      has_default_value=False, default_value=False,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='compression_opts', full_name='hangar.DataOriginReply.compression_opts', index=5,
      number=6, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[_DATAORIGINREPLY_COMPRESSIONOPTSENTRY, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=518,
  serialized_end=790,
)


_PUSHFINDDATAORIGINREQUEST = _descriptor.Descriptor(
  name='PushFindDataOriginRequest',
  full_name='hangar.PushFindDataOriginRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='data_type', full_name='hangar.PushFindDataOriginRequest.data_type', index=0,
      number=1, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='digest', full_name='hangar.PushFindDataOriginRequest.digest', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='compression_is_desired', full_name='hangar.PushFindDataOriginRequest.compression_is_desired', index=2,
      number=3, type=8, cpp_type=7, label=1,
      has_default_value=False, default_value=False,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=792,
  serialized_end=904,
)


_PUSHFINDDATAORIGINREPLY_COMPRESSIONOPTSEXPECTEDENTRY = _descriptor.Descriptor(
  name='CompressionOptsExpectedEntry',
  full_name='hangar.PushFindDataOriginReply.CompressionOptsExpectedEntry',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='key', full_name='hangar.PushFindDataOriginReply.CompressionOptsExpectedEntry.key', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='value', full_name='hangar.PushFindDataOriginReply.CompressionOptsExpectedEntry.value', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=b'8\001',
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1130,
  serialized_end=1192,
)

_PUSHFINDDATAORIGINREPLY = _descriptor.Descriptor(
  name='PushFindDataOriginReply',
  full_name='hangar.PushFindDataOriginReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='digest', full_name='hangar.PushFindDataOriginReply.digest', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='location', full_name='hangar.PushFindDataOriginReply.location', index=1,
      number=2, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='uri', full_name='hangar.PushFindDataOriginReply.uri', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='compression_expected', full_name='hangar.PushFindDataOriginReply.compression_expected', index=3,
      number=5, type=8, cpp_type=7, label=1,
      has_default_value=False, default_value=False,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='compression_opts_expected', full_name='hangar.PushFindDataOriginReply.compression_opts_expected', index=4,
      number=6, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[_PUSHFINDDATAORIGINREPLY_COMPRESSIONOPTSEXPECTEDENTRY, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=907,
  serialized_end=1192,
)


_PINGREQUEST = _descriptor.Descriptor(
  name='PingRequest',
  full_name='hangar.PingRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1194,
  serialized_end=1207,
)


_PINGREPLY = _descriptor.Descriptor(
  name='PingReply',
  full_name='hangar.PingReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='result', full_name='hangar.PingReply.result', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1209,
  serialized_end=1236,
)


_GETCLIENTCONFIGREQUEST = _descriptor.Descriptor(
  name='GetClientConfigRequest',
  full_name='hangar.GetClientConfigRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1238,
  serialized_end=1262,
)


_GETCLIENTCONFIGREPLY_CONFIGENTRY = _descriptor.Descriptor(
  name='ConfigEntry',
  full_name='hangar.GetClientConfigReply.ConfigEntry',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='key', full_name='hangar.GetClientConfigReply.ConfigEntry.key', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='value', full_name='hangar.GetClientConfigReply.ConfigEntry.value', index=1,
      number=2, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=b'8\001',
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1382,
  serialized_end=1427,
)

_GETCLIENTCONFIGREPLY = _descriptor.Descriptor(
  name='GetClientConfigReply',
  full_name='hangar.GetClientConfigReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='config', full_name='hangar.GetClientConfigReply.config', index=0,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.GetClientConfigReply.error', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[_GETCLIENTCONFIGREPLY_CONFIGENTRY, ],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1265,
  serialized_end=1427,
)


_FETCHBRANCHRECORDREQUEST = _descriptor.Descriptor(
  name='FetchBranchRecordRequest',
  full_name='hangar.FetchBranchRecordRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='rec', full_name='hangar.FetchBranchRecordRequest.rec', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1429,
  serialized_end=1490,
)


_FETCHBRANCHRECORDREPLY = _descriptor.Descriptor(
  name='FetchBranchRecordReply',
  full_name='hangar.FetchBranchRecordReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='rec', full_name='hangar.FetchBranchRecordReply.rec', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.FetchBranchRecordReply.error', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1492,
  serialized_end=1586,
)


_FETCHDATAREQUEST = _descriptor.Descriptor(
  name='FetchDataRequest',
  full_name='hangar.FetchDataRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='uri', full_name='hangar.FetchDataRequest.uri', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1588,
  serialized_end=1619,
)


_FETCHDATAREPLY = _descriptor.Descriptor(
  name='FetchDataReply',
  full_name='hangar.FetchDataReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='uri', full_name='hangar.FetchDataReply.uri', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='raw_data', full_name='hangar.FetchDataReply.raw_data', index=1,
      number=2, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=b"",
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='nbytes', full_name='hangar.FetchDataReply.nbytes', index=2,
      number=3, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.FetchDataReply.error', index=3,
      number=4, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1621,
  serialized_end=1719,
)


_FETCHCOMMITREQUEST = _descriptor.Descriptor(
  name='FetchCommitRequest',
  full_name='hangar.FetchCommitRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='commit', full_name='hangar.FetchCommitRequest.commit', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1721,
  serialized_end=1757,
)


_FETCHCOMMITREPLY = _descriptor.Descriptor(
  name='FetchCommitReply',
  full_name='hangar.FetchCommitReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='commit', full_name='hangar.FetchCommitReply.commit', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='total_byte_size', full_name='hangar.FetchCommitReply.total_byte_size', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='record', full_name='hangar.FetchCommitReply.record', index=2,
      number=3, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.FetchCommitReply.error', index=3,
      number=4, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1760,
  serialized_end=1892,
)


_FETCHSCHEMAREQUEST = _descriptor.Descriptor(
  name='FetchSchemaRequest',
  full_name='hangar.FetchSchemaRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='rec', full_name='hangar.FetchSchemaRequest.rec', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1894,
  serialized_end=1949,
)


_FETCHSCHEMAREPLY = _descriptor.Descriptor(
  name='FetchSchemaReply',
  full_name='hangar.FetchSchemaReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='rec', full_name='hangar.FetchSchemaReply.rec', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.FetchSchemaReply.error', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=1951,
  serialized_end=2039,
)


_PUSHBRANCHRECORDREQUEST = _descriptor.Descriptor(
  name='PushBranchRecordRequest',
  full_name='hangar.PushBranchRecordRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='rec', full_name='hangar.PushBranchRecordRequest.rec', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2041,
  serialized_end=2101,
)


_PUSHBRANCHRECORDREPLY = _descriptor.Descriptor(
  name='PushBranchRecordReply',
  full_name='hangar.PushBranchRecordReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.PushBranchRecordReply.error', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2103,
  serialized_end=2161,
)


_PUSHDATAREQUEST = _descriptor.Descriptor(
  name='PushDataRequest',
  full_name='hangar.PushDataRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='uri', full_name='hangar.PushDataRequest.uri', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='raw_data', full_name='hangar.PushDataRequest.raw_data', index=1,
      number=2, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=b"",
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='nbytes', full_name='hangar.PushDataRequest.nbytes', index=2,
      number=3, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='data_type', full_name='hangar.PushDataRequest.data_type', index=3,
      number=4, type=14, cpp_type=8, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='schema_hash', full_name='hangar.PushDataRequest.schema_hash', index=4,
      number=5, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2163,
  serialized_end=2285,
)


_PUSHDATAREPLY = _descriptor.Descriptor(
  name='PushDataReply',
  full_name='hangar.PushDataReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.PushDataReply.error', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2287,
  serialized_end=2337,
)


_PUSHCOMMITREQUEST = _descriptor.Descriptor(
  name='PushCommitRequest',
  full_name='hangar.PushCommitRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='commit', full_name='hangar.PushCommitRequest.commit', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='total_byte_size', full_name='hangar.PushCommitRequest.total_byte_size', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='record', full_name='hangar.PushCommitRequest.record', index=2,
      number=3, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2339,
  serialized_end=2437,
)


_PUSHCOMMITREPLY = _descriptor.Descriptor(
  name='PushCommitReply',
  full_name='hangar.PushCommitReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.PushCommitReply.error', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2439,
  serialized_end=2491,
)


_PUSHSCHEMAREQUEST = _descriptor.Descriptor(
  name='PushSchemaRequest',
  full_name='hangar.PushSchemaRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='rec', full_name='hangar.PushSchemaRequest.rec', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2493,
  serialized_end=2547,
)


_PUSHSCHEMAREPLY = _descriptor.Descriptor(
  name='PushSchemaReply',
  full_name='hangar.PushSchemaReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.PushSchemaReply.error', index=0,
      number=1, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2549,
  serialized_end=2601,
)


_FINDMISSINGCOMMITSREQUEST = _descriptor.Descriptor(
  name='FindMissingCommitsRequest',
  full_name='hangar.FindMissingCommitsRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='commits', full_name='hangar.FindMissingCommitsRequest.commits', index=0,
      number=1, type=9, cpp_type=9, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='branch', full_name='hangar.FindMissingCommitsRequest.branch', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2603,
  serialized_end=2685,
)


_FINDMISSINGCOMMITSREPLY = _descriptor.Descriptor(
  name='FindMissingCommitsReply',
  full_name='hangar.FindMissingCommitsReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='commits', full_name='hangar.FindMissingCommitsReply.commits', index=0,
      number=1, type=9, cpp_type=9, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='branch', full_name='hangar.FindMissingCommitsReply.branch', index=1,
      number=2, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.FindMissingCommitsReply.error', index=2,
      number=3, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2687,
  serialized_end=2802,
)


_FINDMISSINGHASHRECORDSREQUEST = _descriptor.Descriptor(
  name='FindMissingHashRecordsRequest',
  full_name='hangar.FindMissingHashRecordsRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='commit', full_name='hangar.FindMissingHashRecordsRequest.commit', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='hashs', full_name='hangar.FindMissingHashRecordsRequest.hashs', index=1,
      number=2, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=b"",
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='total_byte_size', full_name='hangar.FindMissingHashRecordsRequest.total_byte_size', index=2,
      number=3, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2804,
  serialized_end=2891,
)


_FINDMISSINGHASHRECORDSREPLY = _descriptor.Descriptor(
  name='FindMissingHashRecordsReply',
  full_name='hangar.FindMissingHashRecordsReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='commit', full_name='hangar.FindMissingHashRecordsReply.commit', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='hashs', full_name='hangar.FindMissingHashRecordsReply.hashs', index=1,
      number=2, type=12, cpp_type=9, label=1,
      has_default_value=False, default_value=b"",
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='total_byte_size', full_name='hangar.FindMissingHashRecordsReply.total_byte_size', index=2,
      number=3, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.FindMissingHashRecordsReply.error', index=3,
      number=4, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=2893,
  serialized_end=3013,
)


_FINDMISSINGSCHEMASREQUEST = _descriptor.Descriptor(
  name='FindMissingSchemasRequest',
  full_name='hangar.FindMissingSchemasRequest',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='commit', full_name='hangar.FindMissingSchemasRequest.commit', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='schema_digests', full_name='hangar.FindMissingSchemasRequest.schema_digests', index=1,
      number=2, type=9, cpp_type=9, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=3015,
  serialized_end=3082,
)


_FINDMISSINGSCHEMASREPLY = _descriptor.Descriptor(
  name='FindMissingSchemasReply',
  full_name='hangar.FindMissingSchemasReply',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='commit', full_name='hangar.FindMissingSchemasReply.commit', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=b"".decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='schema_digests', full_name='hangar.FindMissingSchemasReply.schema_digests', index=1,
      number=2, type=9, cpp_type=9, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='error', full_name='hangar.FindMissingSchemasReply.error', index=2,
      number=3, type=11, cpp_type=10, label=1,
      has_default_value=False, default_value=None,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=3084,
  serialized_end=3184,
)

_PUSHBEGINCONTEXTREPLY.fields_by_name['err'].message_type = _ERRORPROTO
_PUSHENDCONTEXTREPLY.fields_by_name['err'].message_type = _ERRORPROTO
_DATAORIGINREPLY_COMPRESSIONOPTSENTRY.containing_type = _DATAORIGINREPLY
_DATAORIGINREPLY.fields_by_name['location'].enum_type = _DATALOCATION
_DATAORIGINREPLY.fields_by_name['data_type'].enum_type = _DATATYPE
_DATAORIGINREPLY.fields_by_name['compression_opts'].message_type = _DATAORIGINREPLY_COMPRESSIONOPTSENTRY
_PUSHFINDDATAORIGINREQUEST.fields_by_name['data_type'].enum_type = _DATATYPE
_PUSHFINDDATAORIGINREPLY_COMPRESSIONOPTSEXPECTEDENTRY.containing_type = _PUSHFINDDATAORIGINREPLY
_PUSHFINDDATAORIGINREPLY.fields_by_name['location'].enum_type = _DATALOCATION
_PUSHFINDDATAORIGINREPLY.fields_by_name['compression_opts_expected'].message_type = _PUSHFINDDATAORIGINREPLY_COMPRESSIONOPTSEXPECTEDENTRY
_GETCLIENTCONFIGREPLY_CONFIGENTRY.containing_type = _GETCLIENTCONFIGREPLY
_GETCLIENTCONFIGREPLY.fields_by_name['config'].message_type = _GETCLIENTCONFIGREPLY_CONFIGENTRY
_GETCLIENTCONFIGREPLY.fields_by_name['error'].message_type = _ERRORPROTO
_FETCHBRANCHRECORDREQUEST.fields_by_name['rec'].message_type = _BRANCHRECORD
_FETCHBRANCHRECORDREPLY.fields_by_name['rec'].message_type = _BRANCHRECORD
_FETCHBRANCHRECORDREPLY.fields_by_name['error'].message_type = _ERRORPROTO
_FETCHDATAREPLY.fields_by_name['error'].message_type = _ERRORPROTO
_FETCHCOMMITREPLY.fields_by_name['record'].message_type = _COMMITRECORD
_FETCHCOMMITREPLY.fields_by_name['error'].message_type = _ERRORPROTO
_FETCHSCHEMAREQUEST.fields_by_name['rec'].message_type = _SCHEMARECORD
_FETCHSCHEMAREPLY.fields_by_name['rec'].message_type = _SCHEMARECORD
_FETCHSCHEMAREPLY.fields_by_name['error'].message_type = _ERRORPROTO
_PUSHBRANCHRECORDREQUEST.fields_by_name['rec'].message_type = _BRANCHRECORD
_PUSHBRANCHRECORDREPLY.fields_by_name['error'].message_type = _ERRORPROTO
_PUSHDATAREQUEST.fields_by_name['data_type'].enum_type = _DATATYPE
_PUSHDATAREPLY.fields_by_name['error'].message_type = _ERRORPROTO
_PUSHCOMMITREQUEST.fields_by_name['record'].message_type = _COMMITRECORD
_PUSHCOMMITREPLY.fields_by_name['error'].message_type = _ERRORPROTO
_PUSHSCHEMAREQUEST.fields_by_name['rec'].message_type = _SCHEMARECORD
_PUSHSCHEMAREPLY.fields_by_name['error'].message_type = _ERRORPROTO
_FINDMISSINGCOMMITSREQUEST.fields_by_name['branch'].message_type = _BRANCHRECORD
_FINDMISSINGCOMMITSREPLY.fields_by_name['branch'].message_type = _BRANCHRECORD
_FINDMISSINGCOMMITSREPLY.fields_by_name['error'].message_type = _ERRORPROTO
_FINDMISSINGHASHRECORDSREPLY.fields_by_name['error'].message_type = _ERRORPROTO
_FINDMISSINGSCHEMASREPLY.fields_by_name['error'].message_type = _ERRORPROTO
DESCRIPTOR.message_types_by_name['PushBeginContextRequest'] = _PUSHBEGINCONTEXTREQUEST
DESCRIPTOR.message_types_by_name['PushBeginContextReply'] = _PUSHBEGINCONTEXTREPLY
DESCRIPTOR.message_types_by_name['PushEndContextRequest'] = _PUSHENDCONTEXTREQUEST
DESCRIPTOR.message_types_by_name['PushEndContextReply'] = _PUSHENDCONTEXTREPLY
DESCRIPTOR.message_types_by_name['ErrorProto'] = _ERRORPROTO
DESCRIPTOR.message_types_by_name['BranchRecord'] = _BRANCHRECORD
DESCRIPTOR.message_types_by_name['HashRecord'] = _HASHRECORD
DESCRIPTOR.message_types_by_name['CommitRecord'] = _COMMITRECORD
DESCRIPTOR.message_types_by_name['SchemaRecord'] = _SCHEMARECORD
DESCRIPTOR.message_types_by_name['DataOriginRequest'] = _DATAORIGINREQUEST
DESCRIPTOR.message_types_by_name['DataOriginReply'] = _DATAORIGINREPLY
DESCRIPTOR.message_types_by_name['PushFindDataOriginRequest'] = _PUSHFINDDATAORIGINREQUEST
DESCRIPTOR.message_types_by_name['PushFindDataOriginReply'] = _PUSHFINDDATAORIGINREPLY
DESCRIPTOR.message_types_by_name['PingRequest'] = _PINGREQUEST
DESCRIPTOR.message_types_by_name['PingReply'] = _PINGREPLY
DESCRIPTOR.message_types_by_name['GetClientConfigRequest'] = _GETCLIENTCONFIGREQUEST
DESCRIPTOR.message_types_by_name['GetClientConfigReply'] = _GETCLIENTCONFIGREPLY
DESCRIPTOR.message_types_by_name['FetchBranchRecordRequest'] = _FETCHBRANCHRECORDREQUEST
DESCRIPTOR.message_types_by_name['FetchBranchRecordReply'] = _FETCHBRANCHRECORDREPLY
DESCRIPTOR.message_types_by_name['FetchDataRequest'] = _FETCHDATAREQUEST
DESCRIPTOR.message_types_by_name['FetchDataReply'] = _FETCHDATAREPLY
DESCRIPTOR.message_types_by_name['FetchCommitRequest'] = _FETCHCOMMITREQUEST
DESCRIPTOR.message_types_by_name['FetchCommitReply'] = _FETCHCOMMITREPLY
DESCRIPTOR.message_types_by_name['FetchSchemaRequest'] = _FETCHSCHEMAREQUEST
DESCRIPTOR.message_types_by_name['FetchSchemaReply'] = _FETCHSCHEMAREPLY
DESCRIPTOR.message_types_by_name['PushBranchRecordRequest'] = _PUSHBRANCHRECORDREQUEST
DESCRIPTOR.message_types_by_name['PushBranchRecordReply'] = _PUSHBRANCHRECORDREPLY
DESCRIPTOR.message_types_by_name['PushDataRequest'] = _PUSHDATAREQUEST
DESCRIPTOR.message_types_by_name['PushDataReply'] = _PUSHDATAREPLY
DESCRIPTOR.message_types_by_name['PushCommitRequest'] = _PUSHCOMMITREQUEST
DESCRIPTOR.message_types_by_name['PushCommitReply'] = _PUSHCOMMITREPLY
DESCRIPTOR.message_types_by_name['PushSchemaRequest'] = _PUSHSCHEMAREQUEST
DESCRIPTOR.message_types_by_name['PushSchemaReply'] = _PUSHSCHEMAREPLY
DESCRIPTOR.message_types_by_name['FindMissingCommitsRequest'] = _FINDMISSINGCOMMITSREQUEST
DESCRIPTOR.message_types_by_name['FindMissingCommitsReply'] = _FINDMISSINGCOMMITSREPLY
DESCRIPTOR.message_types_by_name['FindMissingHashRecordsRequest'] = _FINDMISSINGHASHRECORDSREQUEST
DESCRIPTOR.message_types_by_name['FindMissingHashRecordsReply'] = _FINDMISSINGHASHRECORDSREPLY
DESCRIPTOR.message_types_by_name['FindMissingSchemasRequest'] = _FINDMISSINGSCHEMASREQUEST
DESCRIPTOR.message_types_by_name['FindMissingSchemasReply'] = _FINDMISSINGSCHEMASREPLY
DESCRIPTOR.enum_types_by_name['DataLocation'] = _DATALOCATION
DESCRIPTOR.enum_types_by_name['DataType'] = _DATATYPE
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

PushBeginContextRequest = _reflection.GeneratedProtocolMessageType('PushBeginContextRequest', (_message.Message,), {
  'DESCRIPTOR' : _PUSHBEGINCONTEXTREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushBeginContextRequest)
  })
_sym_db.RegisterMessage(PushBeginContextRequest)

PushBeginContextReply = _reflection.GeneratedProtocolMessageType('PushBeginContextReply', (_message.Message,), {
  'DESCRIPTOR' : _PUSHBEGINCONTEXTREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushBeginContextReply)
  })
_sym_db.RegisterMessage(PushBeginContextReply)

PushEndContextRequest = _reflection.GeneratedProtocolMessageType('PushEndContextRequest', (_message.Message,), {
  'DESCRIPTOR' : _PUSHENDCONTEXTREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushEndContextRequest)
  })
_sym_db.RegisterMessage(PushEndContextRequest)

PushEndContextReply = _reflection.GeneratedProtocolMessageType('PushEndContextReply', (_message.Message,), {
  'DESCRIPTOR' : _PUSHENDCONTEXTREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushEndContextReply)
  })
_sym_db.RegisterMessage(PushEndContextReply)

ErrorProto = _reflection.GeneratedProtocolMessageType('ErrorProto', (_message.Message,), {
  'DESCRIPTOR' : _ERRORPROTO,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.ErrorProto)
  })
_sym_db.RegisterMessage(ErrorProto)

BranchRecord = _reflection.GeneratedProtocolMessageType('BranchRecord', (_message.Message,), {
  'DESCRIPTOR' : _BRANCHRECORD,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.BranchRecord)
  })
_sym_db.RegisterMessage(BranchRecord)

HashRecord = _reflection.GeneratedProtocolMessageType('HashRecord', (_message.Message,), {
  'DESCRIPTOR' : _HASHRECORD,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.HashRecord)
  })
_sym_db.RegisterMessage(HashRecord)

CommitRecord = _reflection.GeneratedProtocolMessageType('CommitRecord', (_message.Message,), {
  'DESCRIPTOR' : _COMMITRECORD,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.CommitRecord)
  })
_sym_db.RegisterMessage(CommitRecord)

SchemaRecord = _reflection.GeneratedProtocolMessageType('SchemaRecord', (_message.Message,), {
  'DESCRIPTOR' : _SCHEMARECORD,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.SchemaRecord)
  })
_sym_db.RegisterMessage(SchemaRecord)

DataOriginRequest = _reflection.GeneratedProtocolMessageType('DataOriginRequest', (_message.Message,), {
  'DESCRIPTOR' : _DATAORIGINREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.DataOriginRequest)
  })
_sym_db.RegisterMessage(DataOriginRequest)

DataOriginReply = _reflection.GeneratedProtocolMessageType('DataOriginReply', (_message.Message,), {

  'CompressionOptsEntry' : _reflection.GeneratedProtocolMessageType('CompressionOptsEntry', (_message.Message,), {
    'DESCRIPTOR' : _DATAORIGINREPLY_COMPRESSIONOPTSENTRY,
    '__module__' : 'hangar_service_pb2'
    # @@protoc_insertion_point(class_scope:hangar.DataOriginReply.CompressionOptsEntry)
    })
  ,
  'DESCRIPTOR' : _DATAORIGINREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.DataOriginReply)
  })
_sym_db.RegisterMessage(DataOriginReply)
_sym_db.RegisterMessage(DataOriginReply.CompressionOptsEntry)

PushFindDataOriginRequest = _reflection.GeneratedProtocolMessageType('PushFindDataOriginRequest', (_message.Message,), {
  'DESCRIPTOR' : _PUSHFINDDATAORIGINREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushFindDataOriginRequest)
  })
_sym_db.RegisterMessage(PushFindDataOriginRequest)

PushFindDataOriginReply = _reflection.GeneratedProtocolMessageType('PushFindDataOriginReply', (_message.Message,), {

  'CompressionOptsExpectedEntry' : _reflection.GeneratedProtocolMessageType('CompressionOptsExpectedEntry', (_message.Message,), {
    'DESCRIPTOR' : _PUSHFINDDATAORIGINREPLY_COMPRESSIONOPTSEXPECTEDENTRY,
    '__module__' : 'hangar_service_pb2'
    # @@protoc_insertion_point(class_scope:hangar.PushFindDataOriginReply.CompressionOptsExpectedEntry)
    })
  ,
  'DESCRIPTOR' : _PUSHFINDDATAORIGINREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushFindDataOriginReply)
  })
_sym_db.RegisterMessage(PushFindDataOriginReply)
_sym_db.RegisterMessage(PushFindDataOriginReply.CompressionOptsExpectedEntry)

PingRequest = _reflection.GeneratedProtocolMessageType('PingRequest', (_message.Message,), {
  'DESCRIPTOR' : _PINGREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PingRequest)
  })
_sym_db.RegisterMessage(PingRequest)

PingReply = _reflection.GeneratedProtocolMessageType('PingReply', (_message.Message,), {
  'DESCRIPTOR' : _PINGREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PingReply)
  })
_sym_db.RegisterMessage(PingReply)

GetClientConfigRequest = _reflection.GeneratedProtocolMessageType('GetClientConfigRequest', (_message.Message,), {
  'DESCRIPTOR' : _GETCLIENTCONFIGREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.GetClientConfigRequest)
  })
_sym_db.RegisterMessage(GetClientConfigRequest)

GetClientConfigReply = _reflection.GeneratedProtocolMessageType('GetClientConfigReply', (_message.Message,), {

  'ConfigEntry' : _reflection.GeneratedProtocolMessageType('ConfigEntry', (_message.Message,), {
    'DESCRIPTOR' : _GETCLIENTCONFIGREPLY_CONFIGENTRY,
    '__module__' : 'hangar_service_pb2'
    # @@protoc_insertion_point(class_scope:hangar.GetClientConfigReply.ConfigEntry)
    })
  ,
  'DESCRIPTOR' : _GETCLIENTCONFIGREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.GetClientConfigReply)
  })
_sym_db.RegisterMessage(GetClientConfigReply)
_sym_db.RegisterMessage(GetClientConfigReply.ConfigEntry)

FetchBranchRecordRequest = _reflection.GeneratedProtocolMessageType('FetchBranchRecordRequest', (_message.Message,), {
  'DESCRIPTOR' : _FETCHBRANCHRECORDREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FetchBranchRecordRequest)
  })
_sym_db.RegisterMessage(FetchBranchRecordRequest)

FetchBranchRecordReply = _reflection.GeneratedProtocolMessageType('FetchBranchRecordReply', (_message.Message,), {
  'DESCRIPTOR' : _FETCHBRANCHRECORDREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FetchBranchRecordReply)
  })
_sym_db.RegisterMessage(FetchBranchRecordReply)

FetchDataRequest = _reflection.GeneratedProtocolMessageType('FetchDataRequest', (_message.Message,), {
  'DESCRIPTOR' : _FETCHDATAREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FetchDataRequest)
  })
_sym_db.RegisterMessage(FetchDataRequest)

FetchDataReply = _reflection.GeneratedProtocolMessageType('FetchDataReply', (_message.Message,), {
  'DESCRIPTOR' : _FETCHDATAREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FetchDataReply)
  })
_sym_db.RegisterMessage(FetchDataReply)

FetchCommitRequest = _reflection.GeneratedProtocolMessageType('FetchCommitRequest', (_message.Message,), {
  'DESCRIPTOR' : _FETCHCOMMITREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FetchCommitRequest)
  })
_sym_db.RegisterMessage(FetchCommitRequest)

FetchCommitReply = _reflection.GeneratedProtocolMessageType('FetchCommitReply', (_message.Message,), {
  'DESCRIPTOR' : _FETCHCOMMITREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FetchCommitReply)
  })
_sym_db.RegisterMessage(FetchCommitReply)

FetchSchemaRequest = _reflection.GeneratedProtocolMessageType('FetchSchemaRequest', (_message.Message,), {
  'DESCRIPTOR' : _FETCHSCHEMAREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FetchSchemaRequest)
  })
_sym_db.RegisterMessage(FetchSchemaRequest)

FetchSchemaReply = _reflection.GeneratedProtocolMessageType('FetchSchemaReply', (_message.Message,), {
  'DESCRIPTOR' : _FETCHSCHEMAREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FetchSchemaReply)
  })
_sym_db.RegisterMessage(FetchSchemaReply)

PushBranchRecordRequest = _reflection.GeneratedProtocolMessageType('PushBranchRecordRequest', (_message.Message,), {
  'DESCRIPTOR' : _PUSHBRANCHRECORDREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushBranchRecordRequest)
  })
_sym_db.RegisterMessage(PushBranchRecordRequest)

PushBranchRecordReply = _reflection.GeneratedProtocolMessageType('PushBranchRecordReply', (_message.Message,), {
  'DESCRIPTOR' : _PUSHBRANCHRECORDREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushBranchRecordReply)
  })
_sym_db.RegisterMessage(PushBranchRecordReply)

PushDataRequest = _reflection.GeneratedProtocolMessageType('PushDataRequest', (_message.Message,), {
  'DESCRIPTOR' : _PUSHDATAREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushDataRequest)
  })
_sym_db.RegisterMessage(PushDataRequest)

PushDataReply = _reflection.GeneratedProtocolMessageType('PushDataReply', (_message.Message,), {
  'DESCRIPTOR' : _PUSHDATAREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushDataReply)
  })
_sym_db.RegisterMessage(PushDataReply)

PushCommitRequest = _reflection.GeneratedProtocolMessageType('PushCommitRequest', (_message.Message,), {
  'DESCRIPTOR' : _PUSHCOMMITREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushCommitRequest)
  })
_sym_db.RegisterMessage(PushCommitRequest)

PushCommitReply = _reflection.GeneratedProtocolMessageType('PushCommitReply', (_message.Message,), {
  'DESCRIPTOR' : _PUSHCOMMITREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushCommitReply)
  })
_sym_db.RegisterMessage(PushCommitReply)

PushSchemaRequest = _reflection.GeneratedProtocolMessageType('PushSchemaRequest', (_message.Message,), {
  'DESCRIPTOR' : _PUSHSCHEMAREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushSchemaRequest)
  })
_sym_db.RegisterMessage(PushSchemaRequest)

PushSchemaReply = _reflection.GeneratedProtocolMessageType('PushSchemaReply', (_message.Message,), {
  'DESCRIPTOR' : _PUSHSCHEMAREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.PushSchemaReply)
  })
_sym_db.RegisterMessage(PushSchemaReply)

FindMissingCommitsRequest = _reflection.GeneratedProtocolMessageType('FindMissingCommitsRequest', (_message.Message,), {
  'DESCRIPTOR' : _FINDMISSINGCOMMITSREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FindMissingCommitsRequest)
  })
_sym_db.RegisterMessage(FindMissingCommitsRequest)

FindMissingCommitsReply = _reflection.GeneratedProtocolMessageType('FindMissingCommitsReply', (_message.Message,), {
  'DESCRIPTOR' : _FINDMISSINGCOMMITSREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FindMissingCommitsReply)
  })
_sym_db.RegisterMessage(FindMissingCommitsReply)

FindMissingHashRecordsRequest = _reflection.GeneratedProtocolMessageType('FindMissingHashRecordsRequest', (_message.Message,), {
  'DESCRIPTOR' : _FINDMISSINGHASHRECORDSREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FindMissingHashRecordsRequest)
  })
_sym_db.RegisterMessage(FindMissingHashRecordsRequest)

FindMissingHashRecordsReply = _reflection.GeneratedProtocolMessageType('FindMissingHashRecordsReply', (_message.Message,), {
  'DESCRIPTOR' : _FINDMISSINGHASHRECORDSREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FindMissingHashRecordsReply)
  })
_sym_db.RegisterMessage(FindMissingHashRecordsReply)

FindMissingSchemasRequest = _reflection.GeneratedProtocolMessageType('FindMissingSchemasRequest', (_message.Message,), {
  'DESCRIPTOR' : _FINDMISSINGSCHEMASREQUEST,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FindMissingSchemasRequest)
  })
_sym_db.RegisterMessage(FindMissingSchemasRequest)

FindMissingSchemasReply = _reflection.GeneratedProtocolMessageType('FindMissingSchemasReply', (_message.Message,), {
  'DESCRIPTOR' : _FINDMISSINGSCHEMASREPLY,
  '__module__' : 'hangar_service_pb2'
  # @@protoc_insertion_point(class_scope:hangar.FindMissingSchemasReply)
  })
_sym_db.RegisterMessage(FindMissingSchemasReply)
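
# ---------------------------------------------------------------------------
# Editor's sketch (hedged, not emitted by protoc): a minimal example of how the
# message classes registered above are typically used.  The field names
# (uri, raw_data, nbytes, data_type, schema_hash) come from the PushDataRequest
# descriptor in this file; the import alias and literal values are assumptions
# for illustration only, and the snippet relies solely on standard protobuf
# message API (keyword construction, SerializeToString, FromString).
#
#     from hangar.remote import hangar_service_pb2 as pb
#
#     # Construct a push request and round-trip it through the wire format.
#     req = pb.PushDataRequest(
#         uri='hangar://example',        # hypothetical URI value
#         raw_data=b'\x00\x01\x02',
#         nbytes=3,
#         data_type=pb.NP_ARRAY,         # top-level proto3 enum constant
#         schema_hash='abc123',          # hypothetical schema digest
#     )
#     wire = req.SerializeToString()
#     parsed = pb.PushDataRequest.FromString(wire)
#     assert parsed.nbytes == 3 and parsed.data_type == pb.NP_ARRAY
# ---------------------------------------------------------------------------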


DESCRIPTOR._options = None
_DATAORIGINREPLY_COMPRESSIONOPTSENTRY._options = None
_PUSHFINDDATAORIGINREPLY_COMPRESSIONOPTSEXPECTEDENTRY._options = None
_GETCLIENTCONFIGREPLY_CONFIGENTRY._options = None

_HANGARSERVICE = _descriptor.ServiceDescriptor(
  name='HangarService',
  full_name='hangar.HangarService',
  file=DESCRIPTOR,
  index=0,
  serialized_options=None,
  serialized_start=3317,
  serialized_end=5007,
  methods=[
  _descriptor.MethodDescriptor(
    name='PING',
    full_name='hangar.HangarService.PING',
    index=0,
    containing_service=None,
    input_type=_PINGREQUEST,
    output_type=_PINGREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='GetClientConfig',
    full_name='hangar.HangarService.GetClientConfig',
    index=1,
    containing_service=None,
    input_type=_GETCLIENTCONFIGREQUEST,
    output_type=_GETCLIENTCONFIGREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='FetchBranchRecord',
    full_name='hangar.HangarService.FetchBranchRecord',
    index=2,
    containing_service=None,
    input_type=_FETCHBRANCHRECORDREQUEST,
    output_type=_FETCHBRANCHRECORDREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='FetchData',
    full_name='hangar.HangarService.FetchData',
    index=3,
    containing_service=None,
    input_type=_FETCHDATAREQUEST,
    output_type=_FETCHDATAREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='FetchCommit',
    full_name='hangar.HangarService.FetchCommit',
    index=4,
    containing_service=None,
    input_type=_FETCHCOMMITREQUEST,
    output_type=_FETCHCOMMITREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='FetchSchema',
    full_name='hangar.HangarService.FetchSchema',
    index=5,
    containing_service=None,
    input_type=_FETCHSCHEMAREQUEST,
    output_type=_FETCHSCHEMAREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='PushBranchRecord',
    full_name='hangar.HangarService.PushBranchRecord',
    index=6,
    containing_service=None,
    input_type=_PUSHBRANCHRECORDREQUEST,
    output_type=_PUSHBRANCHRECORDREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='PushData',
    full_name='hangar.HangarService.PushData',
    index=7,
    containing_service=None,
    input_type=_PUSHDATAREQUEST,
    output_type=_PUSHDATAREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='PushCommit',
    full_name='hangar.HangarService.PushCommit',
    index=8,
    containing_service=None,
    input_type=_PUSHCOMMITREQUEST,
    output_type=_PUSHCOMMITREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='PushSchema',
    full_name='hangar.HangarService.PushSchema',
    index=9,
    containing_service=None,
    input_type=_PUSHSCHEMAREQUEST,
    output_type=_PUSHSCHEMAREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='FetchFindMissingCommits',
    full_name='hangar.HangarService.FetchFindMissingCommits',
    index=10,
    containing_service=None,
    input_type=_FINDMISSINGCOMMITSREQUEST,
    output_type=_FINDMISSINGCOMMITSREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='FetchFindMissingHashRecords',
    full_name='hangar.HangarService.FetchFindMissingHashRecords',
    index=11,
    containing_service=None,
    input_type=_FINDMISSINGHASHRECORDSREQUEST,
    output_type=_FINDMISSINGHASHRECORDSREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='FetchFindMissingSchemas',
    full_name='hangar.HangarService.FetchFindMissingSchemas',
    index=12,
    containing_service=None,
    input_type=_FINDMISSINGSCHEMASREQUEST,
    output_type=_FINDMISSINGSCHEMASREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='PushFindMissingCommits',
    full_name='hangar.HangarService.PushFindMissingCommits',
    index=13,
    containing_service=None,
    input_type=_FINDMISSINGCOMMITSREQUEST,
    output_type=_FINDMISSINGCOMMITSREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='PushFindMissingHashRecords',
    full_name='hangar.HangarService.PushFindMissingHashRecords',
    index=14,
    containing_service=None,
    input_type=_FINDMISSINGHASHRECORDSREQUEST,
    output_type=_FINDMISSINGHASHRECORDSREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='PushFindMissingSchemas',
    full_name='hangar.HangarService.PushFindMissingSchemas',
    index=15,
    containing_service=None,
    input_type=_FINDMISSINGSCHEMASREQUEST,
    output_type=_FINDMISSINGSCHEMASREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='FetchFindDataOrigin',
    full_name='hangar.HangarService.FetchFindDataOrigin',
    index=16,
    containing_service=None,
    input_type=_DATAORIGINREQUEST,
    output_type=_DATAORIGINREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='PushFindDataOrigin',
    full_name='hangar.HangarService.PushFindDataOrigin',
    index=17,
    containing_service=None,
    input_type=_PUSHFINDDATAORIGINREQUEST,
    output_type=_PUSHFINDDATAORIGINREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='PushBeginContext',
    full_name='hangar.HangarService.PushBeginContext',
    index=18,
    containing_service=None,
    input_type=_PUSHBEGINCONTEXTREQUEST,
    output_type=_PUSHBEGINCONTEXTREPLY,
    serialized_options=None,
  ),
  _descriptor.MethodDescriptor(
    name='PushEndContext',
    full_name='hangar.HangarService.PushEndContext',
    index=19,
    containing_service=None,
    input_type=_PUSHENDCONTEXTREQUEST,
    output_type=_PUSHENDCONTEXTREPLY,
    serialized_options=None,
  ),
])
_sym_db.RegisterServiceDescriptor(_HANGARSERVICE)

DESCRIPTOR.services_by_name['HangarService'] = _HANGARSERVICE

# @@protoc_insertion_point(module_scope)
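
# ---------------------------------------------------------------------------
# Editor's sketch (hedged, not emitted by protoc): the HangarService descriptor
# above is normally consumed through the companion hangar_service_pb2_grpc
# module.  The stub class name and per-RPC method names follow the standard
# protoc-grpc naming convention for a service called HangarService; the channel
# target is a placeholder, and only the PING unary call is shown.
#
#     import grpc
#     from hangar.remote import hangar_service_pb2 as pb
#     from hangar.remote import hangar_service_pb2_grpc as pb_grpc
#
#     with grpc.insecure_channel('localhost:50051') as channel:
#         stub = pb_grpc.HangarServiceStub(channel)
#         reply = stub.PING(pb.PingRequest())   # RPC defined by _HANGARSERVICE above
# ---------------------------------------------------------------------------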


================================================
FILE: src/hangar/remote/hangar_service_pb2.pyi
================================================
# @generated by generate_proto_mypy_stubs.py.  Do not edit!
import sys
from google.protobuf.descriptor import (
    Descriptor as google___protobuf___descriptor___Descriptor,
    EnumDescriptor as google___protobuf___descriptor___EnumDescriptor,
    FileDescriptor as google___protobuf___descriptor___FileDescriptor,
)

from google.protobuf.internal.containers import (
    RepeatedScalarFieldContainer as google___protobuf___internal___containers___RepeatedScalarFieldContainer,
)

from google.protobuf.internal.enum_type_wrapper import (
    _EnumTypeWrapper as google___protobuf___internal___enum_type_wrapper____EnumTypeWrapper,
)

from google.protobuf.message import (
    Message as google___protobuf___message___Message,
)

from typing import (
    Iterable as typing___Iterable,
    Mapping as typing___Mapping,
    MutableMapping as typing___MutableMapping,
    NewType as typing___NewType,
    Optional as typing___Optional,
    Text as typing___Text,
    cast as typing___cast,
)

from typing_extensions import (
    Literal as typing_extensions___Literal,
)


builtin___bool = bool
builtin___bytes = bytes
builtin___float = float
builtin___int = int


DESCRIPTOR: google___protobuf___descriptor___FileDescriptor = ...

DataLocationValue = typing___NewType('DataLocationValue', builtin___int)
type___DataLocationValue = DataLocationValue
DataLocation: _DataLocation
class _DataLocation(google___protobuf___internal___enum_type_wrapper____EnumTypeWrapper[DataLocationValue]):
    DESCRIPTOR: google___protobuf___descriptor___EnumDescriptor = ...
    REMOTE_SERVER = typing___cast(DataLocationValue, 0)
    MINIO = typing___cast(DataLocationValue, 1)
    S3 = typing___cast(DataLocationValue, 2)
    GCS = typing___cast(DataLocationValue, 3)
    ABS = typing___cast(DataLocationValue, 4)
REMOTE_SERVER = typing___cast(DataLocationValue, 0)
MINIO = typing___cast(DataLocationValue, 1)
S3 = typing___cast(DataLocationValue, 2)
GCS = typing___cast(DataLocationValue, 3)
ABS = typing___cast(DataLocationValue, 4)
type___DataLocation = DataLocation

DataTypeValue = typing___NewType('DataTypeValue', builtin___int)
type___DataTypeValue = DataTypeValue
DataType: _DataType
class _DataType(google___protobuf___internal___enum_type_wrapper____EnumTypeWrapper[DataTypeValue]):
    DESCRIPTOR: google___protobuf___descriptor___EnumDescriptor = ...
    NP_ARRAY = typing___cast(DataTypeValue, 0)
    SCHEMA = typing___cast(DataTypeValue, 1)
    STR = typing___cast(DataTypeValue, 2)
    BYTES = typing___cast(DataTypeValue, 3)
NP_ARRAY = typing___cast(DataTypeValue, 0)
SCHEMA = typing___cast(DataTypeValue, 1)
STR = typing___cast(DataTypeValue, 2)
BYTES = typing___cast(DataTypeValue, 3)
type___DataType = DataType
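
# Editor's note (hedged, not part of the generated stub): because
# DataLocationValue and DataTypeValue are declared as distinct NewTypes over
# int, a type checker reading this stub can reject mixed-up enum arguments:
#
#     loc: DataLocationValue = MINIO     # accepted
#     loc = NP_ARRAY                     # rejected: DataTypeValue is not DataLocationValue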

class PushBeginContextRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    client_uuid: typing___Text = ...

    def __init__(self,
        *,
        client_uuid : typing___Optional[typing___Text] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"client_uuid",b"client_uuid"]) -> None: ...
type___PushBeginContextRequest = PushBeginContextRequest

class PushBeginContextReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def err(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        err : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"err",b"err"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"err",b"err"]) -> None: ...
type___PushBeginContextReply = PushBeginContextReply

class PushEndContextRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    client_uuid: typing___Text = ...

    def __init__(self,
        *,
        client_uuid : typing___Optional[typing___Text] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"client_uuid",b"client_uuid"]) -> None: ...
type___PushEndContextRequest = PushEndContextRequest

class PushEndContextReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def err(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        err : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"err",b"err"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"err",b"err"]) -> None: ...
type___PushEndContextReply = PushEndContextReply

class ErrorProto(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    code: builtin___int = ...
    message: typing___Text = ...

    def __init__(self,
        *,
        code : typing___Optional[builtin___int] = None,
        message : typing___Optional[typing___Text] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"code",b"code",u"message",b"message"]) -> None: ...
type___ErrorProto = ErrorProto

class BranchRecord(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    name: typing___Text = ...
    commit: typing___Text = ...

    def __init__(self,
        *,
        name : typing___Optional[typing___Text] = None,
        commit : typing___Optional[typing___Text] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"commit",b"commit",u"name",b"name"]) -> None: ...
type___BranchRecord = BranchRecord

class HashRecord(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    type: typing___Text = ...
    digest: typing___Text = ...

    def __init__(self,
        *,
        type : typing___Optional[typing___Text] = None,
        digest : typing___Optional[typing___Text] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"digest",b"digest",u"type",b"type"]) -> None: ...
type___HashRecord = HashRecord

class CommitRecord(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    parent: builtin___bytes = ...
    ref: builtin___bytes = ...
    spec: builtin___bytes = ...

    def __init__(self,
        *,
        parent : typing___Optional[builtin___bytes] = None,
        ref : typing___Optional[builtin___bytes] = None,
        spec : typing___Optional[builtin___bytes] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"parent",b"parent",u"ref",b"ref",u"spec",b"spec"]) -> None: ...
type___CommitRecord = CommitRecord

class SchemaRecord(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    digest: typing___Text = ...
    blob: builtin___bytes = ...

    def __init__(self,
        *,
        digest : typing___Optional[typing___Text] = None,
        blob : typing___Optional[builtin___bytes] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"blob",b"blob",u"digest",b"digest"]) -> None: ...
type___SchemaRecord = SchemaRecord

class DataOriginRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    digest: typing___Text = ...

    def __init__(self,
        *,
        digest : typing___Optional[typing___Text] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"digest",b"digest"]) -> None: ...
type___DataOriginRequest = DataOriginRequest

class DataOriginReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    class CompressionOptsEntry(google___protobuf___message___Message):
        DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
        key: typing___Text = ...
        value: typing___Text = ...

        def __init__(self,
            *,
            key : typing___Optional[typing___Text] = None,
            value : typing___Optional[typing___Text] = None,
            ) -> None: ...
        def ClearField(self, field_name: typing_extensions___Literal[u"key",b"key",u"value",b"value"]) -> None: ...
    type___CompressionOptsEntry = CompressionOptsEntry

    location: type___DataLocationValue = ...
    data_type: type___DataTypeValue = ...
    digest: typing___Text = ...
    uri: typing___Text = ...
    compression: builtin___bool = ...

    @property
    def compression_opts(self) -> typing___MutableMapping[typing___Text, typing___Text]: ...

    def __init__(self,
        *,
        location : typing___Optional[type___DataLocationValue] = None,
        data_type : typing___Optional[type___DataTypeValue] = None,
        digest : typing___Optional[typing___Text] = None,
        uri : typing___Optional[typing___Text] = None,
        compression : typing___Optional[builtin___bool] = None,
        compression_opts : typing___Optional[typing___Mapping[typing___Text, typing___Text]] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"compression",b"compression",u"compression_opts",b"compression_opts",u"data_type",b"data_type",u"digest",b"digest",u"location",b"location",u"uri",b"uri"]) -> None: ...
type___DataOriginReply = DataOriginReply

class PushFindDataOriginRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    data_type: type___DataTypeValue = ...
    digest: typing___Text = ...
    compression_is_desired: builtin___bool = ...

    def __init__(self,
        *,
        data_type : typing___Optional[type___DataTypeValue] = None,
        digest : typing___Optional[typing___Text] = None,
        compression_is_desired : typing___Optional[builtin___bool] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"compression_is_desired",b"compression_is_desired",u"data_type",b"data_type",u"digest",b"digest"]) -> None: ...
type___PushFindDataOriginRequest = PushFindDataOriginRequest

class PushFindDataOriginReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    class CompressionOptsExpectedEntry(google___protobuf___message___Message):
        DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
        key: typing___Text = ...
        value: typing___Text = ...

        def __init__(self,
            *,
            key : typing___Optional[typing___Text] = None,
            value : typing___Optional[typing___Text] = None,
            ) -> None: ...
        def ClearField(self, field_name: typing_extensions___Literal[u"key",b"key",u"value",b"value"]) -> None: ...
    type___CompressionOptsExpectedEntry = CompressionOptsExpectedEntry

    digest: typing___Text = ...
    location: type___DataLocationValue = ...
    uri: typing___Text = ...
    compression_expected: builtin___bool = ...

    @property
    def compression_opts_expected(self) -> typing___MutableMapping[typing___Text, typing___Text]: ...

    def __init__(self,
        *,
        digest : typing___Optional[typing___Text] = None,
        location : typing___Optional[type___DataLocationValue] = None,
        uri : typing___Optional[typing___Text] = None,
        compression_expected : typing___Optional[builtin___bool] = None,
        compression_opts_expected : typing___Optional[typing___Mapping[typing___Text, typing___Text]] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"compression_expected",b"compression_expected",u"compression_opts_expected",b"compression_opts_expected",u"digest",b"digest",u"location",b"location",u"uri",b"uri"]) -> None: ...
type___PushFindDataOriginReply = PushFindDataOriginReply

class PingRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    def __init__(self,
        ) -> None: ...
type___PingRequest = PingRequest

class PingReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    result: typing___Text = ...

    def __init__(self,
        *,
        result : typing___Optional[typing___Text] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"result",b"result"]) -> None: ...
type___PingReply = PingReply

class GetClientConfigRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    def __init__(self,
        ) -> None: ...
type___GetClientConfigRequest = GetClientConfigRequest

class GetClientConfigReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    class ConfigEntry(google___protobuf___message___Message):
        DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
        key: typing___Text = ...
        value: typing___Text = ...

        def __init__(self,
            *,
            key : typing___Optional[typing___Text] = None,
            value : typing___Optional[typing___Text] = None,
            ) -> None: ...
        def ClearField(self, field_name: typing_extensions___Literal[u"key",b"key",u"value",b"value"]) -> None: ...
    type___ConfigEntry = ConfigEntry


    @property
    def config(self) -> typing___MutableMapping[typing___Text, typing___Text]: ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        config : typing___Optional[typing___Mapping[typing___Text, typing___Text]] = None,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"config",b"config",u"error",b"error"]) -> None: ...
type___GetClientConfigReply = GetClientConfigReply

class FetchBranchRecordRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def rec(self) -> type___BranchRecord: ...

    def __init__(self,
        *,
        rec : typing___Optional[type___BranchRecord] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"rec",b"rec"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"rec",b"rec"]) -> None: ...
type___FetchBranchRecordRequest = FetchBranchRecordRequest

class FetchBranchRecordReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def rec(self) -> type___BranchRecord: ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        rec : typing___Optional[type___BranchRecord] = None,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"error",b"error",u"rec",b"rec"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"error",b"error",u"rec",b"rec"]) -> None: ...
type___FetchBranchRecordReply = FetchBranchRecordReply

class FetchDataRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    uri: typing___Text = ...

    def __init__(self,
        *,
        uri : typing___Optional[typing___Text] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"uri",b"uri"]) -> None: ...
type___FetchDataRequest = FetchDataRequest

class FetchDataReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    uri: typing___Text = ...
    raw_data: builtin___bytes = ...
    nbytes: builtin___int = ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        uri : typing___Optional[typing___Text] = None,
        raw_data : typing___Optional[builtin___bytes] = None,
        nbytes : typing___Optional[builtin___int] = None,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"error",b"error",u"nbytes",b"nbytes",u"raw_data",b"raw_data",u"uri",b"uri"]) -> None: ...
type___FetchDataReply = FetchDataReply

class FetchCommitRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    commit: typing___Text = ...

    def __init__(self,
        *,
        commit : typing___Optional[typing___Text] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"commit",b"commit"]) -> None: ...
type___FetchCommitRequest = FetchCommitRequest

class FetchCommitReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    commit: typing___Text = ...
    total_byte_size: builtin___int = ...

    @property
    def record(self) -> type___CommitRecord: ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        commit : typing___Optional[typing___Text] = None,
        total_byte_size : typing___Optional[builtin___int] = None,
        record : typing___Optional[type___CommitRecord] = None,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"error",b"error",u"record",b"record"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"commit",b"commit",u"error",b"error",u"record",b"record",u"total_byte_size",b"total_byte_size"]) -> None: ...
type___FetchCommitReply = FetchCommitReply

class FetchSchemaRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def rec(self) -> type___SchemaRecord: ...

    def __init__(self,
        *,
        rec : typing___Optional[type___SchemaRecord] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"rec",b"rec"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"rec",b"rec"]) -> None: ...
type___FetchSchemaRequest = FetchSchemaRequest

class FetchSchemaReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def rec(self) -> type___SchemaRecord: ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        rec : typing___Optional[type___SchemaRecord] = None,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"error",b"error",u"rec",b"rec"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"error",b"error",u"rec",b"rec"]) -> None: ...
type___FetchSchemaReply = FetchSchemaReply

class PushBranchRecordRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def rec(self) -> type___BranchRecord: ...

    def __init__(self,
        *,
        rec : typing___Optional[type___BranchRecord] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"rec",b"rec"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"rec",b"rec"]) -> None: ...
type___PushBranchRecordRequest = PushBranchRecordRequest

class PushBranchRecordReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> None: ...
type___PushBranchRecordReply = PushBranchRecordReply

class PushDataRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    uri: typing___Text = ...
    raw_data: builtin___bytes = ...
    nbytes: builtin___int = ...
    data_type: type___DataTypeValue = ...
    schema_hash: typing___Text = ...

    def __init__(self,
        *,
        uri : typing___Optional[typing___Text] = None,
        raw_data : typing___Optional[builtin___bytes] = None,
        nbytes : typing___Optional[builtin___int] = None,
        data_type : typing___Optional[type___DataTypeValue] = None,
        schema_hash : typing___Optional[typing___Text] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"data_type",b"data_type",u"nbytes",b"nbytes",u"raw_data",b"raw_data",u"schema_hash",b"schema_hash",u"uri",b"uri"]) -> None: ...
type___PushDataRequest = PushDataRequest

class PushDataReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> None: ...
type___PushDataReply = PushDataReply

class PushCommitRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    commit: typing___Text = ...
    total_byte_size: builtin___int = ...

    @property
    def record(self) -> type___CommitRecord: ...

    def __init__(self,
        *,
        commit : typing___Optional[typing___Text] = None,
        total_byte_size : typing___Optional[builtin___int] = None,
        record : typing___Optional[type___CommitRecord] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"record",b"record"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"commit",b"commit",u"record",b"record",u"total_byte_size",b"total_byte_size"]) -> None: ...
type___PushCommitRequest = PushCommitRequest

class PushCommitReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> None: ...
type___PushCommitReply = PushCommitReply

class PushSchemaRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def rec(self) -> type___SchemaRecord: ...

    def __init__(self,
        *,
        rec : typing___Optional[type___SchemaRecord] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"rec",b"rec"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"rec",b"rec"]) -> None: ...
type___PushSchemaRequest = PushSchemaRequest

class PushSchemaReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> None: ...
type___PushSchemaReply = PushSchemaReply

class FindMissingCommitsRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    commits: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text] = ...

    @property
    def branch(self) -> type___BranchRecord: ...

    def __init__(self,
        *,
        commits : typing___Optional[typing___Iterable[typing___Text]] = None,
        branch : typing___Optional[type___BranchRecord] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"branch",b"branch"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"branch",b"branch",u"commits",b"commits"]) -> None: ...
type___FindMissingCommitsRequest = FindMissingCommitsRequest

class FindMissingCommitsReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    commits: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text] = ...

    @property
    def branch(self) -> type___BranchRecord: ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        commits : typing___Optional[typing___Iterable[typing___Text]] = None,
        branch : typing___Optional[type___BranchRecord] = None,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"branch",b"branch",u"error",b"error"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"branch",b"branch",u"commits",b"commits",u"error",b"error"]) -> None: ...
type___FindMissingCommitsReply = FindMissingCommitsReply

class FindMissingHashRecordsRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    commit: typing___Text = ...
    hashs: builtin___bytes = ...
    total_byte_size: builtin___int = ...

    def __init__(self,
        *,
        commit : typing___Optional[typing___Text] = None,
        hashs : typing___Optional[builtin___bytes] = None,
        total_byte_size : typing___Optional[builtin___int] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"commit",b"commit",u"hashs",b"hashs",u"total_byte_size",b"total_byte_size"]) -> None: ...
type___FindMissingHashRecordsRequest = FindMissingHashRecordsRequest

class FindMissingHashRecordsReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    commit: typing___Text = ...
    hashs: builtin___bytes = ...
    total_byte_size: builtin___int = ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        commit : typing___Optional[typing___Text] = None,
        hashs : typing___Optional[builtin___bytes] = None,
        total_byte_size : typing___Optional[builtin___int] = None,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"commit",b"commit",u"error",b"error",u"hashs",b"hashs",u"total_byte_size",b"total_byte_size"]) -> None: ...
type___FindMissingHashRecordsReply = FindMissingHashRecordsReply

class FindMissingSchemasRequest(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    commit: typing___Text = ...
    schema_digests: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text] = ...

    def __init__(self,
        *,
        commit : typing___Optional[typing___Text] = None,
        schema_digests : typing___Optional[typing___Iterable[typing___Text]] = None,
        ) -> None: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"commit",b"commit",u"schema_digests",b"schema_digests"]) -> None: ...
type___FindMissingSchemasRequest = FindMissingSchemasRequest

class FindMissingSchemasReply(google___protobuf___message___Message):
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    commit: typing___Text = ...
    schema_digests: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text] = ...

    @property
    def error(self) -> type___ErrorProto: ...

    def __init__(self,
        *,
        commit : typing___Optional[typing___Text] = None,
        schema_digests : typing___Optional[typing___Iterable[typing___Text]] = None,
        error : typing___Optional[type___ErrorProto] = None,
        ) -> None: ...
    def HasField(self, field_name: typing_extensions___Literal[u"error",b"error"]) -> builtin___bool: ...
    def ClearField(self, field_name: typing_extensions___Literal[u"commit",b"commit",u"error",b"error",u"schema_digests",b"schema_digests"]) -> None: ...
type___FindMissingSchemasReply = FindMissingSchemasReply
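
The stub definitions above only describe message shapes for type checkers; a
minimal, hedged sketch of how the corresponding runtime classes generated into
hangar_service_pb2 round-trip through the standard protobuf message API (the
field names come from the stubs above, the literal values are purely
illustrative):

    from hangar.remote import hangar_service_pb2 as pb2

    branch = pb2.BranchRecord(name='master', commit='abc123def456')
    err = pb2.ErrorProto(code=0, message='OK')
    reply = pb2.FetchBranchRecordReply(rec=branch, error=err)

    payload = reply.SerializeToString()                      # wire-format bytes
    roundtrip = pb2.FetchBranchRecordReply.FromString(payload)
    assert roundtrip.rec.name == 'master'                    # scalar field access
    assert roundtrip.HasField('error')                       # submessage presence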


================================================
FILE: src/hangar/remote/hangar_service_pb2_grpc.py
================================================
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
import grpc

from . import hangar_service_pb2 as hangar__service__pb2


class HangarServiceStub(object):
    """Missing associated documentation comment in .proto file"""

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        self.PING = channel.unary_unary(
                '/hangar.HangarService/PING',
                request_serializer=hangar__service__pb2.PingRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.PingReply.FromString,
                )
        self.GetClientConfig = channel.unary_unary(
                '/hangar.HangarService/GetClientConfig',
                request_serializer=hangar__service__pb2.GetClientConfigRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.GetClientConfigReply.FromString,
                )
        self.FetchBranchRecord = channel.unary_unary(
                '/hangar.HangarService/FetchBranchRecord',
                request_serializer=hangar__service__pb2.FetchBranchRecordRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.FetchBranchRecordReply.FromString,
                )
        self.FetchData = channel.unary_stream(
                '/hangar.HangarService/FetchData',
                request_serializer=hangar__service__pb2.FetchDataRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.FetchDataReply.FromString,
                )
        self.FetchCommit = channel.unary_stream(
                '/hangar.HangarService/FetchCommit',
                request_serializer=hangar__service__pb2.FetchCommitRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.FetchCommitReply.FromString,
                )
        self.FetchSchema = channel.unary_unary(
                '/hangar.HangarService/FetchSchema',
                request_serializer=hangar__service__pb2.FetchSchemaRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.FetchSchemaReply.FromString,
                )
        self.PushBranchRecord = channel.unary_unary(
                '/hangar.HangarService/PushBranchRecord',
                request_serializer=hangar__service__pb2.PushBranchRecordRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.PushBranchRecordReply.FromString,
                )
        self.PushData = channel.stream_unary(
                '/hangar.HangarService/PushData',
                request_serializer=hangar__service__pb2.PushDataRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.PushDataReply.FromString,
                )
        self.PushCommit = channel.stream_unary(
                '/hangar.HangarService/PushCommit',
                request_serializer=hangar__service__pb2.PushCommitRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.PushCommitReply.FromString,
                )
        self.PushSchema = channel.unary_unary(
                '/hangar.HangarService/PushSchema',
                request_serializer=hangar__service__pb2.PushSchemaRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.PushSchemaReply.FromString,
                )
        self.FetchFindMissingCommits = channel.unary_unary(
                '/hangar.HangarService/FetchFindMissingCommits',
                request_serializer=hangar__service__pb2.FindMissingCommitsRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.FindMissingCommitsReply.FromString,
                )
        self.FetchFindMissingHashRecords = channel.stream_stream(
                '/hangar.HangarService/FetchFindMissingHashRecords',
                request_serializer=hangar__service__pb2.FindMissingHashRecordsRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.FindMissingHashRecordsReply.FromString,
                )
        self.FetchFindMissingSchemas = channel.unary_unary(
                '/hangar.HangarService/FetchFindMissingSchemas',
                request_serializer=hangar__service__pb2.FindMissingSchemasRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.FindMissingSchemasReply.FromString,
                )
        self.PushFindMissingCommits = channel.unary_unary(
                '/hangar.HangarService/PushFindMissingCommits',
                request_serializer=hangar__service__pb2.FindMissingCommitsRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.FindMissingCommitsReply.FromString,
                )
        self.PushFindMissingHashRecords = channel.stream_stream(
                '/hangar.HangarService/PushFindMissingHashRecords',
                request_serializer=hangar__service__pb2.FindMissingHashRecordsRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.FindMissingHashRecordsReply.FromString,
                )
        self.PushFindMissingSchemas = channel.unary_unary(
                '/hangar.HangarService/PushFindMissingSchemas',
                request_serializer=hangar__service__pb2.FindMissingSchemasRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.FindMissingSchemasReply.FromString,
                )
        self.FetchFindDataOrigin = channel.stream_stream(
                '/hangar.HangarService/FetchFindDataOrigin',
                request_serializer=hangar__service__pb2.DataOriginRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.DataOriginReply.FromString,
                )
        self.PushFindDataOrigin = channel.stream_stream(
                '/hangar.HangarService/PushFindDataOrigin',
                request_serializer=hangar__service__pb2.PushFindDataOriginRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.PushFindDataOriginReply.FromString,
                )
        self.PushBeginContext = channel.unary_unary(
                '/hangar.HangarService/PushBeginContext',
                request_serializer=hangar__service__pb2.PushBeginContextRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.PushBeginContextReply.FromString,
                )
        self.PushEndContext = channel.unary_unary(
                '/hangar.HangarService/PushEndContext',
                request_serializer=hangar__service__pb2.PushEndContextRequest.SerializeToString,
                response_deserializer=hangar__service__pb2.PushEndContextReply.FromString,
                )


class HangarServiceServicer(object):
    """Missing associated documentation comment in .proto file"""

    def PING(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def GetClientConfig(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def FetchBranchRecord(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def FetchData(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def FetchCommit(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def FetchSchema(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PushBranchRecord(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PushData(self, request_iterator, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PushCommit(self, request_iterator, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PushSchema(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def FetchFindMissingCommits(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def FetchFindMissingHashRecords(self, request_iterator, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def FetchFindMissingSchemas(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PushFindMissingCommits(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PushFindMissingHashRecords(self, request_iterator, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PushFindMissingSchemas(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def FetchFindDataOrigin(self, request_iterator, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PushFindDataOrigin(self, request_iterator, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PushBeginContext(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def PushEndContext(self, request, context):
        """Missing associated documentation comment in .proto file"""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')


def add_HangarServiceServicer_to_server(servicer, server):
    rpc_method_handlers = {
            'PING': grpc.unary_unary_rpc_method_handler(
                    servicer.PING,
                    request_deserializer=hangar__service__pb2.PingRequest.FromString,
                    response_serializer=hangar__service__pb2.PingReply.SerializeToString,
            ),
            'GetClientConfig': grpc.unary_unary_rpc_method_handler(
                    servicer.GetClientConfig,
                    request_deserializer=hangar__service__pb2.GetClientConfigRequest.FromString,
                    response_serializer=hangar__service__pb2.GetClientConfigReply.SerializeToString,
            ),
            'FetchBranchRecord': grpc.unary_unary_rpc_method_handler(
                    servicer.FetchBranchRecord,
                    request_deserializer=hangar__service__pb2.FetchBranchRecordRequest.FromString,
                    response_serializer=hangar__service__pb2.FetchBranchRecordReply.SerializeToString,
            ),
            'FetchData': grpc.unary_stream_rpc_method_handler(
                    servicer.FetchData,
                    request_deserializer=hangar__service__pb2.FetchDataRequest.FromString,
                    response_serializer=hangar__service__pb2.FetchDataReply.SerializeToString,
            ),
            'FetchCommit': grpc.unary_stream_rpc_method_handler(
                    servicer.FetchCommit,
                    request_deserializer=hangar__service__pb2.FetchCommitRequest.FromString,
                    response_serializer=hangar__service__pb2.FetchCommitReply.SerializeToString,
            ),
            'FetchSchema': grpc.unary_unary_rpc_method_handler(
                    servicer.FetchSchema,
                    request_deserializer=hangar__service__pb2.FetchSchemaRequest.FromString,
                    response_serializer=hangar__service__pb2.FetchSchemaReply.SerializeToString,
            ),
            'PushBranchRecord': grpc.unary_unary_rpc_method_handler(
                    servicer.PushBranchRecord,
                    request_deserializer=hangar__service__pb2.PushBranchRecordRequest.FromString,
                    response_serializer=hangar__service__pb2.PushBranchRecordReply.SerializeToString,
            ),
            'PushData': grpc.stream_unary_rpc_method_handler(
                    servicer.PushData,
                    request_deserializer=hangar__service__pb2.PushDataRequest.FromString,
                    response_serializer=hangar__service__pb2.PushDataReply.SerializeToString,
            ),
            'PushCommit': grpc.stream_unary_rpc_method_handler(
                    servicer.PushCommit,
                    request_deserializer=hangar__service__pb2.PushCommitRequest.FromString,
                    response_serializer=hangar__service__pb2.PushCommitReply.SerializeToString,
            ),
            'PushSchema': grpc.unary_unary_rpc_method_handler(
                    servicer.PushSchema,
                    request_deserializer=hangar__service__pb2.PushSchemaRequest.FromString,
                    response_serializer=hangar__service__pb2.PushSchemaReply.SerializeToString,
            ),
            'FetchFindMissingCommits': grpc.unary_unary_rpc_method_handler(
                    servicer.FetchFindMissingCommits,
                    request_deserializer=hangar__service__pb2.FindMissingCommitsRequest.FromString,
                    response_serializer=hangar__service__pb2.FindMissingCommitsReply.SerializeToString,
            ),
            'FetchFindMissingHashRecords': grpc.stream_stream_rpc_method_handler(
                    servicer.FetchFindMissingHashRecords,
                    request_deserializer=hangar__service__pb2.FindMissingHashRecordsRequest.FromString,
                    response_serializer=hangar__service__pb2.FindMissingHashRecordsReply.SerializeToString,
            ),
            'FetchFindMissingSchemas': grpc.unary_unary_rpc_method_handler(
                    servicer.FetchFindMissingSchemas,
                    request_deserializer=hangar__service__pb2.FindMissingSchemasRequest.FromString,
                    response_serializer=hangar__service__pb2.FindMissingSchemasReply.SerializeToString,
            ),
            'PushFindMissingCommits': grpc.unary_unary_rpc_method_handler(
                    servicer.PushFindMissingCommits,
                    request_deserializer=hangar__service__pb2.FindMissingCommitsRequest.FromString,
                    response_serializer=hangar__service__pb2.FindMissingCommitsReply.SerializeToString,
            ),
            'PushFindMissingHashRecords': grpc.stream_stream_rpc_method_handler(
                    servicer.PushFindMissingHashRecords,
                    request_deserializer=hangar__service__pb2.FindMissingHashRecordsRequest.FromString,
                    response_serializer=hangar__service__pb2.FindMissingHashRecordsReply.SerializeToString,
            ),
            'PushFindMissingSchemas': grpc.unary_unary_rpc_method_handler(
                    servicer.PushFindMissingSchemas,
                    request_deserializer=hangar__service__pb2.FindMissingSchemasRequest.FromString,
                    response_serializer=hangar__service__pb2.FindMissingSchemasReply.SerializeToString,
            ),
            'FetchFindDataOrigin': grpc.stream_stream_rpc_method_handler(
                    servicer.FetchFindDataOrigin,
                    request_deserializer=hangar__service__pb2.DataOriginRequest.FromString,
                    response_serializer=hangar__service__pb2.DataOriginReply.SerializeToString,
            ),
            'PushFindDataOrigin': grpc.stream_stream_rpc_method_handler(
                    servicer.PushFindDataOrigin,
                    request_deserializer=hangar__service__pb2.PushFindDataOriginRequest.FromString,
                    response_serializer=hangar__service__pb2.PushFindDataOriginReply.SerializeToString,
            ),
            'PushBeginContext': grpc.unary_unary_rpc_method_handler(
                    servicer.PushBeginContext,
                    request_deserializer=hangar__service__pb2.PushBeginContextRequest.FromString,
                    response_serializer=hangar__service__pb2.PushBeginContextReply.SerializeToString,
            ),
            'PushEndContext': grpc.unary_unary_rpc_method_handler(
                    servicer.PushEndContext,
                    request_deserializer=hangar__service__pb2.PushEndContextRequest.FromString,
                    response_serializer=hangar__service__pb2.PushEndContextReply.SerializeToString,
            ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
            'hangar.HangarService', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))


 # This class is part of an EXPERIMENTAL API.
class HangarService(object):
    """Missing associated documentation comment in .proto file"""

    @staticmethod
    def PING(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/PING',
            hangar__service__pb2.PingRequest.SerializeToString,
            hangar__service__pb2.PingReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def GetClientConfig(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/GetClientConfig',
            hangar__service__pb2.GetClientConfigRequest.SerializeToString,
            hangar__service__pb2.GetClientConfigReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def FetchBranchRecord(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/FetchBranchRecord',
            hangar__service__pb2.FetchBranchRecordRequest.SerializeToString,
            hangar__service__pb2.FetchBranchRecordReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def FetchData(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_stream(request, target, '/hangar.HangarService/FetchData',
            hangar__service__pb2.FetchDataRequest.SerializeToString,
            hangar__service__pb2.FetchDataReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def FetchCommit(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_stream(request, target, '/hangar.HangarService/FetchCommit',
            hangar__service__pb2.FetchCommitRequest.SerializeToString,
            hangar__service__pb2.FetchCommitReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def FetchSchema(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/FetchSchema',
            hangar__service__pb2.FetchSchemaRequest.SerializeToString,
            hangar__service__pb2.FetchSchemaReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PushBranchRecord(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/PushBranchRecord',
            hangar__service__pb2.PushBranchRecordRequest.SerializeToString,
            hangar__service__pb2.PushBranchRecordReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PushData(request_iterator,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.stream_unary(request_iterator, target, '/hangar.HangarService/PushData',
            hangar__service__pb2.PushDataRequest.SerializeToString,
            hangar__service__pb2.PushDataReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PushCommit(request_iterator,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.stream_unary(request_iterator, target, '/hangar.HangarService/PushCommit',
            hangar__service__pb2.PushCommitRequest.SerializeToString,
            hangar__service__pb2.PushCommitReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PushSchema(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/PushSchema',
            hangar__service__pb2.PushSchemaRequest.SerializeToString,
            hangar__service__pb2.PushSchemaReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def FetchFindMissingCommits(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/FetchFindMissingCommits',
            hangar__service__pb2.FindMissingCommitsRequest.SerializeToString,
            hangar__service__pb2.FindMissingCommitsReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def FetchFindMissingHashRecords(request_iterator,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.stream_stream(request_iterator, target, '/hangar.HangarService/FetchFindMissingHashRecords',
            hangar__service__pb2.FindMissingHashRecordsRequest.SerializeToString,
            hangar__service__pb2.FindMissingHashRecordsReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def FetchFindMissingSchemas(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/FetchFindMissingSchemas',
            hangar__service__pb2.FindMissingSchemasRequest.SerializeToString,
            hangar__service__pb2.FindMissingSchemasReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PushFindMissingCommits(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/PushFindMissingCommits',
            hangar__service__pb2.FindMissingCommitsRequest.SerializeToString,
            hangar__service__pb2.FindMissingCommitsReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PushFindMissingHashRecords(request_iterator,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.stream_stream(request_iterator, target, '/hangar.HangarService/PushFindMissingHashRecords',
            hangar__service__pb2.FindMissingHashRecordsRequest.SerializeToString,
            hangar__service__pb2.FindMissingHashRecordsReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PushFindMissingSchemas(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/PushFindMissingSchemas',
            hangar__service__pb2.FindMissingSchemasRequest.SerializeToString,
            hangar__service__pb2.FindMissingSchemasReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def FetchFindDataOrigin(request_iterator,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.stream_stream(request_iterator, target, '/hangar.HangarService/FetchFindDataOrigin',
            hangar__service__pb2.DataOriginRequest.SerializeToString,
            hangar__service__pb2.DataOriginReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PushFindDataOrigin(request_iterator,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.stream_stream(request_iterator, target, '/hangar.HangarService/PushFindDataOrigin',
            hangar__service__pb2.PushFindDataOriginRequest.SerializeToString,
            hangar__service__pb2.PushFindDataOriginReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PushBeginContext(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/PushBeginContext',
            hangar__service__pb2.PushBeginContextRequest.SerializeToString,
            hangar__service__pb2.PushBeginContextReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)

    @staticmethod
    def PushEndContext(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        return grpc.experimental.unary_unary(request, target, '/hangar.HangarService/PushEndContext',
            hangar__service__pb2.PushEndContextRequest.SerializeToString,
            hangar__service__pb2.PushEndContextReply.FromString,
            options, channel_credentials,
            call_credentials, compression, wait_for_ready, timeout, metadata)


================================================
FILE: src/hangar/remote/header_manipulator_client_interceptor.py
================================================
"""Interceptor that adds headers to outgoing requests

Portions of this code have been taken and modified from the "gRPC" project.

URL:      https://github.com/grpc/grpc/
File:     examples/python/interceptors/default_value/header_manipulator_client_interceptor.py
Commit:   87cd994b0477e98c976e7b321b3c1f52666ab5e0
Accessed: 23 APR 2019

gRPC License
-------------------------------------------------------------------------------
Copyright 2017 gRPC authors.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use
this file except in compliance with the License. You may obtain a copy of the
License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""
from collections import namedtuple

import grpc


class _GenericClientInterceptor(
        grpc.UnaryUnaryClientInterceptor, grpc.UnaryStreamClientInterceptor,
        grpc.StreamUnaryClientInterceptor, grpc.StreamStreamClientInterceptor):
    """Base class for interceptors that operate on all RPC types."""

    def __init__(self, interceptor_function):
        self._fn = interceptor_function

    def intercept_unary_unary(self, continuation, client_call_details, request):
        new_details, new_request_iterator, postprocess = self._fn(
            client_call_details, iter((request,)), False, False)
        response = continuation(new_details, next(new_request_iterator))
        return postprocess(response) if postprocess else response

    def intercept_unary_stream(self, continuation, client_call_details,
                               request):
        new_details, new_request_iterator, postprocess = self._fn(
            client_call_details, iter((request,)), False, True)
        response_it = continuation(new_details, next(new_request_iterator))
        return postprocess(response_it) if postprocess else response_it

    def intercept_stream_unary(self, continuation, client_call_details,
                               request_iterator):
        new_details, new_request_iterator, postprocess = self._fn(
            client_call_details, request_iterator, True, False)
        response = continuation(new_details, new_request_iterator)
        return postprocess(response) if postprocess else response

    def intercept_stream_stream(self, continuation, client_call_details,
                                request_iterator):
        new_details, new_request_iterator, postprocess = self._fn(
            client_call_details, request_iterator, True, True)
        response_it = continuation(new_details, new_request_iterator)
        return postprocess(response_it) if postprocess else response_it


def create_client_interceptor(intercept_call):
    return _GenericClientInterceptor(intercept_call)


class _ClientCallDetails(
        namedtuple(
            typename='_ClientCallDetails',
            field_names=('method', 'timeout', 'metadata', 'credentials')),
        grpc.ClientCallDetails):
    pass


def header_adder_interceptor(header, value):
    """Interceptor that adds headers to outgoing requests."""

    def intercept_call(client_call_details, request_iterator, request_streaming,
                       response_streaming):
        metadata = []
        if client_call_details.metadata is not None:
            metadata = list(client_call_details.metadata)

        if (header != '') and (value != ''):
            metadata.append((header, value))
        client_call_details = _ClientCallDetails(
            client_call_details.method,
            client_call_details.timeout,
            metadata,
            client_call_details.credentials)
        return client_call_details, request_iterator, None

    return create_client_interceptor(intercept_call)
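

# A minimal, hedged usage sketch (not part of the original module): the
# interceptor returned by ``header_adder_interceptor`` can be attached to a
# client channel via ``grpc.intercept_channel``. The address and header name
# below are hypothetical placeholders.
#
#     channel = grpc.insecure_channel('localhost:50051')
#     channel = grpc.intercept_channel(
#         channel, header_adder_interceptor('hangar-username', 'some-user'))
#     # ``channel`` can now be used to construct the generated service stub;
#     # every outgoing RPC will carry the ('hangar-username', 'some-user')
#     # metadata pair.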

================================================
FILE: src/hangar/remote/request_header_validator_interceptor.py
================================================
"""Interceptor that ensures a specific header is present.

Portions of this code have been taken and modified from the "gRPC" project.

URL:      https://github.com/grpc/grpc/
File:     examples/python/interceptors/default_value/default_value_client_interceptor.py
Commit:   6146151a4fe1e28921c12d1ae5635e113a24b9d7
Accessed: 23 APR 2019

gRPC License
-------------------------------------------------------------------------------
Copyright 2017 gRPC authors.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use
this file except in compliance with the License. You may obtain a copy of the
License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed
under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
"""
from os.path import split

import grpc


SERVICE_METHOD_TYPES = {
    'PING': 'uu',
    'GetClientConfig': 'uu',
    'FetchBranchRecord': 'uu',
    'FetchData': 'us',
    'FetchCommit': 'us',
    'FetchSchema': 'uu',
    'PushBranchRecord': 'uu',
    'PushData': 'su',
    'PushCommit': 'su',
    'PushSchema': 'uu',
    'FetchFindMissingCommits': 'uu',
    'FetchFindMissingHashRecords': 'ss',
    'FetchFindMissingSchemas': 'uu',
    'PushFindMissingCommits': 'uu',
    'PushFindMissingHashRecords': 'ss',
    'PushFindMissingSchemas': 'uu',
    'FetchFindDataOrigin': 'ss',
    'PushFindDataOrigin': 'ss',
    'PushBeginContext': 'uu',
    'PushEndContext': 'uu',
}
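# Cardinality codes used above: 'uu' = unary-unary, 'us' = unary-stream,
# 'su' = stream-unary, 'ss' = stream-stream. ``_select_rpc_terminator``
# below uses this mapping to abort a rejected request with a terminator of
# the matching RPC handler type.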


def _unary_unary_rpc_terminator(code, details):
    def terminate(ignored_request, context):
        context.abort(code, details)
    return grpc.unary_unary_rpc_method_handler(terminate)


def _unary_stream_rpc_terminator(code, details):  # pragma: no cover
    def terminate(ignored_request, context):
        context.abort(code, details)
    return grpc.unary_stream_rpc_method_handler(terminate)


def _stream_unary_rpc_terminator(code, details):  # pragma: no cover
    def terminate(ignored_request, context):
        context.abort(code, details)
    return grpc.stream_unary_rpc_method_handler(terminate)


def _stream_stream_rpc_terminator(code, details):  # pragma: no cover
    def terminate(ignored_request, context):
        context.abort(code, details)
    return grpc.stream_stream_rpc_method_handler(terminate)


def _select_rpc_terminator(intercepted_method):
    method_type = SERVICE_METHOD_TYPES[intercepted_method]

    if method_type == 'uu':
        return _unary_unary_rpc_terminator
    elif method_type == 'su':  # pragma: no cover
        return _stream_unary_rpc_terminator
    elif method_type == 'us':  # pragma: no cover
        return _unary_stream_rpc_terminator
    elif method_type == 'ss':  # pragma: no cover
        return _stream_stream_rpc_terminator
    else:                      # pragma: no cover
        raise ValueError(f'unknown method type: {method_type} for service: {intercepted_method}')


class RequestHeaderValidatorInterceptor(grpc.ServerInterceptor):

    def __init__(self, push_restricted, header, value, code, details):
        self._push_restricted = push_restricted
        self._header = header
        self._value = value
        self._code = code
        self._details = details

    def intercept_service(self, continuation, handler_call_details):
        _, intercepted_method = split(handler_call_details.method)
        print(f'intercepted method: {intercepted_method}')

        if (intercepted_method.startswith('Push') is True) and (self._push_restricted is True):
            if (self._header, self._value) in handler_call_details.invocation_metadata:
                return continuation(handler_call_details)
            else:
                return _select_rpc_terminator(intercepted_method)(self._code, self._details)
        else:
            return continuation(handler_call_details)


================================================
FILE: src/hangar/remote/server.py
================================================
import configparser
import os
import shutil
import tempfile
import traceback
import warnings
from concurrent import futures
from os.path import join as pjoin
from pathlib import Path
from pprint import pprint as pp
from threading import Lock
from typing import Union, Iterable

import blosc
import grpc
import lmdb

from . import (
    chunks,
    hangar_service_pb2,
    hangar_service_pb2_grpc,
    request_header_validator_interceptor,
)
from .content import ContentWriter, DataWriter
from .. import constants as c
from ..backends import BACKEND_ACCESSOR_MAP, backend_decoder
from ..context import Environments
from ..records import (
    commiting,
    hashs,
    heads,
    parsing,
    queries,
    summarize,
    hash_schema_db_key_from_raw_key,
    hash_data_db_key_from_raw_key,
)
from ..records.hashmachine import hash_func_from_tcode
from ..txnctx import TxnRegister
from ..utils import set_blosc_nthreads

set_blosc_nthreads()


def server_config(server_dir, *, create: bool = True) -> configparser.ConfigParser:
    CFG = configparser.ConfigParser()
    dst_dir = Path(server_dir)
    dst_path = dst_dir.joinpath(c.CONFIG_SERVER_NAME)
    if dst_path.is_file():
        CFG.read(dst_path)
        print(f'Found Config File at {dst_path}')
    else:
        if create:
            dst_dir.mkdir(exist_ok=True)
            print(f'Creating Server Config File in {dst_path}')
            src_path = Path(os.path.dirname(__file__), c.CONFIG_SERVER_NAME)
            shutil.copyfile(src_path, dst_path)
            CFG.read(src_path)
        else:
            src_path = Path(os.path.dirname(__file__), c.CONFIG_SERVER_NAME)
            CFG.read(src_path)
    return CFG


def context_abort_with_exception_traceback(
        context: grpc.ServicerContext,
        exc: Exception,
        status_code: grpc.StatusCode
):
    context.abort(
        code=status_code,
        details=(f'Exception Type: {type(exc)} \n'
                 f'Exception Message: {exc} \n'
                 f'Traceback: \n {traceback.format_tb(exc.__traceback__)}'))


def context_abort_with_handled_error(
        context: grpc.ServicerContext,
        message: str,
        status_code: grpc.StatusCode
):
    context.abort(code=status_code, details=message)


class HangarServer(hangar_service_pb2_grpc.HangarServiceServicer):

    def __init__(self, repo_path: Union[str, bytes, Path], overwrite=False):

        if isinstance(repo_path, (str, bytes)):
            repo_path = Path(repo_path)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore', UserWarning)
            envs = Environments(pth=repo_path)
        self.env: Environments = envs
        self.data_writer_lock = Lock()
        self.hash_reader_lock = Lock()

        try:
            self.env.init_repo(
                user_name='SERVER_USER',
                user_email='SERVER_USER@HANGAR.SERVER',
                remove_old=overwrite)
        except OSError:
            pass

        self._rFs = {}
        for backend, accessor in BACKEND_ACCESSOR_MAP.items():
            if accessor is not None:
                self._rFs[backend] = accessor(
                    repo_path=self.env.repo_path,
                    schema_shape=None,
                    schema_dtype=None)
                self._rFs[backend].open(mode='r')

        self.CFG = server_config(repo_path, create=True)
        print('Server Started with Config:')
        pp({k: dict(v) for k, v in self.CFG.items()})
        self.txnregister = TxnRegister()
        self.repo_path = self.env.repo_path
        self.data_dir = pjoin(self.repo_path, c.DIR_DATA)
        self.CW = ContentWriter(self.env)
        self.DW = DataWriter(self.env)

    def close(self):
        for backend_accessor in self._rFs.values():
            backend_accessor.close()
        self.env._close_environments()

    # -------------------- Client Config --------------------------------------

    def PING(self, request, context):
        """Test function. PING -> PONG!
        """
        reply = hangar_service_pb2.PingReply(result='PONG')
        return reply

    def GetClientConfig(self, request, context):
        """Return parameters to the client to set up channel options as desired by the server.
        """
        clientCFG = self.CFG['CLIENT_GRPC']
        push_max_nbytes = clientCFG['push_max_nbytes']
        enable_compression = clientCFG['enable_compression']
        optimization_target = clientCFG['optimization_target']

        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        reply = hangar_service_pb2.GetClientConfigReply(error=err)
        reply.config['push_max_nbytes'] = push_max_nbytes
        reply.config['enable_compression'] = enable_compression
        reply.config['optimization_target'] = optimization_target
        return reply

    # -------------------- Branch Record --------------------------------------

    def FetchBranchRecord(self, request, context):
        """Return the current HEAD commit of a particular branch
        """
        branch_name = request.rec.name
        try:
            head = heads.get_branch_head_commit(self.env.branchenv, branch_name)
            rec = hangar_service_pb2.BranchRecord(name=branch_name, commit=head)
            err = hangar_service_pb2.ErrorProto(code=0, message='OK')
            reply = hangar_service_pb2.FetchBranchRecordReply(rec=rec, error=err)
            return reply
        except ValueError:
            msg = f'BRANCH: {branch_name} DOES NOT EXIST ON SERVER.'
            context_abort_with_handled_error(
                context=context, message=msg, status_code=grpc.StatusCode.NOT_FOUND)
            return

    def PushBranchRecord(self, request, context):
        """Update the HEAD commit of a branch, creating the record if not previously existing.
        """
        branch_name = request.rec.name
        commit = request.rec.commit
        branch_names = heads.get_branch_names(self.env.branchenv)
        if branch_name not in branch_names:
            heads.create_branch(self.env.branchenv, name=branch_name, base_commit=commit)
            err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        else:
            current_head = heads.get_branch_head_commit(self.env.branchenv, branch_name)
            if current_head == commit:
                msg = f'NO CHANGE TO BRANCH: {branch_name} WITH HEAD: {current_head}'
                context_abort_with_handled_error(
                    context=context, message=msg, status_code=grpc.StatusCode.ALREADY_EXISTS)
                return
            else:
                heads.set_branch_head_commit(self.env.branchenv, branch_name, commit)
                err = hangar_service_pb2.ErrorProto(code=0, message='OK')

        reply = hangar_service_pb2.PushBranchRecordReply(error=err)
        return reply

    # -------------------------- Commit Record --------------------------------

    def FetchCommit(self, request, context):
        """Return raw data representing contents, spec, and parents of a commit hash.
        """
        commit = request.commit
        commitRefKey = parsing.commit_ref_db_key_from_raw_key(commit)
        commitParentKey = parsing.commit_parent_db_key_from_raw_key(commit)
        commitSpecKey = parsing.commit_spec_db_key_from_raw_key(commit)

        reftxn = self.txnregister.begin_reader_txn(self.env.refenv)
        try:
            commitRefVal = reftxn.get(commitRefKey, default=False)
            commitParentVal = reftxn.get(commitParentKey, default=False)
            commitSpecVal = reftxn.get(commitSpecKey, default=False)
        finally:
            self.txnregister.abort_reader_txn(self.env.refenv)

        if commitRefVal is False:
            msg = f'COMMIT: {commit} DOES NOT EXIST ON SERVER'
            context.set_details(msg)
            context.set_code(grpc.StatusCode.NOT_FOUND)
            err = hangar_service_pb2.ErrorProto(code=5, message=msg)
            reply = hangar_service_pb2.FetchCommitReply(commit=commit, error=err)
            yield reply
            return  # terminate the response stream (PEP 479)
        else:
            raw_data_chunks = chunks.chunk_bytes(commitRefVal)
            bsize = len(commitRefVal)
            commit_proto = hangar_service_pb2.CommitRecord()
            commit_proto.parent = commitParentVal
            commit_proto.spec = commitSpecVal
            reply = hangar_service_pb2.FetchCommitReply(commit=commit, total_byte_size=bsize)
            for chunk in raw_data_chunks:
                commit_proto.ref = chunk
                reply.record.CopyFrom(commit_proto)
                yield reply

    def PushCommit(self, request_iterator, context):
        """Record the contents of a new commit sent to the server.

        Will not overwrite data if a commit hash is already recorded on the server.
        """
        for idx, request in enumerate(request_iterator):
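            # The first message of the stream carries the commit digest, the
            # parent / spec records, and the total payload size (used to
            # preallocate the buffer); every message carries a chunk of the
            # raw ``ref`` bytes which are reassembled in order.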
            if idx == 0:
                commit = request.commit
                refBytes, offset = bytearray(request.total_byte_size), 0
                specVal = request.record.spec
                parentVal = request.record.parent
            size = len(request.record.ref)
            refBytes[offset: offset + size] = request.record.ref
            offset += size

        digest = self.CW.commit(commit, parentVal, specVal, refBytes)
        if not digest:
            msg = f'COMMIT: {commit} ALREADY EXISTS'
            context.set_code(grpc.StatusCode.ALREADY_EXISTS)
            context.set_details(msg)
            err = hangar_service_pb2.ErrorProto(code=6, message=msg)
        else:
            err = hangar_service_pb2.ErrorProto(code=0, message='OK')
            commiting.move_process_data_to_store(self.env.repo_path, remote_operation=True)

        reply = hangar_service_pb2.PushCommitReply(error=err)
        return reply

    # --------------------- Schema Record -------------------------------------

    def FetchSchema(self, request, context):
        """Return the raw byte specification of a particular schema with requested hash.
        """
        schema_hash = request.rec.digest
        schemaKey = hash_schema_db_key_from_raw_key(schema_hash)
        hashTxn = self.txnregister.begin_reader_txn(self.env.hashenv)
        try:
            schemaExists = hashTxn.get(schemaKey, default=False)
            if schemaExists is not False:
                print(f'found schema: {schema_hash}')
                rec = hangar_service_pb2.SchemaRecord(digest=schema_hash, blob=schemaExists)
                err = hangar_service_pb2.ErrorProto(code=0, message='OK')
            else:
                print(f'not exists: {schema_hash}')
                msg = f'SCHEMA HASH: {schema_hash} DOES NOT EXIST ON SERVER'
                context.set_details(msg)
                context.set_code(grpc.StatusCode.NOT_FOUND)
                err = hangar_service_pb2.ErrorProto(code=5, message=msg)
                rec = hangar_service_pb2.SchemaRecord(digest=schema_hash)
        finally:
            self.txnregister.abort_reader_txn(self.env.hashenv)

        reply = hangar_service_pb2.FetchSchemaReply(rec=rec, error=err)
        return reply

    def PushSchema(self, request, context):
        """Add a new schema byte specification record.

        Will not overwrite a schema hash which already exists on the server.
        """
        schema_hash = request.rec.digest
        schema_val = request.rec.blob

        digest = self.CW.schema(schema_hash, schema_val)
        if not digest:
            print(f'exists: {schema_val}')
            msg = f'SCHEMA: {schema_hash} ALREADY EXISTS ON SERVER'
            context.set_details(msg)
            context.set_code(grpc.StatusCode.ALREADY_EXISTS)
            err = hangar_service_pb2.ErrorProto(code=6, message=msg)
        else:
            print(f'created new: {schema_val}')
            err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        reply = hangar_service_pb2.PushSchemaReply(error=err)
        return reply

    # ---------------------------- Data ---------------------------------------

    def FetchFindDataOrigin(self, request_iterator, context):
        digests = []
        for request in request_iterator:
            digests.append(request.digest)

        hashTxn = self.txnregister.begin_reader_txn(self.env.hashenv)
        try:
            for digest in digests:
                hashKey = hash_data_db_key_from_raw_key(digest)
                hashVal = hashTxn.get(hashKey, default=False)
                if hashVal is False:
                    msg = f'HASH DOES NOT EXIST: {hashKey}'
                    context.set_details(msg)
                    context.set_code(grpc.StatusCode.NOT_FOUND)
                    err = hangar_service_pb2.ErrorProto(code=5, message=msg)
                    reply = hangar_service_pb2.FetchDataReply(error=err)
                    yield reply
                    return  # terminate the response stream (PEP 479)
                else:
                    spec = backend_decoder(hashVal)
                    if spec.backend in ['01', '00', '10']:
                        dtype = hangar_service_pb2.DataType.NP_ARRAY
                    elif spec.backend == '30':
                        dtype = hangar_service_pb2.DataType.STR
                    elif spec.backend == '31':
                        dtype = hangar_service_pb2.DataType.BYTES
                    else:
                        raise TypeError(spec)

                    response = hangar_service_pb2.DataOriginReply(
                        location=hangar_service_pb2.DataLocation.REMOTE_SERVER,
                        data_type=dtype,
                        digest=digest,
                        uri=digest,
                        compression=True,
                    )
                    response.compression_opts['id'] = 'blosc'
                    response.compression_opts['cname'] = 'blosclz'
                    response.compression_opts['clevel'] = '3'
                    yield response

        finally:
            self.txnregister.abort_reader_txn(self.env.hashenv)

    def FetchData(self, request, context):
        """Return a packed byte representation of samples corresponding to a digest.

        Please see comments below which explain why not all requests are
        guaranteed to fully complete in one operation.

        We receive a list of digests to send to the client. One consideration
        we have is that there is no way to know how much memory will be used
        when the data is read from disk. Samples are compressed against
        each-other before going over the wire, which means its preferable to
        read in as much as possible. However, since we don't want to overload
        the client system when the binary blob is decompressed into individual
        tensors, we set some maximum size which tensors can occupy when
        uncompressed. When we receive a list of digests whose data size is in
        excess of this limit, we just say sorry to the client, send the chunk
        of digests/tensors off to them as is (incomplete), and request that
        the client figure out what it still needs and ask us again.
        """
        uri = request.uri
        hashKey = hash_data_db_key_from_raw_key(uri)
        try:
            with self.hash_reader_lock:
                hashTxn = self.txnregister.begin_reader_txn(self.env.hashenv)
                hashVal = hashTxn.get(hashKey, default=False)
                self.txnregister.abort_reader_txn(self.env.hashenv)
        except Exception as e:
            context_abort_with_exception_traceback(
                context=context, exc=e, status_code=grpc.StatusCode.INTERNAL)
            raise e

        if hashVal is False:
            exc = FileNotFoundError(f'request uri does not exist. URI: {uri}')
            context_abort_with_exception_traceback(
                context=context, exc=exc, status_code=grpc.StatusCode.NOT_FOUND)

        spec = backend_decoder(hashVal)
        data = self._rFs[spec.backend].read_data(spec)
        dtype_code, raw_record = chunks.serialize_data(data)
        compressed_record = blosc.compress(
            raw_record, clevel=3, cname='blosclz', shuffle=blosc.NOSHUFFLE)

        def replies_iterator(raw, uri, error_proto):
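            # Build one ``FetchDataReply`` per wire-sized chunk of the
            # compressed payload. The proto message is reused, so every reply
            # carries the ``uri`` and total ``nbytes`` fields alongside the
            # current chunk of ``raw_data``.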
            reply = hangar_service_pb2.FetchDataReply(
                uri=uri,
                nbytes=len(raw),
                error=error_proto)
            for raw_chunk in chunks.chunk_bytes(raw):
                reply.raw_data = raw_chunk
                yield reply

        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        repliesIter = replies_iterator(compressed_record, uri, err)
        yield from repliesIter

    def PushFindDataOrigin(
            self,
            request_iterator: Iterable[hangar_service_pb2.PushFindDataOriginRequest],
            context
    ) -> Iterable[hangar_service_pb2.PushFindDataOriginReply]:

        CONFIG_SEND_LOCATION = hangar_service_pb2.DataLocation.REMOTE_SERVER

        all_requests = [req for req in request_iterator]
        for request in all_requests:
            if request.compression_is_desired is True:
                reply_compression_expected = True
                if request.data_type == hangar_service_pb2.DataType.NP_ARRAY:
                    reply_compression_opts_expected = {
                        'id': 'blosc',
                        'cname': 'blosclz',
                        'clevel': '3'
                    }
                elif request.data_type == hangar_service_pb2.DataType.STR:
                    reply_compression_opts_expected = {
                        'id': 'blosc',
                        'cname': 'zstd',
                        'clevel': '3'
                    }
                elif request.data_type == hangar_service_pb2.DataType.BYTES:
                    reply_compression_opts_expected = {
                        'id': 'blosc',
                        'cname': 'blosclz',
                        'clevel': '3'
                    }
                else:
                    raise TypeError(request)
            else:
                reply_compression_expected = False
                reply_compression_opts_expected = {}

            if CONFIG_SEND_LOCATION == hangar_service_pb2.DataLocation.REMOTE_SERVER:
                reply_uri = request.digest
            else:
                raise RuntimeError(f'CONFIG_SEND_LOCATION: {CONFIG_SEND_LOCATION}')

            reply = hangar_service_pb2.PushFindDataOriginReply(
                digest=request.digest,
                location=CONFIG_SEND_LOCATION,
                uri=reply_uri,
                compression_expected=reply_compression_expected,
                compression_opts_expected=reply_compression_opts_expected,
            )
            yield reply

    def PushBeginContext(self, request, context):
        try:
            self.DW.__enter__()
        except Exception as e:
            context.abort(
                code=grpc.StatusCode.INTERNAL,
                details=(f'Exception Type: {type(e)} \n'
                         f'Exception Message: {e} \n'
                         f'Traceback: \n {traceback.format_tb(e.__traceback__)}')
            )
        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        reply = hangar_service_pb2.PushBeginContextReply(err=err)
        return reply

    def PushEndContext(self, request, context):
        try:
            self.DW.__exit__()
        except Exception as e:
            context.abort(
                code=grpc.StatusCode.INTERNAL,
                details=(f'Exception Type: {type(e)} \n'
                         f'Exception Message: {e} \n'
                         f'Traceback: \n {traceback.format_tb(e.__traceback__)}')
            )
        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        reply = hangar_service_pb2.PushEndContextReply(err=err)
        return reply

    def PushData(
            self,
            request_iterator: Iterable[hangar_service_pb2.PushDataRequest],
            context: grpc.ServicerContext
    ) -> hangar_service_pb2.PushDataReply:
        """Receive compressed streams of binary data from the client.

        In order to prevent errors or malicious behavior, the cryptographic hash
        of every tensor is calculated and compared to what the client "said" it
        is. If an error is detected, no sample in the entire stream will be
        saved to disk.
        """

        for idx, request in enumerate(request_iterator):
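            # The first message carries the claimed digest (``uri``), value
            # type code, schema hash, and total payload size used to
            # preallocate the receive buffer; every message carries a chunk
            # of the compressed raw bytes.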
            if idx == 0:
                if not self.DW.is_cm:
                    context.abort(
                        code=grpc.StatusCode.FAILED_PRECONDITION,
                        details='Attempt to push without opening context'
                    )
                uri = request.uri
                dtype_code = request.data_type
                schema_hash = request.schema_hash
                dBytes = bytearray(request.nbytes)
                offset = 0
            size = len(request.raw_data)
            dBytes[offset: offset + size] = request.raw_data
            offset += size

        # TODO: Handle expected vs required
        uncompBytes = blosc.decompress(dBytes)

        received_data = chunks.deserialize_data(dtype_code, uncompBytes)
        hash_func = hash_func_from_tcode(str(dtype_code))
        received_hash = hash_func(received_data)

        # TODO: uri is not the correct name for this
        if received_hash != uri:
            context.abort(
                code=grpc.StatusCode.DATA_LOSS,
                details=f'HASH MANGLED, received: {received_hash} != expected digest: {uri}'
            )
        try:
            with self.data_writer_lock:
                _ = self.DW.data(schema_hash, data_digest=received_hash, data=received_data)  # returns saved_digests
        except Exception as e:
            context.abort(
                code=grpc.StatusCode.INTERNAL,
                details=(f'Exception Type: {type(e)} \n'
                         f'Exception Message: {e} \n'
                         f'Traceback: \n {traceback.format_tb(e.__traceback__)}')
            )
        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        reply = hangar_service_pb2.PushDataReply(error=err)
        return reply

    # ------------------------ Fetch Find Missing -----------------------------------

    def FetchFindMissingCommits(self, request, context):
        """Determine commit digests existing on the server which are not present on the client.
        """
        c_branch_name = request.branch.name
        c_ordered_commits = request.commits

        try:
            s_history = summarize.list_history(
                refenv=self.env.refenv,
                branchenv=self.env.branchenv,
                branch_name=c_branch_name)
        except ValueError:
            msg = f'BRANCH NOT EXIST. Name: {c_branch_name}'
            context.set_code(grpc.StatusCode.NOT_FOUND)
            context.set_details(msg)
            err = hangar_service_pb2.ErrorProto(code=5, message=msg)
            reply = hangar_service_pb2.FindMissingCommitsReply(error=err)
            return reply

        s_orderset = set(s_history['order'])
        c_orderset = set(c_ordered_commits)
        c_missing = list(s_orderset.difference(c_orderset))   # only difference to PushFindMissingCommits

        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        if len(c_missing) == 0:
            brch = hangar_service_pb2.BranchRecord(name=c_branch_name, commit=s_history['head'])
            reply = hangar_service_pb2.FindMissingCommitsReply(branch=brch, error=err)
        else:
            brch = hangar_service_pb2.BranchRecord(name=c_branch_name, commit=s_history['head'])
            reply = hangar_service_pb2.FindMissingCommitsReply(branch=brch, error=err)
            reply.commits.extend(c_missing)

        return reply

    def PushFindMissingCommits(self, request, context):
        """Determine commit digests existing on the client which are not present on the server.
        """
        c_branch_name = request.branch.name
        c_head_commit = request.branch.commit
        c_ordered_commits = request.commits

        s_commits = commiting.list_all_commits(self.env.refenv)
        s_orderset = set(s_commits)
        c_orderset = set(c_ordered_commits)
        s_missing = list(c_orderset.difference(s_orderset))  # only difference to FetchFindMissingCommits

        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        if len(s_missing) == 0:
            brch = hangar_service_pb2.BranchRecord(name=c_branch_name, commit=c_head_commit)
            reply = hangar_service_pb2.FindMissingCommitsReply(branch=brch, error=err)
        else:
            brch = hangar_service_pb2.BranchRecord(name=c_branch_name, commit=c_head_commit)
            reply = hangar_service_pb2.FindMissingCommitsReply(branch=brch, error=err)
            reply.commits.extend(s_missing)

        return reply

    def FetchFindMissingHashRecords(self, request_iterator, context):
        """Determine data tensor hash records existing on the server and not on the client.
        """
        for idx, request in enumerate(request_iterator):
            if idx == 0:
                commit = request.commit
                hBytes, offset = bytearray(request.total_byte_size), 0
            size = len(request.hashs)
            hBytes[offset: offset + size] = request.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        c_hashs_raw = chunks.deserialize_record_pack(uncompBytes)
        c_hashset = set([chunks.deserialize_ident(raw).digest for raw in c_hashs_raw])

        with tempfile.TemporaryDirectory() as tempD:
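            # Unpack the requested commit's references into a throw-away LMDB
            # database so the commit's data-hash -> schema-hash mapping can be
            # queried via ``RecordQuery``.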
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            s_hashes_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
            s_hashes = set(s_hashes_schemas.keys())
            tmpDB.close()

        c_missing = list(s_hashes.difference(c_hashset))
        c_hash_schemas_raw = [chunks.serialize_ident(c_mis, s_hashes_schemas[c_mis]) for c_mis in c_missing]
        raw_pack = chunks.serialize_record_pack(c_hash_schemas_raw)
        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        response_pb = hangar_service_pb2.FindMissingHashRecordsReply
        cIter = chunks.missingHashIterator(commit, raw_pack, err, response_pb)
        yield from cIter

    def PushFindMissingHashRecords(self, request_iterator, context):
        """Determine data tensor hash records existing on the client and not on the server.
        """
        for idx, request in enumerate(request_iterator):
            if idx == 0:
                commit = request.commit
                hBytes, offset = bytearray(request.total_byte_size), 0
            size = len(request.hashs)
            hBytes[offset: offset + size] = request.hashs
            offset += size

        uncompBytes = blosc.decompress(hBytes)
        c_hashs_raw = chunks.deserialize_record_pack(uncompBytes)
        c_hashset = set([chunks.deserialize_ident(raw).digest for raw in c_hashs_raw])
        s_hashset = set(hashs.HashQuery(self.env.hashenv).list_all_hash_keys_raw())
        s_missing = c_hashset.difference(s_hashset)
        s_hashs_raw = [chunks.serialize_ident(s_mis, '') for s_mis in s_missing]
        raw_pack = chunks.serialize_record_pack(s_hashs_raw)

        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        response_pb = hangar_service_pb2.FindMissingHashRecordsReply
        cIter = chunks.missingHashIterator(commit, raw_pack, err, response_pb)
        yield from cIter

    def FetchFindMissingSchemas(self, request, context):
        """Determine schema hash digest records existing on the server and not on the client.
        """
        commit = request.commit
        c_schemas = set(request.schema_digests)

        with tempfile.TemporaryDirectory() as tempD:
            tmpDF = os.path.join(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
            commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
            s_schemas = set(queries.RecordQuery(tmpDB).schema_hashes())
            tmpDB.close()

        c_missing = list(s_schemas.difference(c_schemas))
        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        reply = hangar_service_pb2.FindMissingSchemasReply(commit=commit, error=err)
        reply.schema_digests.extend(c_missing)
        return reply

    def PushFindMissingSchemas(self, request, context):
        """Determine schema hash digest records existing on the client and not on the server.
        """
        commit = request.commit
        c_schemas = set(request.schema_digests)
        s_schemas = set(hashs.HashQuery(self.env.hashenv).list_all_schema_digests())
        s_missing = list(c_schemas.difference(s_schemas))

        err = hangar_service_pb2.ErrorProto(code=0, message='OK')
        reply = hangar_service_pb2.FindMissingSchemasReply(commit=commit, error=err)
        reply.schema_digests.extend(s_missing)
        return reply


def serve(hangar_path: str,
          overwrite: bool = False,
          *,
          channel_address: str = None,
          restrict_push: bool = None,
          username: str = None,
          password: str = None) -> tuple:
    """Start serving the GRPC server. Should only be called once.

    Raises:
        e: critical error from one of the workers.
    """

    # ------------------- Configure Server ------------------------------------

    server_dir = pjoin(hangar_path, c.DIR_HANGAR_SERVER)
    CFG = server_config(server_dir, create=False)
    serverCFG = CFG['SERVER_GRPC']
    enable_compression = serverCFG['enable_compression']
    if enable_compression == 'NoCompression':
        compression_val = grpc.Compression.NoCompression
    elif enable_compression == 'Deflate':
        compression_val = grpc.Compression.Deflate
    elif enable_compression == 'Gzip':
        compression_val = grpc.Compression.Gzip
    else:
        compression_val = grpc.Compression.NoCompression

    optimization_target = serverCFG['optimization_target']
    if channel_address is None:
        channel_address = serverCFG['channel_address']
    max_thread_pool_workers = int(serverCFG['max_thread_pool_workers'])
    max_concurrent_rpcs = int(serverCFG['max_concurrent_rpcs'])

    adminCFG = CFG['SERVER_ADMIN']
    if (restrict_push is None) and (username is None) and (password is None):
        admin_restrict_push = bool(int(adminCFG['restrict_push']))
        admin_username = adminCFG['username']
        admin_password = adminCFG['password']
    else:
        admin_restrict_push = restrict_push
        admin_username = username
        admin_password = password
    msg = 'PERMISSION ERROR: PUSH OPERATIONS RESTRICTED FOR CALLER'
    code = grpc.StatusCode.PERMISSION_DENIED
    interc = request_header_validator_interceptor.RequestHeaderValidatorInterceptor(
        admin_restrict_push, admin_username, admin_password, code, msg)

    # ---------------- Start the thread pool for the grpc server --------------

    grpc_thread_pool = futures.ThreadPoolExecutor(
        max_workers=max_thread_pool_workers,
        thread_name_prefix='grpc_thread_pool')
    server = grpc.server(
        thread_pool=grpc_thread_pool,
        maximum_concurrent_rpcs=max_concurrent_rpcs,
        options=[('grpc.optimization_target', optimization_target)],
        compression=compression_val,
        interceptors=(interc,))

    # ------------------- Start the GRPC server -------------------------------

    hangserv = HangarServer(server_dir, overwrite)
    hangar_service_pb2_grpc.add_HangarServiceServicer_to_server(hangserv, server)
    port = server.add_insecure_port(channel_address)
    if port == 0:
        server.stop(0.1)
        server.wait_for_termination(timeout=2)
        raise OSError(f'Unable to bind port, address {channel_address} already in use.')
    return (server, hangserv, channel_address)


if __name__ == '__main__':
    workdir = os.getcwd()
    print(workdir)
    serve(workdir)


================================================
FILE: src/hangar/remotes.py
================================================
import logging
import tempfile
import time
import warnings
from collections import defaultdict
from contextlib import closing
from pathlib import Path
from typing import (
    List, NamedTuple, Optional, Sequence, Union, Tuple, Set, Dict
)

import grpc
import lmdb
from tqdm import tqdm

from .backends import backend_decoder
from .constants import LMDB_SETTINGS
from .context import Environments
from .records import hash_data_db_key_from_raw_key
from .records import heads, queries, summarize
from .records.commiting import (
    check_commit_hash_in_history,
    move_process_data_to_store,
    unpack_commit_ref,
)
from .remote.client import HangarClient
from .remote.content import ContentWriter, ContentReader, DataWriter
from .txnctx import TxnRegister
from .utils import is_suitable_user_key

logger = logging.getLogger(__name__)

RemoteInfo = NamedTuple('RemoteInfo', [('name', str), ('address', str)])

KeyType = Union[str, int]


class Remotes(object):
    """Class which governs access to remote interactor objects.

    .. note::

       The remote-server implementation is under heavy development, and is
       likely to undergo changes in the future. While we intend to ensure
       compatibility between software versions of Hangar repositories written
       to disk, the API is likely to change. Please follow our process at:
       https://www.github.com/tensorwerk/hangar-py

    """

    def __init__(self, env: Environments):

        self._env: Environments = env
        self._repo_path: Path = self._env.repo_path
        self._client: Optional[HangarClient] = None

    def __verify_repo_initialized(self):
        """Internal method to verify repo initialized before operations occur

        Raises
        ------
        RuntimeError
            If the repository db environments have not been initialized at the
            specified repo path.
        """
        if not self._env.repo_is_initialized:
            raise RuntimeError(
                f'Path {self._repo_path} not Hangar Repo. Use `init_repo()` method')

    def add(self, name: str, address: str) -> RemoteInfo:
        """Add a remote to the repository accessible by `name` at `address`.

        Parameters
        ----------
        name
            the name which should be used to refer to the remote server
            (i.e. 'origin')
        address
            the IP:PORT where the hangar server is running

        Returns
        -------
        RemoteInfo
            Two-tuple containing (``name``, ``address``) of the remote added to
            the client's server list.

        Raises
        ------
        ValueError
            If the provided name contains any characters other than
            alpha-numeric (or "." "_" "-") ascii characters, or if the string
            is longer than 64 characters.
        ValueError
            If a remote with the provided name is already listed on this
            client (no-op). In order to update a remote server address, it
            must be removed and then re-added with the desired address.
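
        Examples
        --------
        A minimal usage sketch, assuming ``remotes`` is an instance of this
        class and that a Hangar server is reachable at the (hypothetical)
        address below.

        >>> remotes.add(name='origin', address='localhost:50051')
        RemoteInfo(name='origin', address='localhost:50051')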
        """
        self.__verify_repo_initialized()
        if (not isinstance(name, str)) or (not is_suitable_user_key(name)):
            raise ValueError(
                f'Remote name {name} of type: {type(name)} invalid. Must be '
                f'string with only alpha-numeric (or "." "_" "-") ascii characters. '
                f'Must be <= 64 characters long.')

        succ = heads.add_remote(self._env.branchenv, name=name, address=address)
        if succ is False:
            raise ValueError(f'No-Op: Remote named: {name} already exists.')
        return RemoteInfo(name=name, address=address)

    def remove(self, name: str) -> RemoteInfo:
        """Remove a remote repository from the branch records

        Parameters
        ----------
        name
            name of the remote to remove the reference to

        Raises
        ------
        KeyError
            If a remote with the provided name does not exist

        Returns
        -------
        RemoteInfo
            Two-tuple containing (``name``, ``address``) of the remote which
            was removed from the client's server list.
        """
        self.__verify_repo_initialized()
        try:
            address = heads.remove_remote(branchenv=self._env.branchenv, name=name)
        except KeyError as e:
            raise e
        return RemoteInfo(name=name, address=address)

    def list_all(self) -> List[RemoteInfo]:
        """List all remote names and addresses recorded in the client's repository.

        Returns
        -------
        List[RemoteInfo]
            list of namedtuple specifying (``name``, ``address``) for each
            remote server recorded in the client repo.
        """
        self.__verify_repo_initialized()
        res = []
        names = heads.get_remote_names(self._env.branchenv)
        for name in names:
            address = heads.get_remote_address(self._env.branchenv, name)
            res.append(RemoteInfo(name=name, address=address))
        return res

    def ping(self, name: str) -> float:
        """Ping remote server and check the round trip time.

        Parameters
        ----------
        name
            name of the remote server to ping

        Returns
        -------
        float
            round trip time it took to ping the server after the connection was
            established and requested client configuration was retrieved

        Raises
        ------
        KeyError
            If no remote with the provided name is recorded.
        ConnectionError
            If the remote server could not be reached.
        """
        self.__verify_repo_initialized()
        address = heads.get_remote_address(branchenv=self._env.branchenv, name=name)
        self._client = HangarClient(envs=self._env, address=address)
        with closing(self._client) as client:
            client: HangarClient
            start = time.time()
            client.ping_pong()
            elapsed = time.time() - start
        return elapsed

    def fetch(self, remote: str, branch: str) -> str:
        """Retrieve new commits made on a remote repository branch.

        This is semantically identical to a `git fetch` command. Any new commits
        along the branch will be retrieved, but placed on an isolated branch to
        the local copy (ie. ``remote_name/branch_name``). In order to unify
        histories, simply merge the remote branch into the local branch.

        Parameters
        ----------
        remote
            name of the remote repository to fetch from (ie. ``origin``)
        branch
            name of the branch to fetch the commit references for.

        Returns
        -------
        str
            Name of the branch which stores the retrieved commits.
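
        Examples
        --------
        A hedged sketch, assuming ``remotes`` is an instance of this class and
        a remote named ``origin`` has already been added.

        >>> remotes.fetch(remote='origin', branch='master')
        'origin/master'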
        """
        self.__verify_repo_initialized()
        address = heads.get_remote_address(self._env.branchenv, name=remote)
        self._client = HangarClient(envs=self._env, address=address)
        CW = ContentWriter(self._env)

        with closing(self._client) as client:
            client: HangarClient

            # ----------------- setup / validate operations -------------------

            try:
                cHEAD = heads.get_branch_head_commit(self._env.branchenv, branch)
            except ValueError:
                # branch does not exist on local client
                try:
                    s_branch = client.fetch_branch_record(branch)
                    sHEAD = s_branch.rec.commit
                except grpc.RpcError as rpc_error:
                    if rpc_error.code() == grpc.StatusCode.NOT_FOUND:
                        # branch does not exist on remote
                        logger.error(rpc_error.details())
                    raise rpc_error
            else:
                c_bhistory = summarize.list_history(
                    self._env.refenv, self._env.branchenv, branch_name=branch)
                try:
                    s_branch = client.fetch_branch_record(branch)
                    sHEAD = s_branch.rec.commit
                except grpc.RpcError as rpc_error:
                    if rpc_error.code() == grpc.StatusCode.NOT_FOUND:
                        # branch does not exist on remote
                        logger.error(rpc_error.details())
                    raise rpc_error

                # verify histories are intact and should be synced
                if sHEAD == cHEAD:
                    warnings.warn(f'NoOp: remote HEAD {sHEAD} == client HEAD {cHEAD}', UserWarning)
                    return branch
                elif sHEAD in c_bhistory['order']:
                    warnings.warn(
                        f'REJECTED: remote HEAD: {sHEAD} behind local: {cHEAD}', UserWarning)
                    return branch

            # ------------------- get data ------------------------------------

            mCmtResponse = client.fetch_find_missing_commits(branch)
            m_cmts = mCmtResponse.commits
            for commit in tqdm(m_cmts, desc='fetching commit data refs'):
                mSchemaResponse = client.fetch_find_missing_schemas(commit)
                for schema in mSchemaResponse.schema_digests:
                    schema_hash, schemaVal = client.fetch_schema(schema)
                    CW.schema(schema_hash, schemaVal)
                # Record missing data hash digests (does not get data itself)
                m_hashes = client.fetch_find_missing_hash_records(commit)
                m_schema_hash_map = defaultdict(list)
                for digest, schema_hash in m_hashes:
                    m_schema_hash_map[schema_hash].append((digest, schema_hash))

                DW = DataWriter(self._env)
                with DW as DW_CM:
                    for schema_hash, m_digests_schemas in m_schema_hash_map.items():
                        for data_digest, data_schema_hash in m_digests_schemas:
                            DW_CM.data(schema_hash,
                                       data_digest=data_digest,
                                       data=data_schema_hash,
                                       backend='50')

            # Get missing commit reference specification
            for commit in tqdm(m_cmts, desc='fetching commit spec'):
                cmt, parentVal, specVal, refVal = client.fetch_commit_record(commit)
                CW.commit(cmt, parentVal, specVal, refVal)

            # --------------------------- At completion -----------------------

            # Update (or create) remote branch pointer with new HEAD commit
            fetchBranchName = f'{remote}/{branch}'
            try:
                heads.create_branch(
                    self._env.branchenv, name=fetchBranchName, base_commit=sHEAD)
            except ValueError:
                heads.set_branch_head_commit(
                    self._env.branchenv, branch_name=fetchBranchName, commit_hash=sHEAD)

            return fetchBranchName

    def fetch_data_sample(self,
                          remote: str,
                          column: str,
                          samples: Union[KeyType, Sequence[KeyType],
                                         Sequence[Union[Tuple[KeyType, KeyType], Tuple[KeyType], KeyType]]],
                          branch: Optional[str] = None,
                          commit: Optional[str] = None) -> str:
        """Granular fetch data operation allowing selection of individual samples.

        .. warning::

            This is a specialized version of the :meth:`fetch_data` method, intended
            for situations where some prior knowledge about the data is available.
            Most users should prefer :meth:`fetch_data` over this version.

        In some cases, it may be desirable to only perform a fetch data operation
        for some particular samples within a column (without needing to download any
        other data contained in the column). This method allows for the granular
        specification of keys to fetch in a certain column at the selected `branch` /
        `commit` time point.

        Parameters
        ----------
        remote
            name of the remote server to pull data from
        column
            name of the column which data is being fetched from.
        samples
            Key, or sequence of sample keys to select.

            *  Flat column layouts should provide just a single key, or flat sequence of
               keys which will be fetched from the server. ie. `sample1` OR
               [`sample1`, `sample2`, `sample3`, etc.]

            *  Nested column layouts can provide tuples specifying `(sample, subsample)`
               records to retrieve, tuples with an `Ellipsis` character in the `subsample`
               index `(sample, ...)` (which will fetch all subsamples for the given sample),
               or can provide lone sample keys in the sequences `sample` (which will also fetch
               all subsamples listed under the sample) OR ANY COMBINATION of the above.
        branch
            branch head to operate on, either ``branch`` or ``commit`` argument must be
            passed, but NOT both. Default is ``None``
        commit
            commit to operate on, either `branch` or `commit` argument must be passed,
            but NOT both.

        Returns
        -------
        str
            On success, the commit hash which data was fetched into.
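
        Examples
        --------
        A hedged sketch; ``remotes`` is assumed to be an instance of this
        class, and the remote, column, and sample names below are hypothetical.

        >>> # flat column layout: fetch two specific samples at a branch head
        >>> cmt = remotes.fetch_data_sample(
        ...     remote='origin', column='images',
        ...     samples=['img_0', 'img_1'], branch='master')

        >>> # nested column layout: every subsample of one sample, plus a
        >>> # single (sample, subsample) record
        >>> cmt = remotes.fetch_data_sample(
        ...     remote='origin', column='scans',
        ...     samples=[('patient_0', ...), ('patient_1', 'slice_3')],
        ...     branch='master')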
        """
        self.__verify_repo_initialized()
        address = heads.get_remote_address(branchenv=self._env.branchenv, name=remote)
        self._client = HangarClient(envs=self._env, address=address)

        # ----------------- setup / validate operations -----------------------

        if all([branch, commit]):
            raise ValueError('``branch`` and ``commit`` args cannot be set simultaneously')
        if branch is not None:
            cmt = heads.get_branch_head_commit(self._env.branchenv, branch_name=branch)
        else:
            cmt = commit
            cmtExist = check_commit_hash_in_history(self._env.refenv, commit)
            if not cmtExist:
                raise ValueError(f'specified commit: {commit} does not exist in the repo.')

        if not isinstance(samples, (list, tuple)):
            samples = (samples,)

        # ------------------ Determine which data to fetch --------------------

        with tempfile.TemporaryDirectory() as tempD:
            # share unpacked ref db between dependent methods
            tmpDF = Path(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=str(tmpDF), **LMDB_SETTINGS)
            try:
                with tmpDB.begin(write=True) as txn:
                    with txn.cursor() as curs:
                        notEmpty = curs.first()
                        while notEmpty:
                            notEmpty = curs.delete()
                unpack_commit_ref(self._env.refenv, tmpDB, cmt)
                recQuery = queries.RecordQuery(tmpDB)
                selectedDataRecords = self._select_digests_fetch_data_sample(
                    cmt=cmt, column=column, recQuery=recQuery, samples=samples
                )
            finally:
                tmpDB.close()

            m_schema_hash_map = self._form_missing_schema_digest_map(
                selectedDataRecords=selectedDataRecords, hashenv=self._env.hashenv
            )

            # -------------------- download missing data --------------------------

            DW = DataWriter(self._env)
            total_data = sum(len(v) for v in m_schema_hash_map.values())
            with closing(self._client) as client, tqdm(total=total_data, desc='fetching data') as pbar, DW as DW_CM:
                client: HangarClient  # type hint
                for schema in m_schema_hash_map.keys():
                    hashes = set(m_schema_hash_map[schema])
                    origins = client.fetch_data_origin(hashes)
                    client.fetch_data(
                        origins=origins,
                        datawriter_cm=DW_CM,
                        schema=schema,
                        pbar=pbar)

            move_process_data_to_store(self._repo_path, remote_operation=True)
            return cmt
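
    # Illustrative usage sketch (not part of the library source): assuming this
    # method is exposed as ``repo.remote.fetch_data_sample`` and that a remote
    # named 'origin', a flat column 'images', and sample keys 'sample1' /
    # 'sample2' exist (all hypothetical names), a call might look like:
    #
    #     >>> repo = Repository('/path/to/repo')
    #     >>> repo.remote.fetch_data_sample(
    #     ...     remote='origin', branch='master',
    #     ...     column='images', samples=['sample1', 'sample2'])
    #     'a1b2c3...'   # commit digest the fetched data was recorded under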

    @staticmethod
    def _select_digests_fetch_data_sample(
            cmt: str,
            column: str,
            recQuery: queries.RecordQuery,
            samples: Union[KeyType, Sequence[KeyType],
                           Sequence[Union[Tuple[KeyType, KeyType], Tuple[KeyType], KeyType]]]
    ) -> Set[queries.DataRecordVal]:
        """Map sample keys to data record digest

        Depending on column layout, the mapping of samples -> digests
        is handled differently.

        "flat" columns:
            There is a direct map of sample key -> digest. If a sample
            does not exist in the column, a KeyError is raised.
        "nested" column:
            There is a layered mapping of sample key -> subsamples -> digests
            We take the approach that only specifying a sample key results
            in fetching all subsamples contained under it.

        Parameters
        ----------
        cmt
            commit which is being operated on
        column
            column name
        recQuery
            record query object set up with necessary `dataenv`
        samples
            specified samples to query

        Returns
        -------
        Set[queries.DataRecordVal]
            data records which should be fetched (includes digests)
        """
        # handle column_names option
        cmt_column_names = recQuery.column_names()
        if column not in cmt_column_names:
            raise KeyError(f'column name {column} does not exist in repo at commit {cmt}')

        selectedDataRecords = set()
        column_layout = recQuery.column_schema_layout(column=column)
        if column_layout == 'flat':
            sampleRecords = {}
            for keyRecord, dataRecord in recQuery.column_data_records(column):
                sampleRecords[keyRecord.sample] = dataRecord
            for _key in samples:
                if isinstance(_key, (str, int)):
                    selectedDataRecords.add(sampleRecords[_key])
                else:
                    raise TypeError(_key)

        elif column_layout == 'nested':
            sampleRecords = defaultdict(dict)
            for keyRecord, dataRecord in recQuery.column_data_records(column):
                sampleRecords[keyRecord.sample].update(
                    {keyRecord.subsample: dataRecord}
                )

            for _key in samples:
                if isinstance(_key, (list, tuple)):
                    if len(_key) == 2:
                        # sequence specifying `(sample, subsample)`
                        if _key[1] == Ellipsis:
                            # Ellipsis indicator ``...`` is interpreted as:
                            # "get all subsamples under this sample key"
                            for _spec in sampleRecords[_key[0]].values():
                                selectedDataRecords.add(_spec)
                        else:
                            # otherwise "get sample + subsample named as specified"
                            selectedDataRecords.add(sampleRecords[_key[0]][_key[1]])
                    elif len(_key) == 1:
                        # sequence specifying `(sample,)` interpreted as:
                        # "get all subsamples under this key"
                        for _spec in sampleRecords[_key[0]].values():
                            selectedDataRecords.add(_spec)
                    else:
                        raise ValueError(
                            f'nested column specifier sequence len() must be '
                            f'either length ``1`` or ``2``. key {_key} has length '
                            f'{len(_key)}.')
                elif isinstance(_key, (str, int)):
                    # if not sequence, then `key` == `sample`; interpreted as:
                    # "get all subsamples under this key"
                    for _spec in sampleRecords[_key].values():
                        selectedDataRecords.add(_spec)
                else:
                    raise TypeError(_key)
        return selectedDataRecords
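
    # Illustrative sketch of how the key forms described above resolve for a
    # hypothetical nested column (sample/subsample names and records are made up):
    #
    #     sampleRecords == {'s1': {'a': rec1, 'b': rec2}, 's2': {'a': rec3}}
    #
    #     ('s1', 'a')  -> selects {rec1}
    #     ('s1', ...)  -> selects {rec1, rec2}   (Ellipsis: all subsamples of 's1')
    #     ('s1',)      -> selects {rec1, rec2}
    #     's2'         -> selects {rec3}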

    def fetch_data(self,
                   remote: str,
                   branch: str = None,
                   commit: str = None,
                   *,
                   column_names: Optional[Sequence[str]] = None,
                   retrieve_all_history: bool = False) -> List[str]:
        """Retrieve the data for some commit which exists in a `partial` state.

        Parameters
        ----------
        remote
            name of the remote to pull the data from
        branch
            The name of a branch whose HEAD will be used as the data fetch
            point. If None, ``commit`` argument expected, by default None
        commit
            Commit hash to retrieve data for, If None, ``branch`` argument
            expected, by default None
        column_names
            Names of the columns which should be retrieved for the particular
            commits; any columns not named will not have their data fetched
            from the server. Default behavior is to retrieve all columns.
        retrieve_all_history
            if data should be retrieved for all history accessible by the parents
            of this commit HEAD, by default False

        Returns
        -------
        List[str]
            commit hashes for which data was fetched.

        Raises
        ------
        ValueError
            if branch and commit args are set simultaneously.
        ValueError
            if specified commit does not exist in the repository.
        ValueError
            if branch name does not exist in the repository.
        """
        self.__verify_repo_initialized()
        address = heads.get_remote_address(branchenv=self._env.branchenv, name=remote)
        self._client = HangarClient(envs=self._env, address=address)

        # ----------------- setup / validate operations -----------------------

        if all([branch, commit]):
            raise ValueError(f'``branch`` and ``commit`` args cannot be set simultaneously')
        if branch is not None:
            cmt = heads.get_branch_head_commit(self._env.branchenv, branch_name=branch)
        else:
            cmt = commit
            cmtExist = check_commit_hash_in_history(self._env.refenv, commit)
            if not cmtExist:
                raise ValueError(f'specified commit: {commit} does not exist in the repo.')

        # --------------- negotiate missing data to get -----------------------

        if retrieve_all_history is True:
            hist = summarize.list_history(self._env.refenv, self._env.branchenv, commit_hash=cmt)
            commits = hist['order']
        else:
            commits = [cmt]

        with tempfile.TemporaryDirectory() as tempD:
            # share unpacked ref db between dependent methods
            tmpDF = Path(tempD, 'test.lmdb')
            tmpDB = lmdb.open(path=str(tmpDF), **LMDB_SETTINGS)

            try:
                # all history argument
                selectedDataRecords = set()
                for commit in tqdm(commits, desc='counting objects'):
                    with tmpDB.begin(write=True) as txn:
                        with txn.cursor() as curs:
                            notEmpty = curs.first()
                            while notEmpty:
                                notEmpty = curs.delete()
                    unpack_commit_ref(self._env.refenv, tmpDB, commit)
                    recQuery = queries.RecordQuery(tmpDB)
                    commitDataRecords = self._select_digest_fetch_data(
                        column_names=column_names, recQuery=recQuery
                    )
                    selectedDataRecords.update(commitDataRecords)
            finally:
                tmpDB.close()

        m_schema_hash_map = self._form_missing_schema_digest_map(
            selectedDataRecords=selectedDataRecords, hashenv=self._env.hashenv
        )

        # -------------------- download missing data --------------------------

        DW = DataWriter(self._env)
        total_data = sum(len(v) for v in m_schema_hash_map.values())

        with closing(self._client) as client, \
                tqdm(total=total_data, desc='fetching data') as pbar, \
                DW as DW_CM:
            client: HangarClient  # type hint
            for schema in m_schema_hash_map.keys():
                hashes = set(m_schema_hash_map[schema])
                origins = client.fetch_data_origin(hashes)
                client.fetch_data(
                    origins=origins,
                    datawriter_cm=DW_CM,
                    schema=schema,
                    pbar=pbar)

        move_process_data_to_store(self._repo_path, remote_operation=True)
        return commits
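
    # Illustrative usage sketch (remote and column names are hypothetical):
    # localize data for selected columns of the current 'master' HEAD after a
    # partial clone / fetch.
    #
    #     >>> repo.remote.fetch_data(
    #     ...     'origin', branch='master',
    #     ...     column_names=['images', 'labels'])
    #     ['a1b2c3...']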

    @staticmethod
    def _form_missing_schema_digest_map(
            selectedDataRecords: Set[queries.DataRecordVal],
            hashenv: lmdb.Environment
    ) -> Dict[str, List[str]]:
        """Calculate mapping of schemas to data digests.

        Parameters
        ----------
        selectedDataRecords
        hashenv

        Returns
        -------
        Dict[str, List[str]]
            map of all schema digests -> sequence of all data hash digests
            registered under that schema.
        """

        try:
            hashTxn = TxnRegister().begin_reader_txn(hashenv)
            m_schema_hash_map = defaultdict(list)
            for hashVal in selectedDataRecords:
                hashKey = hash_data_db_key_from_raw_key(hashVal.digest)
                hashRef = hashTxn.get(hashKey)
                be_loc = backend_decoder(hashRef)
                if be_loc.backend == '50':
                    m_schema_hash_map[be_loc.schema_hash].append(hashVal.digest)
        finally:
            TxnRegister().abort_reader_txn(hashenv)
        return m_schema_hash_map

    @staticmethod
    def _select_digest_fetch_data(
            column_names: Union[None, Sequence[str]],
            recQuery: queries.RecordQuery
    ) -> Set[queries.DataRecordVal]:
        """Map column names to data digests.

        Parameters
        ----------
        column_names
            column names to fetch data for. If ``None``, download all column data.
        recQuery
            initialized record query object set up with appropriate ``dataenv``.

        Returns
        -------
        Set[queries.DataRecordVal]
            data records which should be fetched (includes digests)
        """
        selectedDataRecords = set()
        cmt_column_names = recQuery.column_names()
        if column_names is None:
            # handle column_names option
            cmt_columns = cmt_column_names
        else:
            cmt_columns = [col for col in column_names if col in cmt_column_names]
        for col in cmt_columns:
            cmtData_hashs = recQuery.column_data_hashes(col)
            selectedDataRecords.update(cmtData_hashs)
        return selectedDataRecords

    def push(self, remote: str, branch: str,
             *, username: str = '', password: str = '') -> str:
        """push changes made on a local repository to a remote repository.

        This method is semantically identical to a ``git push`` operation.
        Any local updates will be sent to the remote repository.

        .. note::

            The current implementation is not capable of performing a
            ``force push`` operation. As such, remote branches with diverged
            histories to the local repo must be retrieved, locally merged,
            then re-pushed. This feature will be added in the near future.

        Parameters
        ----------
        remote
            name of the remote repository to make the push on.
        branch
            Name of the branch to push to the remote. If the branch name does
            not exist on the remote, it will be created.
        username
            credentials to use for authentication if repository push restrictions
            are enabled, by default ''.
        password
            credentials to use for authentication if repository push restrictions
            are enabled, by default ''.

        Returns
        -------
        str
            Name of the branch which was pushed
        """
        self.__verify_repo_initialized()
        try:
            address = heads.get_remote_address(self._env.branchenv, name=remote)
            cHEAD = heads.get_branch_head_commit(self._env.branchenv, branch)
        except (KeyError, ValueError) as e:
            raise e from None

        CR = ContentReader(self._env)
        self._client = HangarClient(envs=self._env,
                                    address=address,
                                    auth_username=username,
                                    auth_password=password)

        # ----------------- setup / validate operations -------------------

        with closing(self._client) as client:
            client: HangarClient  # type hinting for development
            CR: ContentReader
            c_bhistory = summarize.list_history(refenv=self._env.refenv,
                                                branchenv=self._env.branchenv,
                                                branch_name=branch)
            try:
                s_branch = client.fetch_branch_record(branch)
            except grpc.RpcError as rpc_error:
                # Do not raise if error due to branch not existing on server
                if rpc_error.code() != grpc.StatusCode.NOT_FOUND:
                    raise rpc_error
            else:
                sHEAD = s_branch.rec.commit
                if sHEAD == cHEAD:
                    warnings.warn(
                        f'NoOp: server HEAD: {sHEAD} == client HEAD: {cHEAD}', UserWarning)
                    return branch
                elif (sHEAD not in c_bhistory['order']) and (sHEAD != ''):
                    warnings.warn(
                        f'REJECTED: server branch has commits not on client', UserWarning)
                    return branch

            # --------------- negotiate missing data to send -------------------

            try:
                # First push op verifies user permissions if push restricted (NOT SECURE)
                res = client.push_find_missing_commits(branch)
                m_commits = res.commits
            except grpc.RpcError as rpc_error:
                if rpc_error.code() == grpc.StatusCode.PERMISSION_DENIED:
                    raise PermissionError(f'{rpc_error.code()}: {rpc_error.details()}')
                else:
                    raise rpc_error

            m_schemas = set()
            m_schema_hashs = defaultdict(set)
            with tempfile.TemporaryDirectory() as tempD:
                tmpDF = Path(tempD, 'test.lmdb')
                tmpDB = lmdb.open(path=str(tmpDF), **LMDB_SETTINGS)
                for commit in tqdm(m_commits, desc='counting objects'):
                    # share unpacked ref db between dependent methods
                    with tmpDB.begin(write=True) as txn:
                        with txn.cursor() as curs:
                            notEmpty = curs.first()
                            while notEmpty:
                                notEmpty = curs.delete()
                    unpack_commit_ref(self._env.refenv, tmpDB, commit)
                    # schemas
                    schema_res = client.push_find_missing_schemas(commit, tmpDB=tmpDB)
                    m_schemas.update(schema_res.schema_digests)
                    # data hashs
                    m_cmt_schema_hashs = defaultdict(list)
                    mis_hashes_sch = client.push_find_missing_hash_records(commit, tmpDB=tmpDB)
                    for hsh, schema in mis_hashes_sch:
                        m_cmt_schema_hashs[schema].append(hsh)
                    for schema, hashes in m_cmt_schema_hashs.items():
                        m_schema_hashs[schema].update(hashes)
                tmpDB.close()

            # ------------------------- send data -----------------------------

            # schemas
            for m_schema in tqdm(m_schemas, desc='pushing schemas'):
                schemaVal = CR.schema(m_schema)
                if not schemaVal:
                    raise KeyError(f'no schema with hash: {m_schema} exists')
                client.push_schema(m_schema, schemaVal)
            # data
            total_data = sum([len(v) for v in m_schema_hashs.values()])
            with tqdm(total=total_data, desc='pushing data') as p:
                client.push_data_begin_context()
                try:
                    for dataSchema, dataHashes in m_schema_hashs.items():
                        client.push_data(dataSchema, dataHashes, pbar=p)
                        p.update(1)
                finally:
                    client.push_data_end_context()
            # commit refs
            for commit in tqdm(m_commits, desc='pushing commit refs'):
                cmtContent = CR.commit(commit)
                if not cmtContent:
                    raise KeyError(f'no commit with hash: {commit} exists')
                client.push_commit_record(commit=cmtContent.commit,
                                          parentVal=cmtContent.cmtParentVal,
                                          specVal=cmtContent.cmtSpecVal,
                                          refVal=cmtContent.cmtRefVal)

            # --------------------------- At completion -----------------------

            # update local remote HEAD pointer
            branchHead = heads.get_branch_head_commit(self._env.branchenv, branch)
            try:
                client.push_branch_record(branch, branchHead)
            except grpc.RpcError as rpc_error:
                # Log unexpected errors without raising; re-raise only if the
                # branch record already exists on the server
                if rpc_error.code() != grpc.StatusCode.ALREADY_EXISTS:
                    logger.warning(f'CODE: {rpc_error.code()} DETAILS: {rpc_error.details()}')
                else:
                    raise rpc_error
            else:
                cRemoteBranch = f'{remote}/{branch}'
                if cRemoteBranch not in heads.get_branch_names(self._env.branchenv):
                    heads.create_branch(branchenv=self._env.branchenv,
                                        name=cRemoteBranch,
                                        base_commit=branchHead)
                else:
                    heads.set_branch_head_commit(branchenv=self._env.branchenv,
                                                 branch_name=cRemoteBranch,
                                                 commit_hash=branchHead)
            return branch
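
    # Illustrative usage sketch (remote name and credentials are hypothetical):
    #
    #     >>> repo.remote.push('origin', 'master')
    #     'master'
    #     >>> # if the server enforces push restrictions:
    #     >>> repo.remote.push('origin', 'master',
    #     ...                  username='user', password='pass')
    #     'master'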


================================================
FILE: src/hangar/repository.py
================================================
from pathlib import Path
import weakref
import warnings
from typing import Union, Optional, List
from io import StringIO

from .merger import select_merge_algorithm
from .constants import DIR_HANGAR
from .remotes import Remotes
from .context import Environments
from .diagnostics import ecosystem, integrity
from .records import heads, parsing, summarize, vcompat, commiting
from .checkout import ReaderCheckout, WriterCheckout
from .diff import DiffAndConflicts, ReaderUserDiff
from .utils import (
    is_valid_directory_path,
    is_suitable_user_key,
    is_ascii,
    folder_size,
    format_bytes
)


class Repository(object):
    """Launching point for all user operations in a Hangar repository.

    All interaction, including the ability to initialize a repo, checkout a
    commit (for either reading or writing), create a branch, merge branches, or
    generally view the contents or state of the local repository starts here.
    Just provide this class instance with a path to an existing Hangar
    repository, or to a directory where one should be initialized, and all
    required data for starting your work on the repo will automatically be
    populated.

        >>> from hangar import Repository
        >>> repo = Repository('foo/path/to/dir')

    Parameters
    ----------
    path : Union[str, os.PathLike]
        local directory path where the Hangar repository exists (or initialized)
    exists : bool, optional
        True if a Hangar repository should exist at the given directory path.
        Should no Hangar repository exist at that location, a UserWarning will
        be raised indicating that the :meth:`init` method needs to be called.

        False if the provided path does not need to (but optionally can) contain a
        Hangar repository. If a Hangar repository does not exist at that path, the
        usual UserWarning will be suppressed.

        In both cases, the path must exist and the user must have sufficient OS
        permissions to write to that location. Default = True
    """

    def __init__(self, path: Union[str, Path], exists: bool = True):

        if isinstance(path, (str, bytes)):
            path = Path(path)

        try:
            usr_path = is_valid_directory_path(path)
        except (TypeError, NotADirectoryError, PermissionError) as e:
            raise e from None

        repo_pth = usr_path.joinpath(DIR_HANGAR)
        if exists is False:
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', UserWarning)
                envs = Environments(pth=repo_pth)
        else:
            envs = Environments(pth=repo_pth)

        self._repo_path: Path = repo_pth
        self._env: Environments = envs
        self._remote: Remotes = Remotes(self._env)

    def _repr_pretty_(self, p, cycle):
        """provide a pretty-printed repr for ipython based user interaction.

        Parameters
        ----------
        p : printer
            io stream printer type object which is provided via ipython
        cycle : bool
            if the pretty-printer detects a cycle or infinite loop. Not a
            concern here since we just output the text and return, no looping
            required.

        """
        self.__verify_repo_initialized()
        res = f'Hangar {self.__class__.__name__}\
               \n    Repository Path  : {self.path}\
               \n    Writer-Lock Free : {heads.writer_lock_held(self._env.branchenv)}\n'
        p.text(res)

    def __repr__(self):
        """Override the default repr to show useful information to developers.

        Note: the pprint repr (ipython enabled) is separately defined in
        :py:meth:`_repr_pretty_`. We specialize because we assume that anyone
        operating in a terminal-based interpreter is probably a more advanced
        developer-type, and expects traditional repr information instead of a
        user facing summary of the repo. Though if we're wrong, go ahead and
        feel free to reassign the attribute :) won't hurt our feelings, promise.

        Returns
        -------
        string
            formatted representation of the object
        """
        res = f'{self.__class__}(path={self._repo_path})'
        return res

    def __verify_repo_initialized(self):
        """Internal method to verify repo initialized before operations occur

        Raises
        ------
        RuntimeError
            If the repository db environments have not been initialized at the
            specified repo path.
        """
        if not self._env.repo_is_initialized:
            msg = f'Repository at path: {self._repo_path} has not been initialized. '\
                  f'Please run the `init_repo()` function'
            raise RuntimeError(msg)

    @property
    def remote(self) -> Remotes:
        """Accessor to the methods controlling remote interactions.

        .. seealso::

           :class:`Remotes` for available methods of this property

        Returns
        -------
        Remotes
            Accessor object methods for controlling remote interactions.
        """
        proxy = weakref.proxy(self._remote)
        return proxy

    @property
    def path(self) -> str:
        """Return the path to the repository on disk, read-only attribute

        Returns
        -------
        str
            path to the specified repository, not including `.hangar` directory
        """
        self.__verify_repo_initialized()
        return str(self._repo_path.parent)

    @property
    def writer_lock_held(self) -> bool:
        """Check if the writer lock is currently marked as held. Read-only attribute.

        Returns
        -------
        bool
            True if the writer-lock is held, False if the writer-lock is free.
        """
        self.__verify_repo_initialized()
        return not heads.writer_lock_held(self._env.branchenv)

    @property
    def version(self) -> str:
        """Find the version of Hangar software the repository is written with

        Returns
        -------
        str
            semantic ``major.minor.micro`` version of the Hangar software the
            repository was written with.
        """
        self.__verify_repo_initialized()
        res = vcompat.get_repository_software_version_spec(self._env.branchenv)
        return str(res)

    @property
    def initialized(self) -> bool:
        """
        Check if the repository has been initialized or not

        Returns
        -------
        bool
            True if repository has been initialized.
        """
        return self._env.repo_is_initialized

    @property
    def size_nbytes(self) -> int:
        """Disk space used by the repository returned in number of bytes.

            >>> repo.size_nbytes
            1234567890
            >>> print(type(repo.size_nbytes))
            <class 'int'>

        Returns
        -------
        int
            number of bytes used by the repository on disk.
        """
        self.__verify_repo_initialized()
        return folder_size(self._repo_path, recurse=True)

    @property
    def size_human(self) -> str:
        """Disk space used by the repository returned in human readable string.

            >>> repo.size_human
            '1.23 GB'
            >>> print(type(repo.size_human))
            <class 'str'>

        Returns
        -------
        str
            disk space used by the repository formatted in human readable text.
        """
        self.__verify_repo_initialized()
        nbytes = folder_size(self._repo_path, recurse=True)
        return format_bytes(nbytes)

    def checkout(self,
                 write: bool = False,
                 *,
                 branch: str = '',
                 commit: str = '') -> Union[ReaderCheckout, WriterCheckout]:
        """Checkout the repo at some point in time in either `read` or `write` mode.

        Only one writer instance can exist at a time. A write-enabled checkout
        must create a staging area from the ``HEAD`` commit of a branch. In
        contrast, any number of reader checkouts can exist at the same time
        and can specify either a branch name or a commit hash.

        Parameters
        ----------
        write : bool, optional
            Specify if the checkout is write capable, defaults to False
        branch : str, optional
            name of the branch to checkout. This utilizes the state of the repo
            as it existed at the branch ``HEAD`` commit when this checkout object
            was instantiated, defaults to ''
        commit : str, optional
            specific hash of a commit to use for the checkout (instead of a
            branch ``HEAD`` commit). This argument takes precedence over a branch
            name parameter if it is set. Note: this will only be used in
            non-writeable checkouts, defaults to ''

        Raises
        ------
        ValueError
            If the value of `write` argument is not boolean
        ValueError
            If ``commit`` argument is set to any value when ``write=True``.
            Only ``branch`` argument is allowed.

        Returns
        -------
        Union[ReaderCheckout, WriterCheckout]
            Checkout object which can be used to interact with the repository
            data
        """
        self.__verify_repo_initialized()
        try:
            if write is True:
                if commit != '':
                    raise ValueError(
                        f'Only `branch` argument can be set if `write=True`. '
                        f'Setting `commit={commit}` not allowed.')
                if branch == '':
                    branch = heads.get_staging_branch_head(self._env.branchenv)
                co = WriterCheckout(
                    repo_pth=self._repo_path,
                    branch_name=branch,
                    hashenv=self._env.hashenv,
                    refenv=self._env.refenv,
                    stageenv=self._env.stageenv,
                    branchenv=self._env.branchenv,
                    stagehashenv=self._env.stagehashenv)
                return co
            elif write is False:
                commit_hash = self._env.checkout_commit(
                    branch_name=branch, commit=commit)
                co = ReaderCheckout(
                    base_path=self._repo_path,
                    dataenv=self._env.cmtenv[commit_hash],
                    hashenv=self._env.hashenv,
                    branchenv=self._env.branchenv,
                    refenv=self._env.refenv,
                    commit=commit_hash)
                return co
            else:
                raise ValueError("Argument `write` only takes True or False as value")
        except (RuntimeError, ValueError) as e:
            raise e from None
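
    # Illustrative usage sketch: one write-enabled checkout on a branch HEAD and
    # any number of read-only checkouts on a branch or commit (the digest shown
    # is a made-up placeholder).
    #
    #     >>> w_co = repo.checkout(write=True, branch='master')
    #     >>> # ... add / modify column data ...
    #     >>> w_co.commit('some message')
    #     >>> w_co.close()
    #     >>> r_co = repo.checkout(branch='master')
    #     >>> r_co2 = repo.checkout(commit='a1b2c3...')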

    def clone(self, user_name: str, user_email: str, remote_address: str,
              *, remove_old: bool = False) -> str:
        """Download a remote repository to the local disk.

        The clone method implemented here is very similar to a `git clone`
        operation. This method will pull all commit records, history, and data
        which are parents of the remote's `master` branch head commit. If a
        :class:`Repository` exists at the specified directory,
        the operation will fail.

        Parameters
        ----------
        user_name : str
            Name of the person who will make commits to the repository. This
            information is recorded permanently in the commit records.
        user_email : str
            Email address of the repository user. This information is recorded
            permanently in any commits created.
        remote_address : str
            location where the
            :class:`hangar.remote.server.HangarServer` process is
            running and accessible by the clone user.
        remove_old : bool, optional, kwarg only
            DANGER! DEVELOPMENT USE ONLY! If enabled, a
            :class:`hangar.repository.Repository` existing on disk at the same
            path as the requested clone location will be completely removed and
            replaced with the newly cloned repo. (the default is False, which
            will not modify any contents on disk and which will refuse to create
            a repository at a given location if one already exists there.)

        Returns
        -------
        str
            Name of the master branch for the newly cloned repository.
        """
        self.init(user_name=user_name, user_email=user_email, remove_old=remove_old)
        self._remote.add(name='origin', address=remote_address)
        branch = self._remote.fetch(remote='origin', branch='master')
        HEAD = heads.get_branch_head_commit(self._env.branchenv, branch_name=branch)
        heads.set_branch_head_commit(self._env.branchenv, 'master', HEAD)
        with warnings.catch_warnings(record=False):
            warnings.simplefilter('ignore', category=UserWarning)
            co = self.checkout(write=True, branch='master')
            co.reset_staging_area()
            co.close()
        return 'master'
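
    # Illustrative usage sketch (server address is a made-up placeholder):
    #
    #     >>> repo = Repository('/path/to/new/dir', exists=False)
    #     >>> repo.clone('A User', 'a.user@example.com',
    #     ...            'localhost:50051', remove_old=False)
    #     'master'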

    def init(self,
             user_name: str,
             user_email: str,
             *,
             remove_old: bool = False) -> str:
        """Initialize a Hangar repository at the specified directory path.

        This function must be called before a checkout can be performed.

        Parameters
        ----------
        user_name : str
            Name of the repository user account.
        user_email : str
            Email address of the repository user account.
        remove_old : bool, kwarg-only
            DEVELOPER USE ONLY -- remove and reinitialize a Hangar
            repository at the given path, Default = False

        Returns
        -------
        str
            the full directory path where the Hangar repository was
            initialized on disk.
        """
        pth = self._env.init_repo(user_name=user_name,
                                  user_email=user_email,
                                  remove_old=remove_old)
        return str(pth)

    def log(self,
            branch: str = None,
            commit: str = None,
            *,
            return_contents: bool = False,
            show_time: bool = False,
            show_user: bool = False) -> Optional[dict]:
        """Displays a pretty printed commit log graph to the terminal.

        .. note::

            For programmatic access, the ``return_contents`` value can be set to
            ``True``, which will retrieve relevant commit specifications as
            dictionary elements.

        Parameters
        ----------
        branch : str, optional
            The name of the branch to start the log process from. (Default value
            = None)
        commit : str, optional
            The commit hash to start the log process from. (Default value = None)
        return_contents : bool, optional, kwarg only
            If true, return the commit graph specifications in a dictionary
            suitable for programmatic access/evaluation.
        show_time : bool, optional, kwarg only
            If true and return_contents is False, show the time of each commit
            on the printed log graph
        show_user : bool, optional, kwarg only
            If true and return_contents is False, show the committer of each
            commit on the printed log graph

        Returns
        -------
        Optional[dict]
            Dict containing the commit ancestor graph, and all specifications.
        """
        self.__verify_repo_initialized()
        res = summarize.log(branchenv=self._env.branchenv,
                            refenv=self._env.refenv,
                            branch=branch,
                            commit=commit,
                            return_contents=return_contents,
                            show_time=show_time,
                            show_user=show_user)
        return res
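
    # Illustrative usage sketch: print the graph to the terminal, or capture the
    # specifications for programmatic use (branch name is hypothetical).
    #
    #     >>> repo.log(branch='master')                   # pretty printed graph
    #     >>> res = repo.log(branch='master', return_contents=True)
    #     >>> isinstance(res, dict)
    #     True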

    def summary(self, *, branch: str = '', commit: str = '') -> None:
        """Print a summary of the repository contents to the terminal

        Parameters
        ----------
        branch : str, optional
            A specific branch name whose head commit will be used as the summary
            point (Default value = '')
        commit : str, optional
            A specific commit hash which should be used as the summary point.
            (Default value = '')
        """
        self.__verify_repo_initialized()
        try:
            ppbuf = summarize.summary(self._env, branch=branch, commit=commit)
        except ValueError:
            if commiting.number_commits_recorded(self._env.refenv) == 0:
                ppbuf = StringIO()
                ppbuf.write(f'No commits have been made in the repository. \n')
                ppbuf.write(f'Please make a commit and try again.')
            else:
                raise
        print(ppbuf.getvalue())
        return None

    def _details(self, *, line_limit=100, line_length=100) -> None:  # pragma: no cover
        """DEVELOPER USE ONLY: Dump some details about the underlying db structure to disk.
        """
        print(summarize.details(
            self._env.branchenv, line_limit=line_limit, line_length=line_length).getvalue())
        print(summarize.details(
            self._env.refenv, line_limit=line_limit, line_length=line_length).getvalue())
        print(summarize.details(
            self._env.hashenv, line_limit=line_limit, line_length=line_length).getvalue())
        print(summarize.details(
            self._env.stageenv, line_limit=line_limit, line_length=line_length).getvalue())
        print(summarize.details(
            self._env.stagehashenv, line_limit=line_limit, line_length=line_length).getvalue())
        for commit, commitenv in self._env.cmtenv.items():
            print(summarize.details(
                commitenv, line_limit=line_limit, line_length=line_length).getvalue())
        return

    def _ecosystem_details(self) -> dict:
        """DEVELOPER USER ONLY: log and return package versions on the system.
        """
        eco = ecosystem.get_versions()
        return eco

    def diff(self, master: str, dev: str) -> DiffAndConflicts:
        """Calculate diff between master and dev branch/commits.

        Diff is calculated as if we are to merge "dev" into "master"

        Parameters
        ----------
        master: str
            branch name or commit hash digest to use as the "master" which
            changes made in "dev" are compared to.
        dev: str
            branch name or commit hash digest to use as the "dev"
            (ie. "feature") branch which changes have been made to
            which are to be compared to the contents of "master".

        Returns
        -------
        DiffAndConflicts
            Standard output diff structure.
        """
        current_branches = self.list_branches()

        # assert branch / commit specified by "master" exists and
        # standardize into "digest" rather than "branch name" arg type
        if master in current_branches:
            masterHEAD = heads.get_branch_head_commit(
                branchenv=self._env.branchenv, branch_name=master)
        else:
            cmtExists = commiting.check_commit_hash_in_history(
                refenv=self._env.refenv, commit_hash=master)
            if not cmtExists:
                raise ValueError(f'`master` {master} is not valid branch/commit.')
            masterHEAD = master

        # same check & transform for "dev" branch/commit arg.
        if dev in current_branches:
            devHEAD = heads.get_branch_head_commit(
                branchenv=self._env.branchenv, branch_name=dev)
        else:
            cmtExists = commiting.check_commit_hash_in_history(
                refenv=self._env.refenv, commit_hash=dev)
            if not cmtExists:
                raise ValueError(f'`dev` {dev} is not valid branch/commit.')
            devHEAD = dev

        # create differ object and generate results...
        diff = ReaderUserDiff(commit_hash=masterHEAD,
                              branchenv=self._env.branchenv,
                              refenv=self._env.refenv)
        res = diff.commit(dev_commit_hash=devHEAD)
        return res
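
    # Illustrative usage sketch (branch names are hypothetical): compute the
    # changes which merging 'add-samples' into 'master' would introduce.
    #
    #     >>> res = repo.diff('master', 'add-samples')
    #     >>> # `res` is a DiffAndConflicts structure describing changes/conflicts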

    def merge(self, message: str, master_branch: str, dev_branch: str) -> str:
        """Perform a merge of the changes made on two branches.

        Parameters
        ----------
        message: str
            Commit message to use for this merge.
        master_branch : str
            name of the master branch to merge into
        dev_branch : str
            name of the dev/feature branch to merge

        Returns
        -------
        str
            Hash of the commit which is written if possible.
        """
        self.__verify_repo_initialized()
        commit_hash = select_merge_algorithm(
            message=message,
            branchenv=self._env.branchenv,
            stageenv=self._env.stageenv,
            refenv=self._env.refenv,
            stagehashenv=self._env.stagehashenv,
            master_branch=master_branch,
            dev_branch=dev_branch,
            repo_path=self._repo_path)

        return commit_hash
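
    # Illustrative usage sketch (branch names and resulting digest are hypothetical):
    #
    #     >>> repo.merge('merge feature work', 'master', 'add-samples')
    #     'f4a5b6...'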

    def create_branch(self, name: str, base_commit: str = None) -> heads.BranchHead:
        """create a branch with the provided name from a certain commit.

        If no base commit hash is specified, the current writer branch ``HEAD``
        commit is used as the ``base_commit`` hash for the branch. Note that
        creating a branch does not actually create a checkout object for
        interaction with the data. to interact you must use the repository
        checkout method to properly initialize a read (or write) enabled
        checkout object.

            >>> from hangar import Repository
            >>> repo = Repository('foo/path/to/dir')

            >>> repo.create_branch('testbranch')
                BranchHead(name='testbranch', digest='b66b...a8cc')
            >>> repo.list_branches()
                ['master', 'testbranch']
            >>> co = repo.checkout(write=True, branch='testbranch')
            >>> # add data ...
            >>> newDigest = co.commit('added some stuff')

            >>> repo.create_branch('new-changes', base_commit=newDigest)
                BranchHead(name='new-changes', digest='35kd...3254')
            >>> repo.list_branches()
                ['master', 'new-changes', 'testbranch']

        Parameters
        ----------
        name : str
            name to assign to the new branch
        base_commit : str, optional
            commit hash to start the branch root at. if not specified, the
            writer branch ``HEAD`` commit at the time of execution will be used,
            defaults to None

        Returns
        -------
        :class:`~.heads.BranchHead`
            NamedTuple[str, str] with fields for ``name`` and ``digest`` of the
            branch created (if the operation was successful)

        Raises
        ------
        ValueError
            If the branch name provided contains characters outside of alpha-numeric
            ascii characters and ".", "_", "-" (no whitespace), or is > 64 characters.
        ValueError
            If the branch already exists.
        RuntimeError
            If the repository does not have at-least one commit on the "default"
            (ie. ``master``) branch.
        """
        self.__verify_repo_initialized()
        if (not is_ascii(name)) or (not is_suitable_user_key(name)):
            err = ValueError(
                f'Branch name provided: {name} invalid. Must contain only alpha-numeric '
                f'or "." "_" "-" ascii characters. And be <= 64 Characters')
            raise err from None
        createdBranch = heads.create_branch(
            branchenv=self._env.branchenv,
            name=name,
            base_commit=base_commit)
        return createdBranch

    def remove_branch(self, name: str, *, force_delete: bool = False) -> heads.BranchHead:
        """Permanently delete a branch pointer from the repository history.

        Since a branch (by definition) is the name associated with the HEAD
        commit of a historical path, the default behavior of this method is to
        throw an exception (no-op) should the ``HEAD`` not be referenced as an
        ancestor (or at least as a twin) of a separate branch which is
        currently *ALIVE*. If referenced in another branch's history, we are
        assured that all changes have been merged and recorded, and that this
        pointer can be safely deleted without risk of damage to historical
        provenance or (eventual) loss to garbage collection.

            >>> from hangar import Repository
            >>> repo = Repository('foo/path/to/dir')

            >>> repo.create_branch('first-testbranch')
            BranchHead(name='first-testbranch', digest='9785...56da')
            >>> repo.create_branch('second-testbranch')
            BranchHead(name='second-testbranch', digest='9785...56da')
            >>> repo.list_branches()
            ['master', 'first-testbranch', 'second-testbranch']
            >>> # Make a commit to advance a branch
            >>> co = repo.checkout(write=True, branch='first-testbranch')
            >>> # add data ...
            >>> co.commit('added some stuff')
            '3l253la5hna3k3a553256nak35hq5q534kq35532'
            >>> co.close()

            >>> repo.remove_branch('second-testbranch')
            BranchHead(name='second-testbranch', digest='9785...56da')

        A user may manually specify to delete an un-merged branch, in which
        case the ``force_delete`` keyword-only argument should be set to
        ``True``.

            >>> # check out master and try to remove 'first-testbranch'
            >>> co = repo.checkout(write=True, branch='master')
            >>> co.close()

            >>> repo.remove_branch('first-testbranch')
            Traceback (most recent call last):
                ...
            RuntimeError: ("The branch first-testbranch is not fully merged. "
            "If you are sure you want to delete it, re-run with "
            "force-remove parameter set.")
            >>> # Now set the `force_delete` parameter
            >>> repo.remove_branch('first-testbranch', force_delete=True)
            BranchHead(name='first-testbranch', digest='9785...56da')

        It is important to note that *while this method will handle all safety
        checks, argument validation, and performs the operation to permanently
        delete a branch name/digest pointer, **no commit refs along the history
        will be deleted from the Hangar database**.* Most of the history contains
        commit refs which must be safe in other branch histories, and recent
        commits may have been used as the base for some new history. As such, even
        if some of the latest commits leading up to a deleted branch ``HEAD`` are
        orphaned (unreachable), the records (and all data added in those commits)
        will remain on the disk.

        In the future, we intend to implement a garbage collector which will remove
        orphan commits which have not been modified for some set amount of time
        (probably on the order of a few months), but this is not implemented at the
        moment.

        Should an accidental forced branch deletion occur, *it is possible to
        recover* and create a new branch head pointing to the same commit. If
        the commit digest of the removed branch ``HEAD`` is known, it's as simple as
        specifying a name and the ``base_commit`` in the normal
        :meth:`create_branch` method. If the digest is unknown, it will be a
        bit more work, but some of the developer facing introspection tools /
        routines could be used to either manually or (with minimal effort)
        programmatically find the orphan commit candidates. If you find
        yourself having accidentally deleted a branch, and must get it back,
        please reach out on the `Github Issues
        <https://github.com/tensorwerk/hangar-py/issues>`__ page. We'll gladly
        explain more in depth and walk you through the process in any way we
        can help!

        Parameters
        ----------
        name : str
            name of the branch which should be deleted. This branch must exist, and
            cannot refer to a remote tracked branch (ie. origin/devbranch), please
            see exception descriptions for other parameters determining validity of
            argument
        force_delete : bool, optional
            If True, remove the branch pointer even if the changes are un-merged in
            other branch histories. May result in orphaned commits which may be
            time-consuming to recover if needed, by default False

        Returns
        -------
        :class:`~.heads.BranchHead`
            NamedTuple[str, str] with fields for `name` and `digest` of the branch
            pointer deleted.

        Raises
        ------
        ValueError
            If a branch with the provided name does not exist locally
        PermissionError
            If removal of the branch would result in a repository with zero local
            branches.
        PermissionError
            If a write enabled checkout is holding the writer-lock at time of this
            call.
        PermissionError
            If the branch to be removed was the last used in a write-enabled
            checkout, and whose contents form the base of the staging area.
        RuntimeError
            If the branch has not been fully merged into other branch histories,
            and ``force_delete`` option is not ``True``.
        """
        self.__verify_repo_initialized()
        res = heads.remove_branch(branchenv=self._env.branchenv,
                                  refenv=self._env.refenv,
                                  name=name,
                                  force_delete=force_delete)
        return res

    def list_branches(self) -> List[str]:
        """list all branch names created in the repository.

        Returns
        -------
        List[str]
            the branch names recorded in the repository
        """
        self.__verify_repo_initialized()
        branches = heads.get_branch_names(self._env.branchenv)
        return branches

    def verify_repo_integrity(self) -> bool:
        """Verify the integrity of the repository data on disk.

        Runs a full cryptographic verification of repository contents in order
        to ensure the integrity of all data and history recorded on disk.

        .. note::

            This proof may take a significant amount of time to run for
            repositories which:

            1. store significant quantities of data on disk.
            2. have a very large number of commits in their history.

            As a brief explanation for why these are the driving factors behind
            processing time:

            1. Every single piece of data in the repositories history must be read
               from disk, cryptographically hashed, and compared to the expected
               value. There is no exception to this rule; regardless of when a piece
               of data was added / removed from a column, or for how many (or how
               few) commits some sample exists in. The integrity of the commit tree at
               any point after some piece of data is added to the repo can only be
               validated if it - and all earlier data pieces - are proven to be intact
               and unchanged.

               Note: This does not mean that the verification is repeatedly
               performed for every commit some piece of data is stored in. Each
               data piece is read from disk and verified only once, regardless of
               how many commits some piece of data is referenced in.

            2. Each commit reference (defining names / contents of a commit) must be
               decompressed and parsed into a usable data structure. We scan across
               all data digests referenced in the commit and ensure that the
               corresponding data piece is known to hangar (and validated as
               unchanged). The commit refs (along with the corresponding user records,
               message, and parent map) are then re-serialized and cryptographically
               hashed for comparison to the expected value. While this process is
               fairly efficient for a single commit, it must be repeated for each
               commit in the repository history, and may take a non-trivial amount of
               time for repositories with thousands of commits.

        While the two points above are the most time consuming operations,
        there are many more checks which are performed alongside them as part
        of the full verification run.

        Returns
        -------
        bool
            True if integrity verification is successful, otherwise False; in
            this case, a message describing the offending component will be
            printed to stdout.
        """
        self.__verify_repo_initialized()
        heads.acquire_writer_lock(self._env.branchenv, 'VERIFY_PROCESS')
        try:
            integrity.run_verification(
                branchenv=self._env.branchenv,
                hashenv=self._env.hashenv,
                refenv=self._env.refenv,
                repo_path=self._env.repo_path)
        finally:
            heads.release_writer_lock(self._env.branchenv, 'VERIFY_PROCESS')
        return True
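
    # Illustrative usage sketch: a full verification run simply returns True on
    # success (failures print a description of the offending component).
    #
    #     >>> repo.verify_repo_integrity()
    #     True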

    def force_release_writer_lock(self) -> bool:
        """Force release the lock left behind by an unclosed writer-checkout

        .. warning::

            *NEVER USE THIS METHOD IF WRITER PROCESS IS CURRENTLY ACTIVE.* At the time
            of writing, the implications of improper/malicious use of this are not
            understood, and there is a risk of undefined behavior or (potentially)
            data corruption.

            At the moment, the responsibility to close a write-enabled checkout is
            placed entirely on the user. If the `close()` method is not called
            before the program terminates, a new checkout with write=True will fail.
            The lock can only be released via a call to this method.

        .. note::

            This entire mechanism is subject to review/replacement in the future.

        Returns
        -------
        bool
            if the operation was successful.
        """
        self.__verify_repo_initialized()
        forceReleaseSentinal = parsing.repo_writer_lock_force_release_sentinal()
        success = heads.release_writer_lock(self._env.branchenv, forceReleaseSentinal)
        return success


================================================
FILE: src/hangar/txnctx.py
================================================
from collections import Counter
from typing import MutableMapping

import lmdb


class TxnRegisterSingleton(type):
    _instances = {}
    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super(TxnRegisterSingleton, cls).__call__(*args, **kwargs)
        return cls._instances[cls]


class TxnRegister(metaclass=TxnRegisterSingleton):
    """Singleton to manage transaction thread safety in lmdb databases.

    This is essentially a reference counting transaction register; lots of room
    for improvement here.
    """

    def __init__(self):
        self.WriterAncestors = Counter()
        self.ReaderAncestors = Counter()
        self.WriterTxn: MutableMapping[lmdb.Environment, lmdb.Transaction] = {}
        self.ReaderTxn: MutableMapping[lmdb.Environment, lmdb.Transaction] = {}

    @property
    def _debug_(self):  # pragma: no cover
        return {
            '__class__': self.__class__,
            'WriterAncestors': self.WriterAncestors,
            'ReaderAncestors': self.ReaderAncestors,
            'WriterTxn': self.WriterTxn,
            'ReaderTxn': self.ReaderTxn,
        }

    def begin_writer_txn(self, lmdbenv: lmdb.Environment,
                         buffer: bool = False) -> lmdb.Transaction:
        """Start a write enabled transaction on the given environment

        If multiple write transactions are requested for the same environment, only
        one instance of the transaction handle will be returned, and it will not
        close until all operations on that handle have requested to close.

        Parameters
        ----------
        lmdbenv : lmdb.Environment
            the environment to open the transaction on
        buffer : bool, optional
            if buffer objects should be used (the default is False, which does
            not use buffers)

        Returns
        -------
        lmdb.Transaction
            transaction handle to perform operations on
        """
        if self.WriterAncestors[lmdbenv] == 0:
            self.WriterTxn[lmdbenv] = lmdbenv.begin(write=True, buffers=buffer)
        self.WriterAncestors[lmdbenv] += 1
        return self.WriterTxn[lmdbenv]

    def begin_reader_txn(self, lmdbenv: lmdb.Environment,
                         buffer: bool = False) -> lmdb.Transaction:
        """Start a reader only txn for the given environment

        If a read-only transaction for the same environment already exists,
        the same reader txn handle will be returned, and it will not close
        until all operations on that handle have said they are finished.

        Parameters
        ----------
        lmdbenv : lmdb.Environment
            the environment to start the transaction in.
        buffer : bool, optional
            whether a buffer transaction should be used (the default is False,
            which means no buffers are returned)

        Returns
        -------
        lmdb.Transaction
            handle to the lmdb transaction.
        """
        if self.ReaderAncestors[lmdbenv] == 0:
            self.ReaderTxn[lmdbenv] = lmdbenv.begin(write=False, buffers=buffer)
        self.ReaderAncestors[lmdbenv] += 1
        return self.ReaderTxn[lmdbenv]

    def commit_writer_txn(self, lmdbenv: lmdb.Environment) -> bool:
        """Commit changes made in a write-enable transaction handle

        As multiple objects can have references to the same open transaction handle,
        the data is not actually committed until all open transactions have called
        the commit method.

        Parameters
        ----------
        lmdbenv : lmdb.Environment
            the environment handle used to open the transaction

        Raises
        ------
        RuntimeError
            If the internal reference counting gets out of sync

        Returns
        -------
        bool
            True if this operation actually committed, otherwise false
            if other objects have references to the same (open) handle
        """
        ancestors = self.WriterAncestors[lmdbenv]
        if ancestors == 0:
            msg = f'writer ancestors are zero but commit called on {lmdbenv}'
            raise RuntimeError(msg)
        elif ancestors == 1:
            self.WriterTxn[lmdbenv].commit()
            self.WriterTxn.__delitem__(lmdbenv)
            ret = True
        else:
            ret = False
        self.WriterAncestors[lmdbenv] -= 1
        return ret

    def abort_reader_txn(self, lmdbenv: lmdb.Environment) -> bool:
        """Request to close a read-only transaction handle

        As multiple objects can have references to the same open transaction
        handle, the transaction is not actually aborted until all open
        transactions have called the abort method.

        Parameters
        ----------
        lmdbenv : lmdb.Environment
            the environment handle used to open the transaction

        Raises
        ------
        RuntimeError
            If the internal reference counting gets out of sync.

        Returns
        -------
        bool
            True if this operation actually aborted the transaction,
            otherwise False if other objects have references to the same (open)
            handle.
        """
        ancestors = self.ReaderAncestors[lmdbenv]
        if ancestors == 0:
            raise RuntimeError(f'reader ancestors are zero but abort called on {lmdbenv}')
        elif ancestors == 1:
            self.ReaderTxn[lmdbenv].abort()
            self.ReaderTxn.__delitem__(lmdbenv)
            ret = True
        else:
            ret = False
        self.ReaderAncestors[lmdbenv] -= 1
        return ret
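

# A minimal, illustrative usage sketch of the reference-counted transaction
# pattern implemented by ``TxnRegister`` above. The temporary directory and
# ``map_size`` value are assumptions made only for this example.
if __name__ == '__main__':  # pragma: no cover
    import tempfile

    with tempfile.TemporaryDirectory() as tmpdir:
        env = lmdb.open(tmpdir, map_size=10_000_000)
        register = TxnRegister()

        # Two callers request a writer txn on the same environment; both
        # receive the identical handle and the reference count climbs to 2.
        txn_a = register.begin_writer_txn(env)
        txn_b = register.begin_writer_txn(env)
        assert txn_a is txn_b

        txn_a.put(b'key', b'value')

        # The first commit only decrements the counter; the second call
        # actually commits the underlying lmdb transaction.
        assert register.commit_writer_txn(env) is False
        assert register.commit_writer_txn(env) is True
        env.close()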


================================================
FILE: src/hangar/typesystem/__init__.py
================================================
from .descriptors import (
    Descriptor, OneOf, DictItems, EmptyDict, SizedIntegerTuple, checkedmeta
)
from .ndarray import NdarrayVariableShape, NdarrayFixedShape
from .pystring import StringVariableShape
from .pybytes import BytesVariableShape

__all__ = [
    'Descriptor', 'OneOf', 'DictItems', 'EmptyDict', 'SizedIntegerTuple',
    'checkedmeta', 'NdarrayVariableShape', 'NdarrayFixedShape',
    'StringVariableShape', 'BytesVariableShape'
]


================================================
FILE: src/hangar/typesystem/base.py
================================================
from .descriptors import OneOf, String, checkedmeta
from ..records import hash_func_from_tcode


@OneOf(['flat', 'nested'])
class ColumnLayout(String):
    pass


@OneOf(['str', 'ndarray', 'bytes'])
class ColumnDType(String):
    pass


@OneOf(['1'])
class SchemaHasherTcode(String):
    pass


class ColumnBase(metaclass=checkedmeta):
    _column_layout = ColumnLayout()
    _column_type = ColumnDType()
    _schema_hasher_tcode = SchemaHasherTcode()

    def __init__(
            self,
            column_layout,
            column_type,
            data_hasher_tcode,
            schema_hasher_tcode=None,
            *args, **kwargs
    ):
        if schema_hasher_tcode is None:
            schema_hasher_tcode = '1'

        self._column_layout = column_layout
        self._column_type = column_type
        self._schema_hasher_tcode = schema_hasher_tcode
        self._data_hasher_tcode = data_hasher_tcode
        self._schema_attributes = [
            '_column_layout',
            '_column_type',
            '_schema_hasher_tcode',
            '_data_hasher_tcode',
        ]
        self._schema_hasher_func = hash_func_from_tcode(self._schema_hasher_tcode)
        self._data_hasher_func = hash_func_from_tcode(self._data_hasher_tcode)
        self._hidden_be_opts = None

    @property
    def _beopts(self):
        from ..backends import BACKEND_OPTIONS_MAP
        if self._hidden_be_opts is None:
            self._hidden_be_opts = BACKEND_OPTIONS_MAP[self.backend](
                backend_options=self.backend_options,
                dtype=self.dtype,
                shape=(self.shape if hasattr(self, '_shape') else None))
        return self._hidden_be_opts

    @_beopts.deleter
    def _beopts(self):
        self._hidden_be_opts = None

    @_beopts.setter
    def _beopts(self, backend_options):
        from ..backends import BACKEND_OPTIONS_MAP
        self._hidden_be_opts = BACKEND_OPTIONS_MAP[self.backend](
            backend_options=backend_options,
            dtype=self.dtype,
            shape=(self.shape if hasattr(self, '_shape') else None))

    @property
    def column_layout(self):
        return self._column_layout

    @property
    def column_type(self):
        return self._column_type

    @property
    def schema_hasher_tcode(self):
        return self._schema_hasher_tcode

    @property
    def schema(self):
        schema_dict = {}
        public_attr_names = [attr.lstrip('_') for attr in self._schema_attributes]
        for attr in public_attr_names:
            schema_dict[attr] = getattr(self, f'_{attr}')
        return schema_dict

    def schema_hash_digest(self):
        return self._schema_hasher_func(self.schema)

    def backend_from_heuristics(self, *args, **kwargs):
        raise NotImplementedError

    def verify_data_compatible(self, *args, **kwargs):
        raise NotImplementedError

    @property
    def data_hasher_tcode(self):
        return self._data_hasher_tcode

    def data_hash_digest(self, *args, **kwargs):
        raise NotImplementedError
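

# A minimal, illustrative sketch (not a definitive usage pattern) showing how
# the descriptor-validated attributes above surface through the ``schema``
# property and feed the schema hash. Argument values are examples only; run
# via ``python -m hangar.typesystem.base`` so the relative imports resolve.
if __name__ == '__main__':  # pragma: no cover
    col = ColumnBase(column_layout='flat', column_type='ndarray', data_hasher_tcode='0')
    print(col.schema)
    # -> {'column_layout': 'flat', 'column_type': 'ndarray',
    #     'schema_hasher_tcode': '1', 'data_hasher_tcode': '0'}
    print(col.schema_hash_digest())  # digest computed over the schema dict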



================================================
FILE: src/hangar/typesystem/descriptors.py
================================================
"""
Portions of this code have been taken and modified from the book:

Beazley, D. and B. K. Jones (2013). Python Cookbook, O’Reilly Media, Inc.

Chapter: 8.13. Implementing a Data Model or Type System

===============================================================================

Problem
-------

You want to define various kinds of data structures, but want to enforce
constraints on the values that are allowed to be assigned to certain
attributes.

Solution
--------

In this problem, you are basically faced with the task of placing checks or
assertions on the setting of certain instance attributes. To do this, you need
to customize the setting of attributes on a per-attribute basis. To do this,
you should use descriptors.

This recipe involves a number of advanced techniques, including descriptors,
mixin classes, the use of super(), class decorators, and metaclasses. Covering
the basics of all those topics is beyond what can be covered here; However,
there are a number of subtle points worth noting.

First, in the Descriptor base class, you will notice that there is a __set__()
method, but no corresponding __get__(). If a descriptor will do nothing more
than extract an identically named value from the underlying instance
dictionary, defining __get__() is unnecessary. In fact, defining __get__() will
just make it run slower. Thus, this recipe only focuses on the implementation
of __set__().

The overall design of the various descriptor classes is based on mixin classes.
For example, the Unsigned and MaxSized classes are meant to be mixed with the
other descriptor classes derived from Typed. To handle a specific kind of data
type, multiple inheritance is used to combine the desired functionality.

You will also notice that all __init__() methods of the various descriptors
have been programmed to have an identical signature involving keyword arguments
**opts. The class for MaxSized looks for its required attribute in opts, but
simply passes it along to the Descriptor base class, which actually sets it.
One tricky part about composing classes like this (especially mixins), is that
you don’t always know how the classes are going to be chained together or what
super() will invoke. For this reason, you need to make it work with any
possible combination of classes.

The definitions of the various type classes such as Integer, Float, and String
illustrate a useful technique of using class variables to customize an
implementation. The Typed descriptor merely looks for an expected_type
attribute that is provided by each of those subclasses.

The use of a class decorator or metaclass is often useful for simplifying the
specification by the user.

The code for the class decorator and metaclass simply scan the class dictionary
looking for descriptors. When found, they simply fill in the descriptor name
based on the key value.

As a final twist, a class decorator approach can also be used as a replacement
for mixin classes, multiple inheritance, and tricky use of the super() function

The classes defined in this alternative formulation work in exactly the same
manner as before (none of the earlier example code changes) except that
everything runs much faster. For example, a simple timing test of setting a
typed attribute reveals that the class decorator approach runs almost 100%
faster than the approach using mixins.
"""
from typing import Sequence


class Descriptor:
    # Base class. Uses a descriptor to set a value
    def __init__(self, name=None, **opts):
        self.name = name
        self.__dict__.update(opts)

    def __set__(self, instance, value):
        instance.__dict__[self.name] = value


def Typed(expected_type, cls=None):
    # Decorator for applying type checking
    if cls is None:
        return lambda cls: Typed(expected_type, cls)

    super_set = cls.__set__

    def __set__(self, instance, value):
        if not isinstance(value, expected_type):
            raise TypeError('expected ' + str(expected_type))
        super_set(self, instance, value)

    cls.__set__ = __set__
    return cls


def TypedSequence(expected_element_types, cls=None):
    # Decorator enforcing that all elements in a sequence are of the specified type(s).
    # using the python ABC definition of "Sequence" (list, tuple)
    # https://docs.python.org/3/library/collections.abc.html#collections.abc.Sequence
    if cls is None:
        return lambda cls: TypedSequence(expected_element_types, cls)

    super_set = cls.__set__
    def __set__(self, instance, value):
        if not isinstance(value, Sequence):
            raise TypeError(f'input is not Sequence type, received {type(value)}')
        elif not all([isinstance(el, expected_element_types) for el in value]):
            raise TypeError(f'not all elements are {expected_element_types} type(s) in {value}')
        super_set(self, instance, value)
    cls.__set__ = __set__
    return cls


def OneOf(expected_values, cls=None):
    # Decorator for enforcing values
    if cls is None:
        return lambda cls: OneOf(expected_values, cls)

    super_set = cls.__set__
    def __set__(self, instance, value):
        if value not in expected_values:
            raise ValueError(f'expected one of {expected_values} received {value}')
        super_set(self, instance, value)
    cls.__set__ = __set__
    return cls


def MaxSized(cls):
    # Decorator enforcing a maximum size on assigned values
    super_init = cls.__init__
    def __init__(self, name=None, **opts):
        if 'size' not in opts:
            raise TypeError('missing size option')
        self.size = opts['size']
        super_init(self, name, **opts)
    cls.__init__ = __init__
    super_set = cls.__set__
    def __set__(self, instance, value):
        if len(value) > self.size:
            raise ValueError('size must be <= ' + str(self.size))
        super_set(self, instance, value)
    cls.__set__ = __set__
    return cls


def DictItems(expected_keys_required, expected_values, cls=None):
    # Check a dictionary for the existence of keys. expected_keys_required should be a
    # mapping of keys to bool values indicating whether each key is required.
    # expected_values should be a mapping of the same keys to sequences of acceptable values.
    if cls is None:
        return lambda cls: DictItems(expected_keys_required, expected_values, cls)

    super_set = cls.__set__
    def __set__(self, instance, value):
        if not isinstance(value, dict):
            raise TypeError(f'expected {dict}, received {type(value)}')
        for expected_key, required in expected_keys_required.items():
            try:
                if value[expected_key] not in expected_values[expected_key]:
                    raise ValueError(f'{value[expected_key]} invalid for key {expected_key}')
            except KeyError as e:
                if required:
                    raise e
        for received_key in value.keys():
            if received_key not in expected_keys_required:
                raise TypeError(f'Not supposed to have key {received_key}')
        super_set(self, instance, value)
    cls.__set__ = __set__
    return cls


@Typed(str)
class String(Descriptor):
    pass


@DictItems(expected_keys_required={},
           expected_values={},)
class EmptyDict(Descriptor):
    pass


@Typed((dict, type(None)))
class OptionalDict(Descriptor):
    pass


@Typed((str, type(None)))
class OptionalString(Descriptor):
    pass


@Typed(tuple)
class Tuple(Descriptor):
    pass


@MaxSized
@TypedSequence(int)
class SizedIntegerTuple(Tuple):
    pass


class checkedmeta(type):
    # A metaclass that applies checking
    def __new__(cls, clsname, bases, methods):
        # Attach attribute names to the descriptors
        for key, value in methods.items():
            if isinstance(value, Descriptor):
                value.name = key
        return type.__new__(cls, clsname, bases, methods)
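

# A minimal, illustrative sketch of composing the decorator-based descriptors
# above with ``checkedmeta`` so attribute assignment is validated. All class
# and attribute names below are made up for the example.
if __name__ == '__main__':  # pragma: no cover

    @OneOf(['flat', 'nested'])
    class _LayoutField(String):
        pass

    class _Demo(metaclass=checkedmeta):
        layout = _LayoutField()             # only 'flat' or 'nested' accepted
        coords = SizedIntegerTuple(size=3)  # tuple of ints, at most 3 long

        def __init__(self, layout, coords):
            self.layout = layout
            self.coords = coords

    ok = _Demo('flat', (1, 2))              # passes every check
    try:
        _Demo('weird', (1, 2))              # rejected by the OneOf check
    except ValueError as exc:
        print(exc)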


================================================
FILE: src/hangar/typesystem/ndarray.py
================================================
import numpy as np

from .base import ColumnBase
from .descriptors import OneOf, String, OptionalString, SizedIntegerTuple, OptionalDict
from ..records import CompatibleData


@OneOf(['variable_shape', 'fixed_shape'])
class NdarraySchemaType(String):
    pass


@OneOf(['ndarray'])
class NdarrayColumnType(String):
    pass


@OneOf(['0'])
class DataHasherTcode(String):
    pass


class NdarraySchemaBase(ColumnBase):
    _schema_type = NdarraySchemaType()
    _column_type = NdarrayColumnType()
    _data_hasher_tcode = DataHasherTcode()

    def __init__(
            self,
            shape,
            dtype,
            backend=None,
            backend_options=None,
            *args, **kwargs
    ):
        if 'data_hasher_tcode' not in kwargs:
            kwargs['data_hasher_tcode'] = '0'
        super().__init__(*args, **kwargs)

        if backend_options is not None and backend is None:
            raise ValueError(
                '`backend_options` cannot be set if `backend` is not also provided.')

        if not isinstance(dtype, str):
            dtype = np.dtype(dtype).name
        self._dtype = dtype
        self._shape = shape
        self._backend = backend
        self._backend_options = backend_options
        self._schema_attributes.extend(
            ['_schema_type', '_shape', '_dtype', '_backend', '_backend_options'])

    def backend_from_heuristics(self):
        # uncompressed numpy memmap storage is most appropriate for small,
        # one-dimensional data such as tabular row data (CSV or similar).
        if (len(self._shape) == 1) and (self._shape[0] < 400):
            backend = '10'
        # hdf5 is the default backend for larger array sizes.
        elif (len(self._shape) == 1) and (self._shape[0] <= 10_000_000):
            backend = '00'
        # on fixed sized arrays, apply backend optimizations.
        elif self._schema_type == 'fixed_shape':
            backend = '01'
        else:
            backend = '00'
        self._backend = backend

    @property
    def schema_type(self):
        return self._schema_type

    @property
    def shape(self):
        return self._shape

    @property
    def dtype(self):
        return np.dtype(self._dtype)

    @property
    def backend(self):
        return self._backend

    @property
    def backend_options(self):
        return self._backend_options

    def data_hash_digest(self, data: np.ndarray) -> str:
        return self._data_hasher_func(data)

    def change_backend(self, backend, backend_options=None):
        old_backend = self._backend
        old_backend_options = self._backend_options
        try:
            del self._beopts
            self._backend = backend
            self._beopts = backend_options
            self._backend_options = self._beopts.backend_options
        except (TypeError, ValueError) as e:
            del self._beopts
            self._backend = old_backend
            self._beopts = old_backend_options
            self._backend_options = self._beopts.backend_options
            raise e from None

    def data_nbytes(self, obj: np.ndarray):
        return obj.nbytes


@OneOf(['00', '01', '10', '50', None])
class NdarrayFixedShapeBackends(OptionalString):
    pass


@OneOf(['fixed_shape'])
class FixedShapeSchemaType(String):
    pass


class NdarrayFixedShape(NdarraySchemaBase):
    _shape = SizedIntegerTuple(size=31)
    _dtype = String()
    _backend = NdarrayFixedShapeBackends()
    _backend_options = OptionalDict()
    _schema_type = FixedShapeSchemaType()

    def __init__(self, *args, **kwargs):
        if 'column_type' in kwargs:
            super().__init__(*args, **kwargs)
        else:
            super().__init__(column_type='ndarray', *args, **kwargs)

        if 'schema_type' in kwargs:
            self._schema_type = kwargs['schema_type']
        else:
            self._schema_type = 'fixed_shape'

        if self.backend is None:
            self.backend_from_heuristics()
        self._backend_options = self._beopts.backend_options

    def verify_data_compatible(self, data):
        compatible = True
        reason = ''

        if not isinstance(data, np.ndarray):
            compatible = False
            reason = f'`data` argument type: {type(data)} != `np.ndarray`'
        elif data.dtype != self._dtype:
            compatible = False
            reason = f'dtype: {data.dtype.name} != aset: {self._dtype}.'
        elif not data.flags.c_contiguous:
            compatible = False
            reason = f'`data` must be "C" contiguous array.'
        elif data.shape != self._shape:
            compatible = False
            reason = f'data shape {data.shape} != fixed schema {self._shape}'

        res = CompatibleData(compatible, reason)
        return res


@OneOf(['00', '10', '50', None])
class NdarrayVariableShapeBackends(OptionalString):
    pass


@OneOf(['variable_shape'])
class VariableShapeSchemaType(String):
    pass


class NdarrayVariableShape(NdarraySchemaBase):
    _shape = SizedIntegerTuple(size=31)
    _dtype = String()
    _backend = NdarrayVariableShapeBackends()
    _backend_options = OptionalDict()
    _schema_type = VariableShapeSchemaType()

    def __init__(self, *args, **kwargs):
        if 'column_type' in kwargs:
            super().__init__(*args, **kwargs)
        else:
            super().__init__(column_type='ndarray', *args, **kwargs)

        if 'schema_type' in kwargs:
            self._schema_type = kwargs['schema_type']
        else:
            self._schema_type = 'variable_shape'

        if self.backend is None:
            self.backend_from_heuristics()
        self._backend_options = self._beopts.backend_options

    def verify_data_compatible(self, data):
        compatible = True
        reason = ''

        if not isinstance(data, np.ndarray):
            compatible = False
            reason = f'`data` argument type: {type(data)} != `np.ndarray`'
        elif data.dtype != self._dtype:
            compatible = False
            reason = f'dtype: {data.dtype.name} != aset: {self._dtype}.'
        elif not data.flags.c_contiguous:
            compatible = False
            reason = f'`data` must be "C" contiguous array.'
        elif data.ndim != len(self._shape):
            compatible = False
            reason = f'data rank {data.ndim} != aset rank {len(self._shape)}'
        elif not all([(dim <= maxdim) for dim, maxdim in zip(data.shape, self._shape)]):
            compatible = False
            reason = f'shape {data.shape} exceeds schema max {self._shape}'

        res = CompatibleData(compatible, reason)
        return res
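

# A minimal, illustrative sketch of constructing a fixed-shape ndarray schema
# and checking sample compatibility. The shape/dtype values are examples only;
# running this requires a built hangar install (for the backend option classes
# resolved through ``ColumnBase._beopts``), e.g. via
# ``python -m hangar.typesystem.ndarray``.
if __name__ == '__main__':  # pragma: no cover
    schema = NdarrayFixedShape(shape=(5, 7), dtype=np.float64, column_layout='flat')
    print(schema.backend)                                    # '01' chosen by backend_from_heuristics()
    print(schema.verify_data_compatible(np.zeros((5, 7))))   # compatible
    print(schema.verify_data_compatible(np.zeros((3, 3))))   # rejected: shape mismatch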


================================================
FILE: src/hangar/typesystem/pybytes.py
================================================
from .base import ColumnBase
from .descriptors import OneOf, Descriptor, String, OptionalString, OptionalDict
from ..records import CompatibleData
from ..utils import format_bytes


@OneOf([''])
class BytesDType(Descriptor):
    pass


SERIAL_DTYPE_TO_OBJ = {
    '': bytes,
}


@OneOf(['variable_shape'])
class BytesSchemaType(String):
    pass


@OneOf(['bytes'])
class BytesColumnType(String):
    pass


@OneOf(['3'])
class DataHasherTcode(String):
    pass


class BytesSchemaBase(ColumnBase):
    _schema_type = BytesSchemaType()
    _column_type = BytesColumnType()
    _data_hasher_tcode = DataHasherTcode()

    def __init__(
            self,
            dtype,
            backend=None,
            backend_options=None,
            *args, **kwargs
    ):
        if 'data_hasher_tcode' not in kwargs:
            kwargs['data_hasher_tcode'] = '3'
        super().__init__(*args, **kwargs)

        if backend_options is not None and backend is None:
            raise ValueError(
                '`backend_options` cannot be set if `backend` is not also provided.')

        if not isinstance(dtype, str):
            dtype = repr(dtype).replace(' ', '')

        self._dtype = dtype
        self._backend = backend
        self._backend_options = backend_options
        self._schema_attributes.extend(
            ['_schema_type', '_dtype', '_backend', '_backend_options']
        )

    def backend_from_heuristics(self):
        self._backend = '31'

    @property
    def schema_type(self):
        return self._schema_type

    @property
    def dtype(self):
        return SERIAL_DTYPE_TO_OBJ[self._dtype]

    @property
    def backend(self):
        return self._backend

    @property
    def backend_options(self):
        return self._backend_options

    def data_hash_digest(self, data: str) -> str:
        return self._data_hasher_func(data)

    def change_backend(self, backend, backend_options=None):
        old_backend = self._backend
        old_backend_options = self._backend_options
        try:
            self._backend = backend
            self._backend_options = backend_options
            # del and reset beopts object to reverify input correctness.
            del self._beopts
            self._backend_options = self._beopts.backend_options
        except (TypeError, ValueError) as e:
            del self._beopts
            self._backend = old_backend
            self._backend_options = old_backend_options
            self._backend_options = self._beopts.backend_options
            raise e from None


@OneOf(['31', '50', None])
class BytesVariableShapeBackends(OptionalString):
    pass


@OneOf(['variable_shape'])
class VariableShapeSchemaType(String):
    pass


class BytesVariableShape(BytesSchemaBase):
    _dtype = BytesDType()
    _backend = BytesVariableShapeBackends()
    _backend_options = OptionalDict()
    _schema_type = VariableShapeSchemaType()

    def __init__(self, *args, **kwargs):
        if 'column_type' in kwargs:
            super().__init__(*args, **kwargs)
        else:
            super().__init__(column_type='bytes', *args, **kwargs)

        if 'schema_type' in kwargs:
            self._schema_type = kwargs['schema_type']
        else:
            self._schema_type = 'variable_shape'

        if self.backend is None:
            self.backend_from_heuristics()
        self._backend_options = self._beopts.backend_options

    def verify_data_compatible(self, data):
        compatible = True
        reason = ''
        if not isinstance(data, bytes):
            compatible = False
            reason = f'data {data} not valid, must be of type {bytes} not {type(data)}'
        elif len(data) > 2000000:  # 2MB
            compatible = False
            reason = f'bytes must be less than 2MB in size, received {format_bytes(len(data))}'

        res = CompatibleData(compatible, reason)
        return res


================================================
FILE: src/hangar/typesystem/pystring.py
================================================
from .base import ColumnBase
from .descriptors import OneOf, Descriptor, String, OptionalString, OptionalDict
from ..records import CompatibleData
from ..utils import format_bytes


@OneOf([''])
class StringDType(Descriptor):
    pass


SERIAL_DTYPE_TO_OBJ = {
    '': str,
}


@OneOf(['variable_shape'])
class StringSchemaType(String):
    pass


@OneOf(['str'])
class StrColumnType(String):
    pass


@OneOf(['2'])
class DataHasherTcode(String):
    pass


class StringSchemaBase(ColumnBase):
    _schema_type = StringSchemaType()
    _column_type = StrColumnType()
    _data_hasher_tcode = DataHasherTcode()

    def __init__(
            self,
            dtype,
            backend=None,
            backend_options=None,
            *args, **kwargs
    ):
        if 'data_hasher_tcode' not in kwargs:
            kwargs['data_hasher_tcode'] = '2'
        super().__init__(*args, **kwargs)

        if backend_options is not None and backend is None:
            raise ValueError(
                '`backend_options` cannot be set if `backend` is not also provided.')

        if not isinstance(dtype, str):
            dtype = repr(dtype).replace(' ', '')

        self._dtype = dtype
        self._backend = backend
        self._backend_options = backend_options
        self._schema_attributes.extend(
            ['_schema_type', '_dtype', '_backend', '_backend_options']
        )

    def backend_from_heuristics(self):
        self._backend = '30'

    @property
    def schema_type(self):
        return self._schema_type

    @property
    def dtype(self):
        return SERIAL_DTYPE_TO_OBJ[self._dtype]

    @property
    def backend(self):
        return self._backend

    @property
    def backend_options(self):
        return self._backend_options

    def data_hash_digest(self, data: str) -> str:
        return self._data_hasher_func(data)

    def change_backend(self, backend, backend_options=None):
        old_backend = self._backend
        old_backend_options = self._backend_options
        try:
            self._backend = backend
            self._backend_options = backend_options
            # del and reset beopts object to reverify input correctness.
            del self._beopts
            self._backend_options = self._beopts.backend_options
        except (TypeError, ValueError) as e:
            del self._beopts
            self._backend = old_backend
            self._backend_options = old_backend_options
            self._backend_options = self._beopts.backend_options
            raise e from None

    def data_nbytes(self, obj: str):
        return len(obj.encode())


@OneOf(['30', '50', None])
class StringVariableShapeBackends(OptionalString):
    pass


@OneOf(['variable_shape'])
class VariableShapeSchemaType(String):
    pass


class StringVariableShape(StringSchemaBase):
    _dtype = StringDType()
    _backend = StringVariableShapeBackends()
    _backend_options = OptionalDict()
    _schema_type = VariableShapeSchemaType()

    def __init__(self, *args, **kwargs):
        if 'column_type' in kwargs:
            super().__init__(*args, **kwargs)
        else:
            super().__init__(column_type='str', *args, **kwargs)

        if 'schema_type' in kwargs:
            self._schema_type = kwargs['schema_type']
        else:
            self._schema_type = 'variable_shape'

        if self.backend is None:
            self.backend_from_heuristics()
        self._backend_options = self._beopts.backend_options

    def verify_data_compatible(self, data):
        compatible = True
        reason = ''

        if not isinstance(data, str):
            compatible = False
            reason = f'data {data} must be {str} type, not {type(data)}'
        elif len(data.encode()) > 2000000:  # 2MB
            compatible = False
            reason = f'str bytes must be less than 2MB in size, received {format_bytes(len(data.encode()))}'

        res = CompatibleData(compatible, reason)
        return res


================================================
FILE: src/hangar/utils.py
================================================
import os
import re
import secrets
import string
import sys
import time
from collections import deque
from io import StringIO
from pathlib import Path
from itertools import tee, filterfalse, count, zip_longest
from typing import Union

import blosc


NumType = Union[int, float]


def bound(low: NumType, high: NumType, value: NumType) -> NumType:
    """Bound value such that ``low <= value <= high``

    >>> bound(0, 100, 10)
    10
    >>> bound(0, 100, -1)
    -1
    >>> bound(0, 100, 500)
    100
    >>> bound(-5, -2, -3)
    -3
    >>> bound(-6.0, -5.0, 0.1)
    -5.0
    >>> bound(0.0, 5, 3.5)
    3.5
    """
    return max(low, min(high, value))


def calc_num_threadpool_workers() -> int:
    nCores = os.cpu_count()  # includes hyperthreads
    return bound(2, 10, nCores * 2)


def is_64bits():
    """bool indicating if running on atleast a 64 bit machine
    """
    return sys.maxsize > 2 ** 32


def set_blosc_nthreads() -> int:  # pragma: no cover
    """set the blosc library to two less than the core count on the system.

    If less than 2 cores are ncores-2, we set the value to two.

    Returns
    -------
    int
        ncores blosc will use on the system
    """
    nCores = blosc.detect_number_of_cores()
    if nCores == 1:
        nUsed = 1
    elif nCores == 2:
        nUsed = 2
    elif nCores <= 4:
        nUsed = nCores - 1
    else:
        nUsed = nCores - 2
    blosc.set_nthreads(nUsed)
    return nUsed


def random_string(
    n: int = 8,
    *, _ALPHABET=''.join([string.ascii_lowercase, string.digits])
) -> str:
    """Generate a random string of lowercase ascii letters and digits.

    Parameters
    ----------
    n: int, optional
        The number of characters which the output string will have. Default=8
    """
    token = [secrets.choice(_ALPHABET) for i in range(n)]
    return ''.join(token)


_SuitableCharRE = re.compile(r'[\w\.\-\_]+\Z', flags=re.ASCII)


def is_suitable_user_key(key: Union[str, int]) -> bool:
    """Checks if only alpha-numeric ascii chars or ['.', '-' '_'] (no whitespace)

    Necessary because python 3.6 does not have a str.isascii() method. In
    addition, checks that all input keys are less than 64 characters long.

    Parameters
    ----------
    key : Union[str, int]
        key to check; non-negative ints are converted to their string form
        before checking.

    Returns
    -------
    bool
        True if only ascii characters in the string, else False.
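
    Examples
    --------
    Illustrative cases based on the checks above:

    >>> is_suitable_user_key('train_image-001')
    True
    >>> is_suitable_user_key('has whitespace')
    False
    >>> is_suitable_user_key(-1)
    False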
    """
    try:
        if isinstance(key, int) and (key >= 0):
            str_data = str(key)
        elif isinstance(key, str):
            str_data = str(key)
        else:
            raise TypeError
        if len(str_data) > 64:
            return False
        return bool(_SuitableCharRE.match(str_data))
    except TypeError:
        return False


def is_ascii(str_data: str) -> bool:
    """Checks if string contains only ascii chars.

    Necessary because python 3.6 does not have a str.isascii() method.

    Parameters
    ----------
    str_data : str
        string to check if it contains only ascii characters

    Returns
    -------
    bool
        True if only ascii characters in the string, else False.
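
    Examples
    --------
    Illustrative cases:

    >>> is_ascii('hello')
    True
    >>> is_ascii('héllo')
    False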
    """
    try:
        str_data.encode('ascii')
    except (UnicodeEncodeError, AttributeError):
        return False
    return True


def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)


def unique_everseen(iterable, key=None):
    """List unique elements, preserving order. Remember all elements ever seen.

    >>> list(unique_everseen('AAAABBBCCDAABBB'))
    ['A', 'B', 'C', 'D']
    >>> list(unique_everseen('ABBCcAD', str.lower))
    ['A', 'B', 'C', 'D']
    """
    seen = set()
    seen_add = seen.add
    if key is None:
        for element in filterfalse(seen.__contains__, iterable):
            seen_add(element)
            yield element
    else:
        for element in iterable:
            k = key(element)
            if k not in seen:
                seen_add(k)
                yield element


def ilen(iterable):
    """Return the number of items in *iterable*.

        >>> ilen(x for x in range(1000000) if x % 3 == 0)
        333334
        >>> it = iter([0, 1, 2, False])
        >>> ilen(it)
        4

    This consumes the iterable, so handle with care.
    """
    counter = count()
    deque(zip(iterable, counter), maxlen=0)
    return next(counter)


def grouper(iterable, n, fillvalue=None):
    """split iterable into n sized groups upon each call to `next()`

    >>> for grp in grouper([(x, x*2) for x in range(4)], 2):
    ...     print(grp)
    ((0, 0), (1, 2))
    ((2, 4), (3, 6))
    >>> for grp in grouper([x for x in range(5)], 2, fillvalue=None):
    ...     print(grp)
    (0, 1)
    (2, 3)
    (4, None)
    >>> for grp in grouper([(x, x*2) for x in range(5)], 2, fillvalue=('FOO', 'BAR')):
    ...     print(grp)
    ((0, 0), (1, 2))
    ((2, 4), (3, 6))
    ((4, 8), ('FOO', 'BAR'))
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)


def file_size(p: Path) -> int:  # pragma: no cover
    """Query the file size of a specific file

    Parameters
    ----------
    p : Path
        path to a file that exists on disk.

    Raises
    ------
    FileNotFoundError
        if the file does not exist

    Returns
    -------
    int
        nbytes the file consumes on disk.
    """
    if not p.is_file():
        err = f'Cannot query size of: {str(p)}. File does not exist'
        raise FileNotFoundError(err)
    return p.stat().st_size


def folder_size(p: Path, *, recurse: bool = False) -> int:
    """size of all files in a folder.

    Default is to not include subdirectories. Set "recurse=True"
    to enable recursive calculation.

    Parameters
    ----------
    p : Path
        path to the repository on disk.
    recurse : bool, kwarg-only
        whether to recurse into subdirectories and include their contents in
        the total (Default value = False)

    Returns
    -------
    int
        number of bytes used up in the repo_path
    """
    total = 0
    for entry in p.iterdir():
        if entry.is_file() and not entry.is_symlink():
            total += entry.stat().st_size
        elif recurse and entry.is_dir() and not entry.is_symlink():
            total += folder_size(entry.resolve(), recurse=True)
    return total


def is_valid_directory_path(p: Path) -> Path:
    """Check if path is directory which user has write permission to.

    Parameters
    ----------
    p : Path
        path to some location on disk

    Returns
    -------
    Path
        If successful, the path with any user home shorthand expanded
        (ie. `~/somedir` -> `/home/foo/somedir`)

    Raises
    ------
    TypeError
        If the provided path argument is not a pathlike object
    NotADirectoryError
        If the path does not exist, or is not a directory on disk
    PermissionError
        If the user does not have write access to the specified path
    """
    if not isinstance(p, Path):
        msg = f'Path arg `p`: {p} of type: {type(p)} is not a valid path specifier'
        raise TypeError(msg)

    usr_path = p.expanduser().resolve(strict=True)

    if not usr_path.is_dir():
        msg = f'Path arg `p`: {p} is not a directory.'
        raise NotADirectoryError(msg)
    if not os.access(str(usr_path), os.W_OK):  # pragma: no cover
        msg = f'User does not have permission to write to directory path: {p}'
        raise PermissionError(msg)

    return usr_path


# ----------------- human & machine nbytes ------------------------------------


def format_bytes(n: int) -> str:
    """ Format bytes as text
    >>> format_bytes(1)
    '1.00 B'
    >>> format_bytes(1234)
    '1.23 kB'
    >>> format_bytes(12345678)
    '12.35 MB'
    >>> format_bytes(1234567890)
    '1.23 GB'
    >>> format_bytes(1234567890000)
    '1.23 TB'
    >>> format_bytes(1234567890000000)
    '1.23 PB'
    """
    for x in ['B', 'kB', 'MB', 'GB', 'TB', 'PB']:
        if n < 1000.0:
            return "%3.2f %s" % (n, x)
        n /= 1000.0


_byte_sizes = {
    'kb': 1000,
    'mb': 1000000,
    'gb': 1000000000,
    'tb': 1000000000000,
    'pb': 1000000000000000,
    'kib': 1024,
    'mib': 1048576,
    'gib': 1073741824,
    'tib': 1099511627776,
    'pib': 1125899906842624,
    'b': 1,
    '': 1,
    'k': 1000,
    'm': 1000000,
    'g': 1000000000,
    't': 1000000000000,
    'p': 1000000000000000,
    'ki': 1024,
    'mi': 1048576,
    'gi': 1073741824,
    'ti': 1099511627776,
    'pi': 1125899906842624
}


def parse_bytes(s: str) -> int:
    """ Parse byte string to numbers
    >>> parse_bytes('100')
    100
    >>> parse_bytes('100 MB')
    100000000
    >>> parse_bytes('100M')
    100000000
    >>> parse_bytes('5kB')
    5000
    >>> parse_bytes('5.4 kB')
    5400
    >>> parse_bytes('1kiB')
    1024
    >>> parse_bytes('1e6')
    1000000
    >>> parse_bytes('1e6 kB')
    1000000000
    >>> parse_bytes('MB')
    1000000
    """
    s = s.replace(' ', '').lower()
    s = f'1{s}' if not s[0].isdigit() else s
    for i in range(len(s) - 1, -1, -1):
        if not s[i].isalpha():
            break

    n = float(s[:i + 1])
    mult = _byte_sizes[s[i + 1:]]
    return int(n * mult)


def readme_contents(user_name: str, user_email: str) -> StringIO:
    """Create the Hangar README.txt contents used to fill out file on repo initialization

    Parameters
    ----------
    user_name : str
        name of the user initializing the repository on the machine.
    user_email : str
        email of the user initializing the repository on the machine.

    Returns
    -------
    StringIO
        Buffered string text ready to be sent to a file writer.
    """
    from . import __version__
    from .constants import DIR_HANGAR

    buf = StringIO()
    buf.write(f'This directory has been used to initialize a Hangar Repository\n')
    buf.write(f'\n')
    buf.write(f'This repository was initialized by:\n')
    buf.write(f'    User Name:        {user_name}\n')
    buf.write(f'    User Email:       {user_email}\n')
    buf.write(f'    Creation Time:    {time.asctime(time.gmtime())} UTC\n')
    buf.write(f'    Software Version: {__version__}\n')
    buf.write(f'\n')
    buf.write(f'NOTE: The repository may have been updated to work with newer Hangar versions\n')
    buf.write(f'since initialization.\n')
    buf.write(f'\n')
    buf.write(f'Do not modify the contents of this `{DIR_HANGAR}` folder under any circumstances.\n')
    buf.write(f'The contents are not meant to be understood by humans. Doing so will result\n')
    buf.write(f'in data loss / corruption.\n')
    buf.write(f'\n')
    buf.write(f'The project homepage can be found at: https://github.com/tensorwerk/hangar-py/ \n')
    buf.write(f'Documentation is available at: https://hangar-py.readthedocs.io/en/latest/ \n')
    buf.write(f'\n')
    buf.write(f'NOTE: If this Repository has been initialized in a directory under traditional\n')
    buf.write(f'version control systems, please add `{DIR_HANGAR}` as an ignored directory path.\n')
    buf.write(f'Failure to do so will result in undesirable performance of version control\n')
    buf.write(f'systems meant for text/code such as Git, Mercurial, Subversion, etc.\n')

    return buf


================================================
FILE: tests/bulk_importer/test_bulk_importer.py
================================================
import pytest
import numpy as np


def assert_equal(arr, arr2):
    assert np.array_equal(arr, arr2)
    assert arr.dtype == arr2.dtype


def test_bulk_importer_ndarray(repo):
    from hangar.bulk_importer import run_bulk_import
    from hangar.bulk_importer import UDF_Return

    def make_ndarray(column, key, shape, dtype, multiplier):
        size = np.prod(shape)
        arr = np.arange(size, dtype=dtype).reshape(shape) * multiplier
        yield UDF_Return(column=column, key=key, data=arr)

    co = repo.checkout(write=True)
    co.add_ndarray_column('arr', shape=(5, 5), dtype=np.uint32)
    co.commit('first')
    co.close()

    kwargs = []
    expected_kv = []
    for idx in range(200):
        _kw_dict = {
            'column': 'arr',
            'key': idx,
            'shape': (5, 5),
            'dtype': np.uint32,
            'multiplier': idx
        }
        kwargs.append(_kw_dict)

        for _udf_val in make_ndarray(**_kw_dict):
            expected_kv.append(_udf_val)
    assert len(expected_kv) == 200

    run_bulk_import(
        repo,
        branch_name='master',
        column_names=['arr'],
        udf=make_ndarray,
        udf_kwargs=kwargs,
        ncpus=2)

    co = repo.checkout()
    try:
        arr_col = co['arr']
        assert len(arr_col) == 200
        for _expected_udf_val in expected_kv:
            assert _expected_udf_val.key in arr_col
            assert_equal(arr_col[_expected_udf_val.key], _expected_udf_val.data)
    finally:
        co.close()


def test_bulk_importer_pystr(repo):
    from hangar.bulk_importer import run_bulk_import
    from hangar.bulk_importer import UDF_Return

    def make_pystr(column, key, str_val):
        yield UDF_Return(column=column, key=key, data=str_val)

    co = repo.checkout(write=True)
    co.add_str_column('str')
    co.commit('first')
    co.close()

    kwargs = []
    expected_kv = []
    for idx in range(200):
        _kw_dict = {
            'column': 'str',
            'key': idx,
            'str_val': f'{str(idx) * 2}',
        }
        kwargs.append(_kw_dict)

        for _udf_val in make_pystr(**_kw_dict):
            expected_kv.append(_udf_val)
    assert len(expected_kv) == 200

    run_bulk_import(
        repo,
        branch_name='master',
        column_names=['str'],
        udf=make_pystr,
        udf_kwargs=kwargs,
        ncpus=2)

    co = repo.checkout()
    try:
        str_col = co['str']
        assert len(str_col) == 200
        for _expected_udf_val in expected_kv:
            assert _expected_udf_val.key in str_col
            assert str_col[_expected_udf_val.key] == _expected_udf_val.data
    finally:
        co.close()


def test_bulk_importer_pybytes(repo):
    from hangar.bulk_importer import run_bulk_import
    from hangar.bulk_importer import UDF_Return

    def make_pybytes(column, key, str_val):
        raw = str_val.encode()
        yield UDF_Return(column=column, key=key, data=raw)

    co = repo.checkout(write=True)
    co.add_bytes_column('bytes')
    co.commit('first')
    co.close()

    kwargs = []
    expected_kv = []
    for idx in range(200):
        _kw_dict = {
            'column': 'bytes',
            'key': idx,
            'str_val': f'{str(idx) * 2}',
        }
        kwargs.append(_kw_dict)

        for _udf_val in make_pybytes(**_kw_dict):
            expected_kv.append(_udf_val)
    assert len(expected_kv) == 200

    run_bulk_import(
        repo,
        branch_name='master',
        column_names=['bytes'],
        udf=make_pybytes,
        udf_kwargs=kwargs,
        ncpus=2)

    co = repo.checkout()
    try:
        bytes_col = co['bytes']
        assert len(bytes_col) == 200
        for _expected_udf_val in expected_kv:
            assert _expected_udf_val.key in bytes_col
            assert bytes_col[_expected_udf_val.key] == _expected_udf_val.data
    finally:
        co.close()


def test_bulk_importer_two_col_pybytes_pystr(repo):
    from hangar.bulk_importer import run_bulk_import
    from hangar.bulk_importer import UDF_Return

    def _make_pystr(column, key, str_val):
        yield UDF_Return(column=column, key=key, data=str_val)

    def _make_pybytes(column, key, str_val):
        raw = str_val.encode()
        yield UDF_Return(column=column, key=key, data=raw)

    def make_pystr_pybytes(str_col, bytes_col, key, str_val):
        yield from _make_pystr(column=str_col, key=key, str_val=str_val)
        yield from _make_pybytes(column=bytes_col, key=key, str_val=str_val)

    co = repo.checkout(write=True)
    co.add_bytes_column('bytes')
    co.add_str_column('str')
    co.commit('first')
    co.close()

    kwargs = []
    expected_kv = []
    for idx in range(200):
        _kw_dict = {
            'str_col': 'str',
            'bytes_col': 'bytes',
            'key': idx,
            'str_val': f'{str(idx) * 2}',
        }
        kwargs.append(_kw_dict)

        for _udf_val in make_pystr_pybytes(**_kw_dict):
            expected_kv.append(_udf_val)
    assert len(expected_kv) == 400

    run_bulk_import(
        repo,
        branch_name='master',
        column_names=['bytes', 'str'],
        udf=make_pystr_pybytes,
        udf_kwargs=kwargs,
        ncpus=2)

    co = repo.checkout()
    try:
        pybytes_col = co['bytes']
        pystr_col = co['str']
        assert len(pybytes_col) == 200
        assert len(pystr_col) == 200
        for _expected_udf_val in expected_kv:
            assert _expected_udf_val.column in ['str', 'bytes']
            if _expected_udf_val.column == 'str':
                assert _expected_udf_val.key in pystr_col
                assert pystr_col[_expected_udf_val.key] == _expected_udf_val.data
            elif _expected_udf_val.column == 'bytes':
                assert _expected_udf_val.key in pybytes_col
                assert pybytes_col[_expected_udf_val.key] == _expected_udf_val.data
            else:
                raise ValueError(_expected_udf_val.column)
    finally:
        co.close()


def test_signature_wrong(repo):
    from hangar.bulk_importer import run_bulk_import
    from hangar.bulk_importer import UDF_Return

    def wrong_sig_udf(a, b, c=None):
        yield UDF_Return(column='str', key=a, data=f'{a} {b} {c}')

    co = repo.checkout(write=True)
    co.add_str_column('str')
    co.commit('first')
    co.close()

    kwargs = []
    for idx in range(200):
        _kw_dict = {
            'a': 'bytes',
            'str_val': f'{str(idx) * 2}',
        }
        kwargs.append(_kw_dict)

    with pytest.raises(TypeError):
        run_bulk_import(
            repo,
            branch_name='master',
            column_names=['str'],
            udf=wrong_sig_udf,
            udf_kwargs=kwargs,
            ncpus=2)


================================================
FILE: tests/conftest.py
================================================
import time
import shutil
import random
from os.path import join as pjoin
from os import mkdir

import pytest
import numpy as np

from hangar import Repository
from hangar.checkout import WriterCheckout
import hangar


variable_shape_backend_params = ['00', '10']
fixed_shape_backend_params = ['00', '01', '10']


@pytest.fixture(scope="session")
def monkeysession(request):
    from _pytest.monkeypatch import MonkeyPatch
    mpatch = MonkeyPatch()
    yield mpatch
    mpatch.undo()


@pytest.fixture(scope='class')
def classrepo(tmp_path_factory) -> Repository:
    old00_count = hangar.backends.hdf5_00.COLLECTION_COUNT
    old00_size = hangar.backends.hdf5_00.COLLECTION_SIZE
    old01_count = hangar.backends.hdf5_01.COLLECTION_COUNT
    old01_size = hangar.backends.hdf5_01.COLLECTION_SIZE
    old10_size = hangar.backends.numpy_10.COLLECTION_SIZE
    old30_lmdb_settings = hangar.backends.lmdb_30.LMDB_SETTINGS
    old31_lmdb_settings = hangar.backends.lmdb_31.LMDB_SETTINGS
    hangar.backends.hdf5_00.COLLECTION_COUNT = 20
    hangar.backends.hdf5_00.COLLECTION_SIZE = 20
    hangar.backends.hdf5_01.COLLECTION_COUNT = 20
    hangar.backends.hdf5_01.COLLECTION_SIZE = 20
    hangar.backends.numpy_10.COLLECTION_SIZE = 100
    hangar.backends.lmdb_30.LMDB_SETTINGS['map_size'] = 1_000_000
    hangar.backends.lmdb_31.LMDB_SETTINGS['map_size'] = 1_000_000

    old_map_size = hangar.constants.LMDB_SETTINGS['map_size']
    hangar.constants.LMDB_SETTINGS['map_size'] = 2_000_000
    hangar.txnctx.TxnRegisterSingleton._instances = {}

    pth = tmp_path_factory.mktemp('classrepo')
    repo_obj = Repository(path=str(pth), exists=False)
    repo_obj.init(user_name='tester', user_email='foo@test.bar', remove_old=True)
    yield repo_obj
    hangar.constants.LMDB_SETTINGS['map_size'] = old_map_size
    hangar.backends.hdf5_00.COLLECTION_COUNT = old00_count
    hangar.backends.hdf5_00.COLLECTION_SIZE = old00_size
    hangar.backends.hdf5_01.COLLECTION_COUNT = old01_count
    hangar.backends.hdf5_01.COLLECTION_SIZE = old01_size
    hangar.backends.numpy_10.COLLECTION_SIZE = old10_size
    hangar.backends.lmdb_30.LMDB_SETTINGS = old30_lmdb_settings
    hangar.backends.lmdb_31.LMDB_SETTINGS = old31_lmdb_settings
    repo_obj._env._close_environments()


@pytest.fixture()
def managed_tmpdir(monkeypatch, tmp_path):
    monkeypatch.setitem(hangar.constants.LMDB_SETTINGS, 'map_size', 2_000_000)
    monkeypatch.setitem(hangar.backends.lmdb_30.LMDB_SETTINGS, 'map_size', 1_000_000)
    monkeypatch.setitem(hangar.backends.lmdb_31.LMDB_SETTINGS, 'map_size', 1_000_000)
    monkeypatch.setattr(hangar.backends.hdf5_00, 'COLLECTION_COUNT', 20)
    monkeypatch.setattr(hangar.backends.hdf5_00, 'COLLECTION_SIZE', 20)
    monkeypatch.setattr(hangar.backends.hdf5_01, 'COLLECTION_COUNT', 20)
    monkeypatch.setattr(hangar.backends.hdf5_01, 'COLLECTION_SIZE', 20)
    monkeypatch.setattr(hangar.backends.numpy_10, 'COLLECTION_SIZE', 100)
    hangar.txnctx.TxnRegisterSingleton._instances = {}
    yield tmp_path
    shutil.rmtree(tmp_path)



@pytest.fixture(scope='class')
def managed_tmpdir_class(monkeysession, tmp_path_factory):
    pth = tmp_path_factory.mktemp('classrepo2', numbered=True)
    tmp_path = str(pth)
    monkeysession.setitem(hangar.constants.LMDB_SETTINGS, 'map_size', 2_000_000)
    monkeysession.setitem(hangar.backends.lmdb_30.LMDB_SETTINGS, 'map_size', 1_000_000)
    monkeysession.setitem(hangar.backends.lmdb_31.LMDB_SETTINGS, 'map_size', 1_000_000)
    monkeysession.setattr(hangar.backends.hdf5_00, 'COLLECTION_COUNT', 20)
    monkeysession.setattr(hangar.backends.hdf5_00, 'COLLECTION_SIZE', 20)
    monkeysession.setattr(hangar.backends.hdf5_01, 'COLLECTION_COUNT', 20)
    monkeysession.setattr(hangar.backends.hdf5_01, 'COLLECTION_SIZE', 20)
    monkeysession.setattr(hangar.backends.numpy_10, 'COLLECTION_SIZE', 100)
    hangar.txnctx.TxnRegisterSingleton._instances = {}
    yield tmp_path
    shutil.rmtree(tmp_path)




@pytest.fixture()
def repo(managed_tmpdir) -> Repository:
    repo_obj = Repository(path=managed_tmpdir, exists=False)
    repo_obj.init(user_name='tester', user_email='foo@test.bar', remove_old=True)
    yield repo_obj
    repo_obj._env._close_environments()


@pytest.fixture()
def aset_samples_initialized_repo(repo) -> Repository:
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float64)
    co.commit('this is a commit message')
    co.close()
    yield repo


@pytest.fixture()
def aset_subsamples_initialized_repo(repo) -> Repository:
    co = repo.checkout(write=True)
    co.add_ndarray_column(
        name='writtenaset', shape=(5, 7), dtype=np.float64, contains_subsamples=True)
    co.commit('this is a commit message')
    co.close()
    yield repo


@pytest.fixture(params=fixed_shape_backend_params)
def repo_20_filled_samples(request, aset_samples_initialized_repo, array5by7) -> Repository:
    co = aset_samples_initialized_repo.checkout(write=True)
    second_aset = co.add_ndarray_column('second_aset', prototype=array5by7, backend=request.param)
    first_aset = co.columns['writtenaset']
    for i in range(0, 20):
        array5by7[:] = i
        first_aset[str(i)] = array5by7
        array5by7[:] = -i
        second_aset[str(i)] = array5by7
    co.commit('20 samples')
    co.close()
    yield aset_samples_initialized_repo


@pytest.fixture(params=fixed_shape_backend_params)
def repo_20_filled_subsamples(request, aset_subsamples_initialized_repo, array5by7) -> Repository:
    co = aset_subsamples_initialized_repo.checkout(write=True)
    second_aset = co.add_ndarray_column('second_aset', prototype=array5by7,
                                        backend=request.param, contains_subsamples=True)
    firstaset = co['writtenaset']
    secondaset = co['second_aset']
    array5by7[:] = 1
    firstaset[0] = {1: array5by7 * 1, 2: array5by7 * 2, 3: array5by7 * 3}
    firstaset[1] = {4: array5by7 * 4, 5: array5by7 * 5, 6: array5by7 * 6}
    secondaset[0] = {1: array5by7 * 10, 2: array5by7 * 20, 3: array5by7 * 30}
    secondaset[1] = {4: array5by7 * 40, 5: array5by7 * 50, 6: array5by7 * 60}
    co.commit('added data')
    co.close()
    yield aset_subsamples_initialized_repo


@pytest.fixture(params=fixed_shape_backend_params)
def repo_300_filled_samples(request, aset_samples_initialized_repo, array5by7) -> Repository:
    co = aset_samples_initialized_repo.checkout(write=True)
    aset = co.add_ndarray_column('aset', prototype=array5by7, backend=request.param)
    with aset:
        for i in range(300):
            array5by7[:] = i
            aset[i] = array5by7
    co.commit('300 samples')
    co.close()
    yield aset_samples_initialized_repo


@pytest.fixture()
def repo_20_filled_samples2(repo) -> Repository:
    # for diff testing
    dummyData = np.arange(50).astype(np.int64)
    co1 = repo.checkout(write=True, branch='master')
    co1.add_ndarray_column(name='dummy', prototype=dummyData)
    for idx in range(10):
        dummyData[:] = idx
        co1.columns['dummy'][idx] = dummyData
    co1.commit('first commit adding dummy data and hello meta')
    co1.close()
    return repo


@pytest.fixture(params=variable_shape_backend_params)
def aset_samples_var_shape_initialized_repo(request, repo) -> Repository:
    co = repo.checkout(write=True)
    co.add_ndarray_column(
        name='writtenaset', shape=(10, 10), dtype=np.float64, variable_shape=True, backend=request.param)
    co.commit('this is a commit message')
    co.close()
    yield repo


@pytest.fixture()
def aset_samples_initialized_w_checkout(aset_samples_initialized_repo) -> WriterCheckout:
    co = aset_samples_initialized_repo.checkout(write=True)
    yield co
    co.close()


@pytest.fixture()
def array5by7():
    return np.random.random((5, 7))


@pytest.fixture()
def randomsizedarray():
    a = random.randint(2, 8)
    b = random.randint(2, 8)
    return np.random.random((a, b))


@pytest.fixture(params=fixed_shape_backend_params)
def two_commit_filled_samples_repo(request, repo, array5by7) -> Repository:
    co = repo.checkout(write=True)
    co.add_ndarray_column(
        name='writtenaset', shape=(5, 7), dtype=np.float32, backend=request.param)
    for cIdx in range(2):
        if cIdx != 0:
            co = repo.checkout(write=True)

        with co.columns['writtenaset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range((cIdx + 1) * 5):
                arr = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr
        co.commit(f'commit number: {cIdx}')
        co.close()
    yield repo


@pytest.fixture()
def repo_1_br_no_conf(repo) -> Repository:

    dummyData = np.arange(50)
    co1 = repo.checkout(write=True, branch='master')
    co1.add_ndarray_column(name='dummy', prototype=dummyData)
    for idx in range(10):
        dummyData[:] = idx
        co1.columns['dummy'][str(idx)] = dummyData
    co1.commit('first commit adding dummy data')
    co1.close()

    repo.create_branch('testbranch')
    co2 = repo.checkout(write=True, branch='testbranch')
    for idx in range(10, 20):
        dummyData[:] = idx
        co2.columns['dummy'][str(idx)] = dummyData
        co2.columns['dummy'][idx] = dummyData
    co2.commit('first commit on test branch adding non-conflict data')
    co2.close()
    return repo


@pytest.fixture()
def repo_2_br_no_conf(repo_1_br_no_conf) -> Repository:

    dummyData = np.arange(50)
    repo = repo_1_br_no_conf
    co1 = repo.checkout(write=True, branch='master')
    for idx in range(20, 30):
        dummyData[:] = idx
        co1.columns['dummy'][str(idx)] = dummyData
        co1.columns['dummy'][idx] = dummyData
    co1.commit('second commit on master adding non-conflict data')
    co1.close()
    return repo


def mock_server_config(*args, **kwargs):
    import os
    import configparser
    from pathlib import Path
    from hangar import constants as c
    from hangar import remote

    src_path = Path(os.path.dirname(remote.__file__), c.CONFIG_SERVER_NAME)
    CFG = configparser.ConfigParser()
    CFG.read(src_path)
    CFG['SERVER_GRPC']['max_concurrent_rpcs'] = '16'
    CFG['SERVER_GRPC']['max_thread_pool_workers'] = '4'
    return CFG
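
# NOTE: ``mock_server_config`` is swapped in for ``hangar.remote.server.server_config``
# by the server fixtures below (via ``monkeypatch.setattr(server, 'server_config',
# mock_server_config)``). It still reads the packaged ``config_server.ini`` but caps
# the gRPC concurrency settings so each test server stays lightweight.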


@pytest.fixture()
def server_instance(monkeypatch, managed_tmpdir, worker_id):
    from secrets import choice
    from hangar.remote import server
    monkeypatch.setattr(server, 'server_config', mock_server_config)

    possible_addresses = list(range(50000, 59999))
    chosen_address = choice(possible_addresses)
    address = f'localhost:{chosen_address}'
    base_tmpdir = pjoin(managed_tmpdir, f'{worker_id[-1]}')
    mkdir(base_tmpdir)
    server, hangserver, _ = server.serve(base_tmpdir, overwrite=True, channel_address=address)
    server.start()
    yield address

    hangserver.close()
    server.stop(0.1)
    server.wait_for_termination(timeout=2)
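
# The server fixtures pick a random port in the 50000-59999 range and key the
# temporary directory off the pytest-xdist ``worker_id`` so parallel test workers
# do not collide on the same address or on-disk server data (``worker_id`` is
# 'master' when xdist is not in use, hence the ``worker_id[-1]`` suffix).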


@pytest.fixture(scope='class')
def server_instance_class(monkeysession, tmp_path_factory, worker_id):
    from secrets import choice
    from hangar.remote import server
    monkeysession.setattr(server, 'server_config', mock_server_config)
    monkeysession.setitem(hangar.constants.LMDB_SETTINGS, 'map_size', 2_000_000)
    monkeysession.setitem(hangar.backends.lmdb_30.LMDB_SETTINGS, 'map_size', 1_000_000)
    monkeysession.setitem(hangar.backends.lmdb_31.LMDB_SETTINGS, 'map_size', 1_000_000)
    monkeysession.setattr(hangar.backends.hdf5_00, 'COLLECTION_COUNT', 20)
    monkeysession.setattr(hangar.backends.hdf5_00, 'COLLECTION_SIZE', 20)
    monkeysession.setattr(hangar.backends.hdf5_01, 'COLLECTION_COUNT', 20)
    monkeysession.setattr(hangar.backends.hdf5_01, 'COLLECTION_SIZE', 20)
    monkeysession.setattr(hangar.backends.numpy_10, 'COLLECTION_SIZE', 100)

    possible_addresses = list(range(50000, 59999))
    chosen_address = choice(possible_addresses)
    address = f'localhost:{chosen_address}'
    base_tmpdir = tmp_path_factory.mktemp(f'{worker_id[-1]}')
    server, hangserver, _ = server.serve(str(base_tmpdir), overwrite=True, channel_address=address)
    server.start()
    yield address

    hangserver.close()
    server.stop(0.1)
    server.wait_for_termination(timeout=2)
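
# The class-scoped variant above additionally shrinks the LMDB ``map_size`` and the
# HDF5/numpy ``COLLECTION_COUNT`` / ``COLLECTION_SIZE`` constants; presumably this
# forces the backends to roll over to new collections/files after only a handful of
# samples, so the remote tests exercise multi-file code paths with little data.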


@pytest.fixture()
def written_two_cmt_server_repo(server_instance, two_commit_filled_samples_repo) -> tuple:
    time.sleep(0.1)  # wait for ready
    two_commit_filled_samples_repo.remote.add('origin', server_instance)
    success = two_commit_filled_samples_repo.remote.push('origin', 'master')
    assert success == 'master'
    yield (server_instance, two_commit_filled_samples_repo)


@pytest.fixture()
def server_instance_push_restricted(monkeypatch, managed_tmpdir, worker_id):
    from hangar.remote import server
    from secrets import choice
    monkeypatch.setattr(server, 'server_config', mock_server_config)

    possible_addresses = list(range(50000, 59999))
    chosen_address = choice(possible_addresses)
    address = f'localhost:{chosen_address}'
    base_tmpdir = pjoin(managed_tmpdir, f'{worker_id[-1]}')
    mkdir(base_tmpdir)
    server, hangserver, _ = server.serve(base_tmpdir,
                                         overwrite=True,
                                         channel_address=address,
                                         restrict_push=True,
                                         username='right_username',
                                         password='right_password')
    server.start()
    yield address

    hangserver.env._close_environments()
    hangserver.close()
    server.stop(0.1)
    server.wait_for_termination(timeout=2)



================================================
FILE: tests/ml_datasets/test_dataset.py
================================================
import sys

import numpy as np
import pytest
from torch.utils.data import DataLoader
import warnings
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=DeprecationWarning)
    import tensorflow as tf
tf.compat.v1.enable_eager_execution()

from hangar.dataset import make_numpy_dataset
from hangar.dataset import make_torch_dataset
from hangar.dataset import make_tensorflow_dataset
from hangar.dataset.common import HangarDataset


class TestInternalDatasetClass:

    def test_column_without_wrapping_list(self, repo_20_filled_samples, array5by7):
        co = repo_20_filled_samples.checkout()
        first_col = co.columns['writtenaset']
        second_col = co.columns['second_aset']
        dataset = HangarDataset((first_col, second_col))
        key1, key2 = dataset._keys[0]
        assert key1 == key2
        array5by7[:] = int(key1)
        target = array5by7
        assert np.allclose(dataset.index_get(0), target)
        co.close()

    def test_no_column(self):
        with pytest.raises(TypeError):
            HangarDataset([])

    def test_fails_on_write_enabled_columns(self, repo_20_filled_samples):
        repo = repo_20_filled_samples
        co = repo.checkout(write=True)
        first_aset = co.columns['writtenaset']
        with pytest.raises(PermissionError):
            HangarDataset((first_aset,))
        co.close()

    @pytest.mark.filterwarnings("ignore:Column.* writtenaset contains `reference-only` samples")
    def test_columns_without_local_data_and_without_key_argument(self, repo_20_filled_samples):
        repo = repo_20_filled_samples
        co = repo.checkout()
        from hangar.backends import backend_decoder

        # mock nonlocal data: overwriting the sample specs with backend code '50'
        # (remote) marks every sample as `reference-only`, i.e. known but not local
        for k in co._columns._columns['writtenaset']._samples:
            co._columns._columns['writtenaset']._samples[k] = backend_decoder(b'50:daeaaeeaebv')
        col = co.columns['writtenaset']
        with pytest.raises(RuntimeError):
            HangarDataset((col,))

        # perform a mock for nonlocal data
        co = repo.checkout()
        template = backend_decoder(b'50:daeaaeeaebv')
        co._columns._columns['writtenaset']._samples['4'] = template
        col = co.columns['writtenaset']
        dataset = HangarDataset((col,))
        dataset_available_keys = dataset._keys
        assert len(dataset_available_keys) == 19
        assert '4' not in dataset_available_keys
        column_reported_local_keys = list(col.keys(local=True))
        for dset_avail_key in dataset_available_keys:
            assert dset_avail_key in column_reported_local_keys
        assert len(dataset_available_keys) == len(column_reported_local_keys)
        co.close()

    def test_columns_without_common_keys_and_without_key_argument(self, repo_20_filled_samples):
        co = repo_20_filled_samples.checkout(write=True)
        first_col = co.columns['writtenaset']
        first_col['AnExtraKey'] = first_col['0']
        co.commit('added an extra key')
        co.close()
        co = repo_20_filled_samples.checkout()
        first_col = co.columns['writtenaset']
        second_col = co.columns['second_aset']
        with pytest.raises(KeyError):
            HangarDataset((first_col, second_col))
        co.close()

    def test_keys_single_column_success(self, repo_20_filled_samples):
        co = repo_20_filled_samples.checkout()
        first_col = co.columns['writtenaset']
        keys = ['1', '2', '3']
        dataset = HangarDataset((first_col,), keys=keys)
        assert dataset._keys == keys
        co.close()

    def test_keys_multiple_column_success(self, repo_20_filled_samples):
        co = repo_20_filled_samples.checkout()
        first_col = co.columns['writtenaset']
        second_col = co.columns['second_aset']
        keys = [('1', '2'), ('2', '3'), ('3', '4')]
        dataset = HangarDataset((first_col, second_col), keys=keys)
        for i, key in enumerate(keys):
            data = dataset.index_get(i)
            assert np.allclose(data[0], first_col[key[0]])
            assert np.allclose(data[1], second_col[key[1]])
        co.close()

    def test_keys_nested_column_success(self, repo_20_filled_subsamples):
        co = repo_20_filled_subsamples.checkout()
        col1 = co['writtenaset']
        col2 = co['second_aset']

        dataset = HangarDataset([col1, col2])
        data = dataset.index_get(1)
        assert tuple(data[0].keys()) == tuple(data[1].keys()) == (4, 5, 6)
        assert isinstance(data[0], dict)
        assert isinstance(data[1], dict)

        keys = (((0, ...), (0, 1)), ((1, ...), (1, 4)))
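        # keys layout above: one entry per requested index, one element per column;
        # (sample_key, ...) selects the full subsample dict for that sample, while
        # (sample_key, subsample_key) selects a single subsample array.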
        dataset = HangarDataset([col1, col2], keys=keys)
        data = dataset.index_get(1)
        assert tuple(data[0].keys()) == (4, 5, 6)
        assert np.allclose(data[1], col2[1][4])
        co.close()

    def test_keys_not_valid(self, repo_20_filled_samples):
        co = repo_20_filled_samples.checkout()
        first_col = co.columns['writtenaset']
        keys = ['w', 'r', 'o', 'n', 'g']
        dataset = HangarDataset((first_col,), keys=keys)
        with pytest.raises(KeyError):
            dataset.index_get(1)
        co.close()

    @pytest.mark.filterwarnings("ignore:Column.* writtenaset contains `reference-only` samples")
    def test_keys_non_local(self, repo_20_filled_samples):
        repo = repo_20_filled_samples
        co = repo.checkout()
        # mock nonlocal data: backend code '50' (remote) marks sample '4' as
        # `reference-only`, so its data is recorded but not available locally
        from hangar.backends import backend_decoder
        template = backend_decoder(b'50:daeaaeeaebv')
        co._columns._columns['writtenaset']._samples['4'] = template

        col = co.columns['writtenaset']
        col_reported_remote_keys = col.remote_reference_keys
        assert col_reported_remote_keys == ('4',)
        assert len(col_reported_remote_keys) == 1
        dataset = HangarDataset((col,), keys=('0', *col_reported_remote_keys))
        with pytest.raises(KeyError):
            # TODO: hangar internal should raise FileNotFoundError?
            dataset.index_get(1)
        co.close()

# ====================================   Numpy    ====================================


@pytest.mark.filterwarnings("ignore:.* experimental method")
class TestNumpyDataset:
    def test_multiple_dataset_batched_loader(self, repo_20_filled_samples):
        co = repo_20_filled_samples.checkout()
        first_aset = co.columns['writtenaset']
        second_aset = co.columns['second_aset']
        dset = make_numpy_dataset([first_aset, second_aset], batch_size=6, drop_last=True)
        total_samples = 0
        for dset1, dset2 in dset:
            total_samples += dset1.shape[0]
            assert dset1.shape == (6, 5, 7)
            assert dset2.shape == (6, 5, 7)
        assert total_samples == 18  # drop last is True

        # testing with batch_size = 1
        dset = make_numpy_dataset([first_aset, second_aset], batch_size=1, drop_last=True)
        total_samples = 0
        for dset1, dset2 in dset:
            total_samples += dset1.shape[0]
            assert dset1.shape == (1, 5, 7)
            assert dset2.shape == (1, 5, 7)
        assert total_samples == 20  # drop last is True will not have any effect

        with pytest.raises(RuntimeError, match="Setting `drop_last` is a no-op when "
                                               "batching is not enabled"):
            # Setting drop_last without batching
            dset = make_numpy_dataset([first_aset, second_aset], batch_size=0, drop_last=True)
        dset = make_numpy_dataset([first_aset, second_aset], batch_size=0)
        total_samples = 0
        for dset1, dset2 in dset:
            total_samples += 1
            assert dset1.shape == (5, 7)
            assert dset2.shape == (5, 7)
        assert total_samples == 20
        co.close()

    def test_nested_column(self, repo_20_filled_subsamples):
        co = repo_20_filled_subsamples.checkout()
        col1 = co['writtenaset']
        col2 = co['second_aset']
        dset = make_numpy_dataset([col1, col2])
        for data1, data2 in dset:
            assert isinstance(data1, dict)
            assert isinstance(data2, dict)
            assert tuple(data1.keys()) == tuple(data2.keys())

        dset = make_numpy_dataset([col1, col2], batch_size=1, drop_last=True)
        for data1, data2 in dset:
            assert type(data1) is type(data2) is tuple
            assert len(data1) == len(data2) == 1
            assert tuple(data1[0].keys()) == tuple(data2[0].keys())

        dset = make_numpy_dataset([col1, col2], batch_size=2, drop_last=True)
        for data1, data2 in dset:
            assert len(data1) == len(data2) == 2
        co.close()

    def test_lots_of_data_with_multiple_backend(self, repo_300_filled_samples):
        repo = repo_300_filled_samples
        co = repo.checkout()
        aset = co.columns['aset']
        np_dset = make_numpy_dataset([aset], batch_size=10, drop_last=True)
        for data in np_dset:
            assert isinstance(data, np.ndarray)
            assert data.shape == (10, 5, 7)
        co.close()

    def test_shuffle(self, repo_20_filled_samples):
        repo = repo_20_filled_samples
        co = repo.checkout()
        first_aset = co.columns['writtenaset']

        unshuffled_dataset = make_numpy_dataset((first_aset,),
                                                keys=[str(i) for i in range(15)],
                                                shuffle=False)
        expected_unshuffled_content = [i for i in range(15)]
        received_unshuffled_content = []
        for data in unshuffled_dataset:
            received_unshuffled_content.append(int(data[0][0]))
        assert expected_unshuffled_content == received_unshuffled_content

        shuffled_dataset = make_numpy_dataset((first_aset,),
                                              keys=[str(i) for i in range(15)],
                                              shuffle=True)
        received_shuffled_content = []
        for data in shuffled_dataset:
            received_shuffled_content.append(int(data[0][0]))
        assert received_shuffled_content != expected_unshuffled_content
        co.close()

    def test_collate_fn(self, repo_20_filled_subsamples):
        co = repo_20_filled_subsamples.checkout()
        col1 = co['writtenaset']
        col2 = co['second_aset']
        keys = (((0, ...), (0, 1)), ((1, ...), (1, 4)))

        dataset = make_numpy_dataset([col1, col2], keys=keys,
                                     shuffle=False, batch_size=2)
        col1data, col2data = next(iter(dataset))
        assert isinstance(col1data, tuple)
        assert isinstance(col2data, np.ndarray)
        assert list(col1data[0].keys()) == [1, 2, 3]
        assert list(col1data[1].keys()) == [4, 5, 6]
        assert np.allclose(col2data, np.stack((col2[0][1], col2[1][4])))

        def collate_fn(data_arr):
            arr1 = []
            arr2 = []
            for elem in data_arr:
                # picking one arbitrary subsample
                k = list(elem[0].keys())[2]
                data1 = elem[0][k]
                data2 = elem[1]
                arr1.append(data1)
                arr2.append(data2)
            return np.stack(arr1), np.stack(arr2)

        dataset = make_numpy_dataset([col1, col2], keys=keys, shuffle=False,
                                     batch_size=2, collate_fn=collate_fn)
        col1data, col2data = next(iter(dataset))
        assert np.allclose(col1data, np.stack((col1[0][3], col1[1][6])))
        assert np.allclose(col2data, np.stack((col2[0][1], col2[1][4])))
        co.close()


# ====================================   PyTorch  ====================================


class TestTorchDataset(object):

    def test_multiple_dataset_loader(self, repo_20_filled_samples):
        repo = repo_20_filled_samples
        co = repo.checkout()
        first_aset = co.columns['writtenaset']
        second_aset = co.columns['second_aset']
        torch_dset = make_torch_dataset([first_aset, second_aset])
        loader = DataLoader(torch_dset, batch_size=6, drop_last=True)
        total_samples = 0
        for dset1, dset2 in loader:
            total_samples += dset1.shape[0]
            assert dset1.shape == (6, 5, 7)
            assert dset2.shape == (6, 5, 7)
        assert total_samples == 18  # drop last is True
        co.close()

    def test_return_as_dict(self, repo_20_filled_samples):
        repo = repo_20_filled_samples
        co = repo.checkout()
        first_aset = co.columns['writtenaset']
        second_aset = co.columns['second_aset']
        torch_dset = make_torch_dataset([first_aset, second_aset], as_dict=True)
        assert len(torch_dset) == 20
        loader = DataLoader(torch_dset, batch_size=5)
        for sample in loader:
            assert 'writtenaset' in sample.keys()
            assert 'second_aset' in sample.keys()
        co.close()

    def test_lots_of_data_with_multiple_backend(self, repo_300_filled_samples):
        repo = repo_300_filled_samples
        co = repo.checkout()
        aset = co.columns['aset']
        torch_dset = make_torch_dataset([aset], as_dict=True)
        loader = DataLoader(torch_dset, batch_size=10, drop_last=True)
        for data in loader:
            assert isinstance(data, dict)
            assert data['aset'].shape == (10, 5, 7)
        co.close()

    @pytest.mark.skipif(sys.platform == "win32",
                        reason="multiprocess workers does not run on windows")
    def test_lots_of_data_with_multiple_backend_multiple_worker_dataloader(self, repo_300_filled_samples):
        repo = repo_300_filled_samples
        co = repo.checkout()
        aset = co.columns['aset']
        torch_dset = make_torch_dataset([aset])
        loader = DataLoader(torch_dset, batch_size=10, drop_last=True, num_workers=2)
        for data in loader:
            assert data.shape == (10, 5, 7)
        co.close()

    @pytest.mark.skipif(sys.platform == "win32",
                        reason="multiprocess workers does not run on windows")
    def test_two_aset_loader_two_worker_dataloader(self, repo_20_filled_samples):
        repo = repo_20_filled_samples
        co = repo.checkout()
        first_aset = co.columns['writtenaset']
        second_aset = co.columns['second_aset']
        torch_dset = make_torch_dataset([first_aset, second_aset])
        loader = DataLoader(torch_dset, batch_size=2, drop_last=True, num_workers=2)
        count = 0
        for asets_batch in loader:
            assert isinstance(asets_batch, list)
            assert len(asets_batch) == 2
            assert asets_batch[0].shape == (2, 5, 7)
            assert asets_batch[1].shape == (2, 5, 7)
            assert np.allclose(asets_batch[0], -asets_batch[1])
            count += 1
        assert count == 10
        co.close()


# ==================================== Tensorflow ====================================


class TestTfDataset(object):
    # TODO: Add TF2.0 and 1.0 test cases

    def test_dataset_loader(self, repo_20_filled_samples):
        repo = repo_20_filled_samples
        co = repo.checkout()
        first_aset = co.columns['writtenaset']
        second_aset = co.columns['second_aset']

        # multiple datasets
        tf_dset = make_tensorflow_dataset([first_aset, second_aset])
        tf_dset = tf_dset.batch(6)
        for dset1, dset2 in tf_dset.take(2):
            assert dset1.shape == tf.TensorShape((6, 5, 7))
            assert dset2.shape == tf.TensorShape((6, 5, 7))
        co.close()

    def test_variably_shaped(self, aset_samples_var_shape_initialized_repo):
        # A variably-shaped test is required since the collation depends on
        # the way we return the data from the generator
        repo = aset_samples_var_shape_initialized_repo
        co = repo.checkout(write=True)
        aset = co.columns['writtenaset']
        for i in range(5, 10):
            aset[i] = np.random.random((2, i))
        co.commit('added data')
        co.close()

        co = repo.checkout()
        aset = co.columns['writtenaset']
        tf_dset = make_tensorflow_dataset((aset,))
        shape_obj = tf.TensorShape((2, None))
        tf_dset = tf_dset.padded_batch(5, padded_shapes=(shape_obj,))
        for val in tf_dset:
            assert val[0].shape[0] == 5
            assert val[0].shape[1] == 2
            assert 11 > val[0].shape[2] > 4
        co.close()

    def test_lots_of_data_with_multiple_backend(self, repo_300_filled_samples):
        repo = repo_300_filled_samples
        co = repo.checkout()
        aset = co.columns['aset']
        tf_dset = make_tensorflow_dataset([aset])
        tf_dset = tf_dset.batch(10)
        for data in tf_dset:
            assert data[0].shape == (10, 5, 7)
        co.close()

    def test_shuffle(self, repo_20_filled_samples):
        repo = repo_20_filled_samples
        co = repo.checkout()
        first_aset = co.columns['writtenaset']
        unshuffled_dataset = make_tensorflow_dataset((first_aset,),
                                                     keys=[str(i) for i in range(15)],
                                                     shuffle=False)
        expected_unshuffled_content = [i for i in range(15)]
        received_unshuffled_content = []
        for data in unshuffled_dataset:
            received_unshuffled_content.append(int(data[0][0][0]))
        assert expected_unshuffled_content == received_unshuffled_content

        shuffled_dataset = make_tensorflow_dataset((first_aset,),
                                                   keys=[str(i) for i in range(15)],
                                                   shuffle=True)
        received_shuffled_content = []
        for data in shuffled_dataset:
            received_shuffled_content.append(int(data[0][0][0]))
        assert received_shuffled_content != expected_unshuffled_content
        co.close()


================================================
FILE: tests/property_based/conftest.py
================================================
import pytest

variable_shape_backend_params = ['00', '10']
fixed_shape_backend_params = ['00', '01', '10']
str_variable_shape_backend_params = ['30']
bytes_variable_shape_backend_params = ['31']
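
# These two-character codes select hangar storage backends; they appear to map to
# the backend modules as: '00'/'01' -> hdf5_00/hdf5_01, '10' -> numpy_10,
# '30' -> lmdb_30 (str data), '31' -> lmdb_31 (bytes data). Code '50' (remote_50)
# marks remote / reference-only samples and is never written to directly.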


================================================
FILE: tests/property_based/test_pbt_column_flat.py
================================================
import pytest
import numpy as np

from conftest import (
    variable_shape_backend_params,
    fixed_shape_backend_params,
    str_variable_shape_backend_params,
    bytes_variable_shape_backend_params
)

import string
from hypothesis import given, settings, HealthCheck
import hypothesis.strategies as st
from hypothesis.extra import numpy as npst

from hangar import Repository


# ------------------------ Fixture Setup ------------------------------


added_samples = set()


@pytest.fixture(params=fixed_shape_backend_params, scope='class')
def fixed_shape_repo_co_float32_aset_flat(classrepo, request) -> Repository:
    # needed because fixtures don't reset between each hypothesis run
    # tracks added_samples = set(sample_key)
    global added_samples
    added_samples = set()
    co = classrepo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset',
                          shape=(5, 5, 5),
                          dtype=np.float32,
                          variable_shape=False,
                          backend=request.param,
                          contains_subsamples=False)
    yield co
    co.reset_staging_area()
    co.close()


@pytest.fixture(params=variable_shape_backend_params, scope='class')
def variable_shape_repo_co_float32_aset_flat(classrepo, request) -> Repository:
    # needed because fixtures don't reset between each hypothesis run
    # tracks added_samples = set(sample_key)
    global added_samples
    added_samples = set()
    co = classrepo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset',
                          shape=(5, 5, 5),
                          dtype=np.float32,
                          variable_shape=True,
                          backend=request.param,
                          contains_subsamples=False)
    yield co
    co.reset_staging_area()
    co.close()


@pytest.fixture(params=variable_shape_backend_params, scope='class')
def variable_shape_repo_co_uint8_aset_flat(classrepo, request) -> Repository:
    # needed because fixtures don't reset between each hypothesis run
    # tracks added_samples = set(sample_key)
    global added_samples
    added_samples = set()
    co = classrepo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset',
                          shape=(5, 5, 5),
                          dtype=np.uint8,
                          variable_shape=True,
                          backend=request.param,
                          contains_subsamples=False)
    yield co
    co.reset_staging_area()
    co.close()


@pytest.fixture(params=str_variable_shape_backend_params, scope='class')
def variable_shape_repo_co_str_aset_flat(classrepo, request) -> Repository:
    # needed because fixtures don't reset between each hypothesis run
    # tracks added_samples = set(sample_key)
    global added_samples
    added_samples = set()
    co = classrepo.checkout(write=True)
    co.add_str_column(name='strcolumn',
                      contains_subsamples=False,
                      backend=request.param)
    yield co
    co.reset_staging_area()
    co.close()


@pytest.fixture(params=bytes_variable_shape_backend_params, scope='class')
def variable_shape_repo_co_bytes_aset_flat(classrepo, request) -> Repository:
    # needed because fixtures don't reset between each hypothesis run
    # tracks added_samples = set(sample_key)
    global added_samples
    added_samples = set()
    co = classrepo.checkout(write=True)
    co.add_bytes_column(name='bytescolumn',
                        contains_subsamples=False,
                        backend=request.param)
    yield co
    co.reset_staging_area()
    co.close()


# -------------------------- Test Generation ---------------------------------
# Test cases are encapsulated in classes (and fixture functions are set to
# "class" level scope) in order to handle a warning introduced in hypothesis
# version 5.6.0 - 2020-02-29
#
# From release notes:
# > This release adds an explicit warning for tests that are both decorated with @given(...)
#   and request a function-scoped pytest fixture, because such fixtures are only executed once
#   for all Hypothesis test cases and that often causes trouble. See issue #377
#   (https://github.com/HypothesisWorks/hypothesis/issues/377)
#
# However, this is actually the intended behavior for hangar, since we want to reuse
# the same repo/checkout across all of the test case inputs that hypothesis generates


st_valid_names = st.text(
    min_size=1, max_size=8, alphabet=string.ascii_letters + string.digits + '_-.')
st_valid_ints = st.integers(min_value=0, max_value=999_999)
st_valid_keys = st.one_of(st_valid_ints, st_valid_names)


valid_arrays_fixed = npst.arrays(np.float32,
                                 shape=(5, 5, 5),
                                 fill=st.floats(min_value=-10,
                                                max_value=10,
                                                allow_nan=False,
                                                allow_infinity=False,
                                                width=32),
                                 elements=st.floats(min_value=-10,
                                                    max_value=10,
                                                    allow_nan=False,
                                                    allow_infinity=False,
                                                    width=32))


class TestColumn1:

    @given(key=st_valid_keys, val=valid_arrays_fixed)
    @settings(max_examples=200, deadline=None)
    def test_arrayset_fixed_key_values(self, key, val, fixed_shape_repo_co_float32_aset_flat):
        global added_samples

        co = fixed_shape_repo_co_float32_aset_flat
        col = co.columns['writtenaset']
        assert col.schema_type == 'fixed_shape'

        col[key] = val
        added_samples.add(key)
        out = col[key]
        assert out.dtype == val.dtype
        assert out.shape == val.shape
        assert np.allclose(out, val)
        assert len(col) == len(added_samples)


valid_shapes_var = npst.array_shapes(min_dims=3, max_dims=3, min_side=1, max_side=5)
valid_arrays_var_float32 = npst.arrays(np.float32,
                                       shape=valid_shapes_var,
                                       fill=st.floats(min_value=-10,
                                                      max_value=10,
                                                      allow_nan=False,
                                                      allow_infinity=False,
                                                      width=32),
                                       elements=st.floats(min_value=-10,
                                                          max_value=10,
                                                          allow_nan=False,
                                                          allow_infinity=False,
                                                          width=32))


class TestColumn2:

    @given(key=st_valid_keys, val=valid_arrays_var_float32)
    @settings(max_examples=200, deadline=None)
    def test_arrayset_variable_shape_float32(self, key, val, variable_shape_repo_co_float32_aset_flat):
        global added_samples

        co = variable_shape_repo_co_float32_aset_flat
        col = co.columns['writtenaset']
        assert col.schema_type == 'variable_shape'

        col[key] = val
        added_samples.add(key)
        out = col[key]
        assert out.dtype == val.dtype
        assert out.shape == val.shape
        assert np.allclose(out, val)
        assert len(col) == len(added_samples)


valid_arrays_var_uint8 = npst.arrays(np.uint8,
                                     shape=valid_shapes_var,
                                     elements=st.integers(min_value=0, max_value=255),
                                     fill=st.integers(min_value=0, max_value=255))


class TestColumn3:

    @given(key=st_valid_keys, val=valid_arrays_var_uint8)
    @settings(max_examples=200, deadline=None)
    def test_arrayset_variable_shape_uint8(self, key, val, variable_shape_repo_co_uint8_aset_flat):
        global added_samples

        co = variable_shape_repo_co_uint8_aset_flat
        col = co.columns['writtenaset']
        assert col.schema_type == 'variable_shape'

        col[key] = val
        added_samples.add(key)
        out = col[key]
        assert out.dtype == val.dtype
        assert out.shape == val.shape
        assert np.allclose(out, val)
        assert len(col) == len(added_samples)


ascii_characters = st.characters(min_codepoint=0, max_codepoint=127)
ascii_text_strategy = st.text(alphabet=ascii_characters, min_size=0, max_size=500)


class TestColumn4:

    @given(key=st_valid_keys, val=ascii_text_strategy)
    @settings(max_examples=200, deadline=None)
    def test_str_column_variable_shape(self, key, val, variable_shape_repo_co_str_aset_flat):
        global added_samples

        co = variable_shape_repo_co_str_aset_flat
        col = co.columns['strcolumn']
        col[key] = val

        assert col.schema_type == 'variable_shape'
        assert col.column_type == 'str'
        assert col.dtype == str

        added_samples.add(key)
        out = col[key]
        assert out == val
        assert len(col) == len(added_samples)


bytes_strategy = st.binary(max_size=2000)


class TestColumn5:

    @given(key=st_valid_keys, val=bytes_strategy)
    @settings(max_examples=200, deadline=None)
    def test_bytes_column_variable_shape(self, key, val, variable_shape_repo_co_bytes_aset_flat):
        global added_samples

        co = variable_shape_repo_co_bytes_aset_flat
        col = co.columns['bytescolumn']
        col[key] = val

        assert col.schema_type == 'variable_shape'
        assert col.column_type == 'bytes'
        assert col.dtype == bytes

        added_samples.add(key)
        out = col[key]
        assert out == val
        assert len(col) == len(added_samples)



================================================
FILE: tests/property_based/test_pbt_column_nested.py
================================================
from collections import defaultdict

import pytest
import numpy as np

from conftest import (
    variable_shape_backend_params,
    fixed_shape_backend_params,
    str_variable_shape_backend_params,
    bytes_variable_shape_backend_params
)

import string
from hypothesis import given, settings, HealthCheck
import hypothesis.strategies as st
from hypothesis.extra import numpy as npst

from hangar import Repository


# ------------------------ Fixture Setup ------------------------------


added_samples_subsamples = defaultdict(set)


@pytest.fixture(params=fixed_shape_backend_params, scope='class')
def fixed_shape_repo_co_float32_aset_nested(classrepo, request) -> Repository:
    # needed because fixtures don't reset between each hypothesis run
    # tracks added_samples_subsamples[sample_key] = set(subsample_keys)
    global added_samples_subsamples
    added_samples_subsamples = defaultdict(set)
    co = classrepo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset',
                          shape=(5, 5, 5),
                          dtype=np.float32,
                          variable_shape=False,
                          contains_subsamples=True,
                          backend=request.param)
    yield co
    co.reset_staging_area()
    co.close()


@pytest.fixture(params=variable_shape_backend_params, scope='class')
def variable_shape_repo_co_float32_aset_nested(classrepo, request) -> Repository:
    # needed because fixtures don't reset between each hypothesis run
    # tracks added_samples_subsamples[sample_key] = set(subsample_keys)
    global added_samples_subsamples
    added_samples_subsamples = defaultdict(set)
    co = classrepo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset',
                          shape=(5, 5, 5),
                          dtype=np.float32,
                          variable_shape=True,
                          contains_subsamples=True,
                          backend=request.param)
    yield co
    co.reset_staging_area()
    co.close()


@pytest.fixture(params=variable_shape_backend_params, scope='class')
def variable_shape_repo_co_uint8_aset_nested(classrepo, request) -> Repository:
    # needed because fixtures don't reset between each hypothesis run
    # tracks added_samples_subsamples[sample_key] = set(subsample_keys)
    global added_samples_subsamples
    added_samples_subsamples = defaultdict(set)
    co = classrepo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset',
                          shape=(5, 5, 5),
                          dtype=np.uint8,
                          variable_shape=True,
                          contains_subsamples=True,
                          backend=request.param)
    yield co
    co.reset_staging_area()
    co.close()


@pytest.fixture(params=str_variable_shape_backend_params, scope='class')
def variable_shape_repo_co_str_aset_nested(classrepo, request) -> Repository:
    # needed because fixtures don't reset between each hypothesis run
    # tracks added_samples = set(sample_key)
    global added_samples_subsamples
    added_samples_subsamples = defaultdict(set)
    co = classrepo.checkout(write=True)
    co.add_str_column(name='strcolumn',
                      contains_subsamples=True,
                      backend=request.param)
    yield co
    co.reset_staging_area()
    co.close()


@pytest.fixture(params=bytes_variable_shape_backend_params, scope='class')
def variable_shape_repo_co_bytes_aset_nested(classrepo, request) -> Repository:
    # needed because fixtures don't reset between each hypothesis run
    # tracks added_samples = set(sample_key)
    global added_samples_subsamples
    added_samples_subsamples = defaultdict(set)
    co = classrepo.checkout(write=True)
    co.add_bytes_column(name='bytescolumn',
                        contains_subsamples=True,
                        backend=request.param)
    yield co
    co.reset_staging_area()
    co.close()


# -------------------------- Test Generation ---------------------------------
# Test cases are encapsulated in classes (and fixture functions are set to
# "class" level scope) in order to handle a warning introduced in hypothesis
# version 5.6.0 - 2020-02-29
#
# From release notes:
# > This release adds an explicit warning for tests that are both decorated with @given(...)
#   and request a function-scoped pytest fixture, because such fixtures are only executed once
#   for all Hypothesis test cases and that often causes trouble. See issue #377
#   (https://github.com/HypothesisWorks/hypothesis/issues/377)
#
# However, this is actually the intended behavior for hangar, since we want to reuse
# the same repo/checkout across all of the test case inputs that hypothesis generates


st_valid_names = st.text(
    min_size=1, max_size=16, alphabet=string.ascii_letters + string.digits + '_-.')
st_valid_ints = st.integers(min_value=0, max_value=999_999)
st_valid_keys = st.one_of(st_valid_ints, st_valid_names)

valid_arrays_fixed = npst.arrays(np.float32,
                                 shape=(5, 5, 5),
                                 fill=st.floats(min_value=-10,
                                                max_value=10,
                                                allow_nan=False,
                                                allow_infinity=False,
                                                width=32),
                                 elements=st.floats(min_value=-10,
                                                    max_value=10,
                                                    allow_nan=False,
                                                    allow_infinity=False,
                                                    width=32))


class TestColumn1:

    @given(key=st_valid_keys, subkey=st_valid_keys, val=valid_arrays_fixed)
    @settings(max_examples=200, deadline=None)
    def test_arrayset_fixed_key_values_nested(self, key, subkey, val, fixed_shape_repo_co_float32_aset_nested):
        global added_samples_subsamples

        added_samples_subsamples[key].add(subkey)

        co = fixed_shape_repo_co_float32_aset_nested
        col = co.columns['writtenaset']
        assert col.schema_type == 'fixed_shape'
        assert col.contains_subsamples is True
        col[key] = {subkey: val}
        out = col[key][subkey]

        assert len(col) == len(added_samples_subsamples)
        assert len(col[key]) == len(added_samples_subsamples[key])
        assert out.dtype == val.dtype
        assert out.shape == val.shape
        assert np.allclose(out, val)


valid_shapes_var = npst.array_shapes(min_dims=3, max_dims=3, min_side=1, max_side=5)
valid_arrays_var_float32 = npst.arrays(np.float32,
                                       shape=valid_shapes_var,
                                       fill=st.floats(min_value=-10,
                                                      max_value=10,
                                                      allow_nan=False,
                                                      allow_infinity=False,
                                                      width=32),
                                       elements=st.floats(min_value=-10,
                                                          max_value=10,
                                                          allow_nan=False,
                                                          allow_infinity=False,
                                                          width=32))


class TestColumn2:

    @given(key=st_valid_keys, subkey=st_valid_keys, val=valid_arrays_var_float32)
    @settings(max_examples=200, deadline=None)
    def test_arrayset_variable_shape_float32_nested(self, key, val, subkey, variable_shape_repo_co_float32_aset_nested):
        global added_samples_subsamples

        co = variable_shape_repo_co_float32_aset_nested
        col = co.columns['writtenaset']
        assert col.schema_type == 'variable_shape'
        assert col.contains_subsamples is True
        col[key] = {subkey: val}
        out = col[key][subkey]
        added_samples_subsamples[key].add(subkey)

        assert len(col) == len(added_samples_subsamples)
        assert len(col[key]) == len(added_samples_subsamples[key])
        assert out.dtype == val.dtype
        assert out.shape == val.shape
        assert np.allclose(out, val)


valid_arrays_var_uint8 = npst.arrays(np.uint8,
                                     shape=valid_shapes_var,
                                     elements=st.integers(min_value=0, max_value=255),
                                     fill=st.integers(min_value=0, max_value=255))


class TestColumn3:

    @given(key=st_valid_keys, subkey=st_valid_keys, val=valid_arrays_var_uint8)
    @settings(max_examples=200, deadline=None)
    def test_arrayset_variable_shape_uint8_nested(self, key, val, subkey, variable_shape_repo_co_uint8_aset_nested):
        global added_samples_subsamples

        co = variable_shape_repo_co_uint8_aset_nested
        col = co.columns['writtenaset']
        assert col.schema_type == 'variable_shape'
        assert col.contains_subsamples is True
        col[key] = {subkey: val}
        out = col[key][subkey]
        added_samples_subsamples[key].add(subkey)

        assert len(col) == len(added_samples_subsamples)
        assert len(col[key]) == len(added_samples_subsamples[key])
        assert out.dtype == val.dtype
        assert out.shape == val.shape
        assert np.allclose(out, val)


ascii_characters = st.characters(min_codepoint=0, max_codepoint=500)
ascii_text_strategy = st.text(alphabet=ascii_characters, min_size=0, max_size=500)


class TestStrColumn:

    @given(key=st_valid_keys, subkey=st_valid_keys, val=ascii_text_strategy)
    @settings(max_examples=200, deadline=None)
    def test_str_column_variable_shape_nested(self, key, subkey, val, variable_shape_repo_co_str_aset_nested):
        global added_samples_subsamples

        co = variable_shape_repo_co_str_aset_nested
        col = co.columns['strcolumn']
        assert col.schema_type == 'variable_shape'
        assert col.contains_subsamples is True

        col[key] = {subkey: val}
        out = col[key][subkey]
        added_samples_subsamples[key].add(subkey)

        assert len(col) == len(added_samples_subsamples)
        assert len(col[key]) == len(added_samples_subsamples[key])
        assert out == val


bytes_strategy = st.binary(max_size=2000)


class TestBytesColumn:

    @given(key=st_valid_keys, subkey=st_valid_keys, val=bytes_strategy)
    @settings(max_examples=200, deadline=None)
    def test_bytes_column_variable_shape_nested(self, key, subkey, val, variable_shape_repo_co_bytes_aset_nested):
        global added_samples_subsamples

        co = variable_shape_repo_co_bytes_aset_nested
        col = co.columns['bytescolumn']
        assert col.schema_type == 'variable_shape'
        assert col.contains_subsamples is True

        col[key] = {subkey: val}
        out = col[key][subkey]
        added_samples_subsamples[key].add(subkey)

        assert len(col) == len(added_samples_subsamples)
        assert len(col[key]) == len(added_samples_subsamples[key])
        assert out == val


================================================
FILE: tests/test_backend_hdf5_00_hdf5_01.py
================================================
import pytest
import numpy as np


@pytest.fixture(params=['00', '01'])
def be_filehandle(request):
    if request.param == '00':
        from hangar.backends.hdf5_00 import HDF5_00_FileHandles
        return HDF5_00_FileHandles
    elif request.param == '01':
        from hangar.backends.hdf5_01 import HDF5_01_FileHandles
        return HDF5_01_FileHandles
    else:
        raise ValueError(f'request param "{request.param}" for backend code unknown.')
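
# ``be_filehandle`` returns the file-handle *class* (not an instance) so the tests
# below can call ``_dataset_opts`` without opening any files or creating a repo; the
# method only translates ``complib``/``complevel``/``shuffle`` options into the
# keyword arguments presumably handed to h5py's ``create_dataset``.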


@pytest.mark.parametrize('clib,clibCode',
                         [('blosc:blosclz', 0), ('blosc:lz4', 1),
                          ('blosc:lz4hc', 2), ('blosc:zlib', 4),
                          ('blosc:zstd', 5)])
@pytest.mark.parametrize('clevel', [1, 4, 8])
@pytest.mark.parametrize('cshuffle,cshuffleCode', [(None, 0), ('byte', 1), ('bit', 2)])
@pytest.mark.parametrize('beCode', ['00', '01'])
def test_blosc_filter_opts_result_in_correct_dataset_args(
        be_filehandle, clib, clibCode, clevel, cshuffle, cshuffleCode, beCode):

    out = be_filehandle._dataset_opts(complib=clib,
                                      complevel=clevel,
                                      shuffle=cshuffle)
    expected = {
        'compression': 32001,
        'compression_opts': (0, 0, 0, 0, clevel, cshuffleCode, clibCode),
        'shuffle': False}
    assert out == expected


@pytest.mark.parametrize('cshuffle,cshuffleCode', [(None, False), ('byte', True)])
def test_lzf_filter_opts_result_in_correct_dataset_args(be_filehandle, cshuffle, cshuffleCode):
    out = be_filehandle._dataset_opts(complib='lzf',
                                      complevel=None,
                                      shuffle=cshuffle)
    expected = {
        'compression': 'lzf',
        'compression_opts': None,
        'shuffle': cshuffleCode}
    assert out == expected


@pytest.mark.parametrize('clevel', [1, 4, 8])
@pytest.mark.parametrize('cshuffle,cshuffleCode', [(None, False), ('byte', True)])
def test_gzip_filter_opts_result_in_correct_dataset_args(be_filehandle, clevel, cshuffle, cshuffleCode):
    out = be_filehandle._dataset_opts(complib='gzip',
                                      complevel=clevel,
                                      shuffle=cshuffle)
    expected = {
        'compression': 'gzip',
        'compression_opts': clevel,
        'shuffle': cshuffleCode}
    assert out == expected


# ------------------------- test actual compression ---------------------------


@pytest.mark.parametrize('clib,clibCode',
                         [('blosc:blosclz', 0), ('blosc:lz4', 1),
                          ('blosc:lz4hc', 2), ('blosc:zlib', 4),
                          ('blosc:zstd', 5)])
@pytest.mark.parametrize('clevel', [1, 4, 8])
@pytest.mark.parametrize('cshuffle,cshuffleCode', [(None, 0), ('byte', 1), ('bit', 2)])
@pytest.mark.parametrize('be_code', ['00', '01'])
def test_arrayset_init_with_various_blosc_opts(repo, array5by7, clib, clibCode, clevel, cshuffle, cshuffleCode, be_code):

    opts = {
        'shuffle': cshuffle,
        'complib': clib,
        'complevel': clevel,
    }
    wco = repo.checkout(write=True)
    aset = wco.add_ndarray_column('aset', prototype=array5by7, backend=be_code, backend_options=opts)
    assert aset.backend == be_code
    with aset as a:
        for i in range(10):
            a[i] = array5by7 + i

    wuid = aset._be_fs[be_code].w_uid
    plist = aset._be_fs[be_code].wFp[wuid]['/0'].id.get_create_plist()
    _, _, resopts, _ = plist.get_filter(0)
    res_clevel, res_cshuffle, res_clib = resopts[4:7]
    assert res_clevel == clevel
    assert res_clib == clibCode
    assert res_cshuffle == cshuffleCode
    wco.commit('hi')
    wco.close()
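
# The blosc round-trip check above reads the filter settings back through h5py's
# low-level API: ``get_create_plist().get_filter(0)`` returns a
# ``(filter_id, flags, values, name)`` tuple, and for the blosc filter (id 32001)
# ``values[4:7]`` hold the compression level, shuffle mode and compressor codes --
# matching the ``compression_opts`` tuple asserted in
# ``test_blosc_filter_opts_result_in_correct_dataset_args`` above.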


@pytest.mark.parametrize('cshuffle,cshuffleCode', [(False, False), (True, True)])
@pytest.mark.parametrize('be_code', ['00', '01'])
def test_arrayset_init_with_various_lzf_opts(repo, array5by7, cshuffle, cshuffleCode, be_code):

    opts = {
        'shuffle': cshuffle,
        'complib': 'lzf',
        'complevel': None,
    }
    wco = repo.checkout(write=True)
    aset = wco.add_ndarray_column('aset', prototype=array5by7, backend=be_code, backend_options=opts)
    assert aset.backend == be_code
    with aset as a:
        for i in range(10):
            a[i] = array5by7 + i

    res_compression = aset._be_fs[be_code].wFp[aset._be_fs[be_code].w_uid]['/0'].compression
    res_shuffle = aset._be_fs[be_code].wFp[aset._be_fs[be_code].w_uid]['/0'].shuffle
    assert res_compression == 'lzf'
    assert res_shuffle == cshuffleCode
    wco.commit('hi')
    wco.close()


@pytest.mark.parametrize('clevel', [1, 4, 8])
@pytest.mark.parametrize('cshuffle,cshuffleCode', [(False, False), (True, True)])
@pytest.mark.parametrize('be_code', ['00', '01'])
def test_arrayset_init_with_various_gzip_opts(repo, array5by7, clevel, cshuffle, cshuffleCode, be_code):

    opts = {
        'shuffle': cshuffle,
        'complib': 'gzip',
        'complevel': clevel,
    }
    wco = repo.checkout(write=True)
    aset = wco.add_ndarray_column(
        'aset', prototype=array5by7, backend=be_code, backend_options=opts)
    assert aset.backend == be_code
    with aset as a:
        for i in range(10):
            a[i] = array5by7 + i

    res_compression = aset._be_fs[be_code].wFp[aset._be_fs[be_code].w_uid]['/0'].compression
    res_compression_opts = aset._be_fs[be_code].wFp[aset._be_fs[be_code].w_uid]['/0'].compression_opts
    res_shuffle = aset._be_fs[be_code].wFp[aset._be_fs[be_code].w_uid]['/0'].shuffle
    assert res_compression == 'gzip'
    assert res_shuffle == cshuffleCode
    assert res_compression_opts == clevel
    wco.commit('hi')
    wco.close()


@pytest.mark.parametrize('be_code', ['00', '01'])
def test_arrayset_overflows_collection_size_collection_count(be_code, repo, monkeypatch):
    if be_code == '00':
        from hangar.backends import hdf5_00
        monkeypatch.setattr(hdf5_00, 'COLLECTION_COUNT', 5)
        monkeypatch.setattr(hdf5_00, 'COLLECTION_SIZE', 10)
    elif be_code == '01':
        from hangar.backends import hdf5_01
        monkeypatch.setattr(hdf5_01, 'COLLECTION_COUNT', 5)
        monkeypatch.setattr(hdf5_01, 'COLLECTION_SIZE', 10)
    else:
        raise ValueError(f'be_code param "{be_code}" unknown.')

    wco = repo.checkout(write=True)
    proto = np.arange(50).astype(np.uint16)
    aset = wco.add_ndarray_column('aset', prototype=proto, backend=be_code)
    with aset as cm_aset:
        for i in range(500):
            proto[:] = i
            cm_aset[i] = proto
    assert aset._be_fs[be_code].hColsRemain == 4
    assert aset._be_fs[be_code].hMaxSize == 10
    wco.commit('hello')

    with aset as cm_aset:
        for i in range(500):
            proto[:] = i
            assert np.allclose(proto, cm_aset[i])
    wco.close()

    rco = repo.checkout()
    naset = rco.columns['aset']
    with naset as ncm_aset:
        for i in range(500):
            proto[:] = i
            assert np.allclose(proto, ncm_aset[i])
    rco.close()


================================================
FILE: tests/test_branching.py
================================================
import pytest


@pytest.mark.parametrize('name', [
    'dummy branch', 'origin/master', '\nmaster', '\\master', 'master\n',
    'master\r\n', 'master ', 1412, 'foo !', 'foo@', 'foo#', 'foo$', '(foo)',
    'VeryLongNameIsInvalidOver64CharactersNotAllowedVeryLongNameIsInva'])
def test_create_branch_fails_invalid_name(aset_samples_initialized_repo, name):
    repo = aset_samples_initialized_repo
    with pytest.raises(ValueError):
        repo.create_branch(name)


def test_list_branches_only_reports_master_upon_initialization(repo):
    branches = repo.list_branches()
    assert branches == ['master']


def test_cannot_create_new_branch_from_initialized_repo_with_no_commits(repo):
    with pytest.raises(RuntimeError):
        repo.create_branch('testbranch')


def test_can_create_new_branch_from_repo_with_one_commit(repo):
    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    expected_digest = co.commit('first')
    co.close()

    branchRes = repo.create_branch('testbranch')

    assert branchRes.name == 'testbranch'
    assert branchRes.digest == expected_digest


def test_cannot_duplicate_branch_name(aset_samples_initialized_repo):
    aset_samples_initialized_repo.create_branch('testbranch')
    with pytest.raises(ValueError):
        aset_samples_initialized_repo.create_branch('testbranch')


def test_create_multiple_branches_different_name_same_commit(aset_samples_initialized_repo):
    b1 = aset_samples_initialized_repo.create_branch('testbranch1')
    b2 = aset_samples_initialized_repo.create_branch('testbranch2')
    b3 = aset_samples_initialized_repo.create_branch('testbranch3')

    assert b1.digest == b2.digest
    assert b2.digest == b3.digest
    assert b3.digest == b1.digest
    assert aset_samples_initialized_repo.list_branches() == ['master', 'testbranch1', 'testbranch2', 'testbranch3']


def test_create_branch_by_specifying_base_commit(repo):

    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co.commit('first commit')
    first_digest = co.commit_hash
    co['test_meta']['foo'] = 'bar'
    second_digest = co.commit('second')
    co['test_meta']['hello'] = 'world'
    third_digest = co.commit('third')
    co['test_meta']['zen'] = 'python'
    fourth_digest = co.commit('fourth')
    co.close()

    assert repo.list_branches() == ['master']

    secBranch = repo.create_branch('dev-second', base_commit=second_digest)
    assert secBranch.name == 'dev-second'
    assert secBranch.digest == second_digest

    co = repo.checkout(branch='dev-second')
    assert len(co['test_meta']) == 1
    assert co['test_meta']['foo'] == 'bar'
    co.close()


def test_remove_branch_works_when_commits_align(repo):
    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co.commit('first')
    co['test_meta']['foo'] = 'bar'
    masterHEAD = co.commit('second')
    co.close()
    repo.create_branch('testdelete')

    assert repo.list_branches() == ['master', 'testdelete']

    removedBranch = repo.remove_branch('testdelete')
    assert removedBranch.name == 'testdelete'
    assert removedBranch.digest == masterHEAD
    assert repo.list_branches() == ['master']


def test_delete_branch_raises_runtime_error_when_history_not_merged(repo):
    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co.commit('first')
    co['test_meta']['foo'] = 'bar'
    masterHEAD = co.commit('second')
    co.close()

    repo.create_branch('testdelete')
    co = repo.checkout(write=True, branch='testdelete')
    co['test_meta']['hello'] = 'world'
    thirdDigest = co.commit('third')
    co.close()

    # checkout master so staging area is not on branch
    co = repo.checkout(write=True, branch='master')
    co.close()

    assert repo.list_branches() == ['master', 'testdelete']
    with pytest.raises(RuntimeError):
        repo.remove_branch('testdelete')


def test_delete_branch_completes_when_history_not_merged_but_force_option_set(repo):
    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co.commit('first')
    co['test_meta']['foo'] = 'bar'
    masterHEAD = co.commit('second')
    co.close()

    repo.create_branch('testdelete')
    co = repo.checkout(write=True, branch='testdelete')
    co['test_meta']['hello'] = 'world'
    thirdDigest = co.commit('third')
    co.close()

    # checkout master so staging area is not on branch
    co = repo.checkout(write=True, branch='master')
    co.close()
    assert repo.list_branches() == ['master', 'testdelete']

    removedBranch = repo.remove_branch('testdelete', force_delete=True)
    assert removedBranch.name == 'testdelete'
    assert removedBranch.digest == thirdDigest
    assert repo.list_branches() == ['master']


def test_delete_branch_raises_value_error_if_invalid_branch_name(repo):
    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co.commit('first')
    co['test_meta']['foo'] = 'bar'
    masterHEAD = co.commit('second')
    co.close()

    repo.create_branch('testdelete')
    co = repo.checkout(write=True, branch='testdelete')
    co['test_meta']['hello'] = 'world'
    thirdDigest = co.commit('third')
    co.close()

    assert repo.list_branches() == ['master', 'testdelete']
    with pytest.raises(ValueError):
        repo.remove_branch('doesnotexist')
    with pytest.raises(ValueError):
        repo.remove_branch('origin/master')


def test_delete_branch_raises_permission_error_if_writer_lock_held(repo):
    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co.commit('first')
    co['test_meta']['foo'] = 'bar'
    masterHEAD = co.commit('second')
    co.close()

    repo.create_branch('testdelete')
    co = repo.checkout(write=True, branch='testdelete')
    co['test_meta']['hello'] = 'world'
    thirdDigest = co.commit('third')
    co.close()

    # checkout master so the staging area is no longer on the branch being deleted
    co = repo.checkout(write=True, branch='master')
    with pytest.raises(PermissionError):
        repo.remove_branch('testdelete')
    assert repo.list_branches() == ['master', 'testdelete']
    co.close()


def test_delete_branch_raises_permission_error_if_branch_requested_is_staging_head(repo):
    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co.commit('first')
    co['test_meta']['foo'] = 'bar'
    masterHEAD = co.commit('second')
    co.close()

    repo.create_branch('testdelete')
    co = repo.checkout(write=True, branch='testdelete')
    co['test_meta']['hello'] = 'world'
    thirdDigest = co.commit('third')
    co.close()

    with pytest.raises(PermissionError):
        repo.remove_branch('testdelete')
    assert repo.list_branches() == ['master', 'testdelete']


def test_delete_branch_raises_permission_error_if_only_one_branch_left(repo):
    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co['test_meta']['foo'] = 'bar'
    masterHEAD = co.commit('second')
    co.close()

    assert repo.list_branches() == ['master']
    with pytest.raises(PermissionError):
        repo.remove_branch('master')
    assert repo.list_branches() == ['master']


================================================
FILE: tests/test_checkout.py
================================================
import atexit
import numpy as np
import pytest
from conftest import fixed_shape_backend_params
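
# These tests exercise Repository.checkout() in both write and read modes:
# writer-lock exclusivity, invalidation of column handles after close(),
# staging-area resets, and the dict-style interface of a checkout object.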


class TestCheckout(object):

    def test_write_checkout_specifying_commit_not_allowed_if_commit_exists(self, aset_samples_initialized_repo):
        cmt_digest = aset_samples_initialized_repo.log(return_contents=True)['head']
        with pytest.raises(ValueError):
            aset_samples_initialized_repo.checkout(write=True, commit=cmt_digest)

    def test_write_checkout_specifying_commit_not_allowed_if_commit_does_not_exists(self, aset_samples_initialized_repo):
        cmt_digest = 'notrealcommit'
        with pytest.raises(ValueError):
            aset_samples_initialized_repo.checkout(write=True, commit=cmt_digest)

    def test_two_write_checkouts(self, repo):
        w1_checkout = repo.checkout(write=True)
        with pytest.raises(PermissionError):
            repo.checkout(write=True)
        w1_checkout.close()

    def test_two_read_checkouts(self, repo, array5by7):
        w_checkout = repo.checkout(write=True)
        arrayset_name = 'aset'
        r_ds = w_checkout.add_ndarray_column(name=arrayset_name, prototype=array5by7)
        r_ds['1'] = array5by7
        w_checkout.commit('init')
        r1_checkout = repo.checkout()
        r2_checkout = repo.checkout()
        assert np.allclose(r1_checkout.columns['aset']['1'], array5by7)
        assert np.allclose(
            r1_checkout.columns['aset']['1'], r2_checkout.columns['aset']['1'])
        r1_checkout.close()
        r2_checkout.close()
        w_checkout.close()

    def test_write_with_read_checkout(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout()
        with pytest.raises(AttributeError):
            co.add_ndarray_column(name='aset', shape=(5, 7), dtype=np.float64)
        with pytest.raises(AttributeError):
            co.add_str_column('test_meta')
        co.close()

    def test_writer_aset_obj_not_accessible_after_close(self, two_commit_filled_samples_repo):
        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=True)
        asets = co.columns
        aset = co.columns['writtenaset']
        co.close()

        with pytest.raises(PermissionError):
            asets.iswriteable
        with pytest.raises(PermissionError):
            shouldFail = asets['writtenaset']
        with pytest.raises(PermissionError):
            aset.iswriteable

    def test_writer_aset_obj_arrayset_iter_values_not_accessible_after_close(self, two_commit_filled_samples_repo):
        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=True)
        oldObjs = []
        for oldObj in co.columns.values():
            oldObjs.append(oldObj)
        co.close()

        for oldObj in oldObjs:
            with pytest.raises(PermissionError):
                oldObj.column

    def test_writer_aset_obj_arrayset_iter_items_not_accessible_after_close(self, two_commit_filled_samples_repo):
        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=True)
        oldObjs = {}
        for oldName, oldObj in co.columns.items():
            oldObjs[oldName] = oldObj
        co.close()

        for name, obj in oldObjs.items():
            assert isinstance(name, str)
            with pytest.raises(PermissionError):
                obj.column

    def test_writer_aset_obj_not_accessible_after_commit_and_close(self, aset_samples_initialized_repo, array5by7):
        repo = aset_samples_initialized_repo
        co = repo.checkout(write=True)
        asets = co.columns
        aset = co.columns['writtenaset']
        aset['1'] = array5by7
        co.commit('hey there')
        co.close()

        with pytest.raises(PermissionError):
            asets.iswriteable
        with pytest.raises(PermissionError):
            shouldFail = asets['writtenaset']
        with pytest.raises(PermissionError):
            aset.iswriteable
        with pytest.raises(PermissionError):
            shouldFail = aset['1']

    def test_reader_aset_obj_not_accessible_after_close(self, two_commit_filled_samples_repo):
        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=False)
        asets = co.columns
        aset = co.columns['writtenaset']
        co.close()

        with pytest.raises(PermissionError):
            asets.iswriteable
        with pytest.raises(PermissionError):
            shouldFail = asets['writtenaset']
        with pytest.raises(PermissionError):
            aset.iswriteable

    def test_reader_aset_obj_column_iter_values_not_accessible_after_close(self, two_commit_filled_samples_repo):
        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=False)
        oldObjs = []
        for oldObj in co.columns.values():
            oldObjs.append(oldObj)
        co.close()

        for oldObj in oldObjs:
            with pytest.raises(PermissionError):
                oldObj.column

    def test_reader_aset_obj_arrayset_iter_items_not_accessible_after_close(self, two_commit_filled_samples_repo):
        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=False)
        oldObjs = {}
        for oldName, oldObj in co.columns.items():
            oldObjs[oldName] = oldObj
        co.close()

        for name, obj in oldObjs.items():
            assert isinstance(name, str)
            with pytest.raises(PermissionError):
                obj.column

    def test_reader_arrayset_context_manager_not_accessible_after_close(self, two_commit_filled_samples_repo):
        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=False)
        aset = co.columns['writtenaset']
        klist = []
        with aset as ds:
            for k in ds.keys():
                klist.append(k)
                a = ds
        co.close()

        with pytest.raises(PermissionError):
            a.column
        with pytest.raises(PermissionError):
            ds.column
        with pytest.raises(PermissionError):
            aset[klist[0]]

    def test_writer_arrayset_context_manager_not_accessible_after_close(self, two_commit_filled_samples_repo):
        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=True)
        aset = co.columns['writtenaset']
        with aset as ds:
            # for k in ds.keys():
            #     klist.append(k)
            a = ds
            a['1232'] = np.random.randn(5, 7).astype(np.float32)
        co.close()

        with pytest.raises(PermissionError):
            a.column
        with pytest.raises(PermissionError):
            ds.column
        with pytest.raises(PermissionError):
            aset['1232']

    def test_close_read_does_not_invalidate_write_checkout(self, aset_samples_initialized_repo, array5by7):
        repo = aset_samples_initialized_repo
        r_co = repo.checkout(write=False)
        w_co = repo.checkout(write=True)
        r_co.close()

        with pytest.raises(PermissionError):
            shouldFail = r_co.columns

        aset = w_co.columns['writtenaset']
        aset['1'] = array5by7
        assert np.allclose(w_co.columns['writtenaset']['1'], array5by7)
        w_co.commit('hello commit')
        w_co.close()

        with pytest.raises(PermissionError):
            aset.column

    def test_close_write_does_not_invalidate_read_checkout(self, aset_samples_initialized_repo, array5by7):
        repo = aset_samples_initialized_repo
        r_co = repo.checkout(write=False)
        w_co = repo.checkout(write=True)

        aset = w_co.columns['writtenaset']
        aset['1'] = array5by7
        assert np.allclose(w_co.columns['writtenaset']['1'], array5by7)
        w_co.commit('hello commit')
        w_co.close()

        assert 'writtenaset' in r_co.columns
        with pytest.raises(PermissionError):
            aset.column
        r_co.close()
        with pytest.raises(PermissionError):
            r_co.columns

    def test_operate_on_arrayset_after_closing_old_checkout(self, repo, array5by7):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('aset', prototype=array5by7)
        co.commit('this is a commit message')
        co.close()
        co = repo.checkout(write=True)
        with pytest.raises(PermissionError):
            aset['1'] = array5by7
            co.commit('this is a commit message')
        co.close()
        with pytest.raises(PermissionError):
            aset['1']

    def test_operate_on_closed_checkout(self, repo, array5by7):
        co = repo.checkout(write=True)
        co.add_ndarray_column('aset', prototype=array5by7)
        co.commit('this is a commit message')
        co.close()
        with pytest.raises(PermissionError):
            co.columns['aset']['1'] = array5by7

    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_operate_on_arrayset_samples_after_commiting_but_not_closing_checkout(self, aset_backend, repo, array5by7):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('aset', prototype=array5by7, backend=aset_backend)
        aset['1'] = array5by7
        co.commit('hi')

        aset['2'] = array5by7  # the handle stays valid: the checkout was committed but not closed
        co.commit('hello 2')
        assert np.allclose(aset['2'], array5by7)
        co.close()

        with pytest.raises(PermissionError):
            aset.name

    @pytest.mark.parametrize("aset1_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset2_backend", fixed_shape_backend_params)
    def test_operate_on_arraysets_after_commiting_but_not_closing_checkout(self, aset1_backend, aset2_backend, repo, array5by7):
        co = repo.checkout(write=True)
        asets = co.columns
        aset = co.add_ndarray_column('aset', prototype=array5by7, backend=aset1_backend)
        aset['1'] = array5by7
        co.commit('hi')

        aset2 = co.add_ndarray_column('arange', prototype=np.arange(50), backend=aset2_backend)
        aset2['0'] = np.arange(50)
        co.commit('hello 2')
        assert np.allclose(aset2['0'], np.arange(50))
        co.close()

        with pytest.raises(PermissionError):
            co.columns
        with pytest.raises(PermissionError):
            asets.iswriteable
        with pytest.raises(PermissionError):
            aset2.name

    def test_with_wrong_argument_value(self, repo):
        # It is intuitive for a user to pass the branch name as the first positional
        # argument, but hangar expects the write permission flag in that position.
        with pytest.raises(ValueError):
            repo.checkout('branchname')
        with pytest.raises(ValueError):
            repo.checkout(write='True')
        with pytest.raises(ValueError):
            repo.checkout(branch=True)
        co = repo.checkout(True)  # This should not raise any exception
        # unregister the close operation since conftest will close the env before atexit runs it.
        atexit.unregister(co.close)


    @pytest.mark.parametrize("aset1_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset2_backend", fixed_shape_backend_params)
    def test_reset_staging_area_no_changes_made_does_not_work(self, aset1_backend, aset2_backend, repo, array5by7):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('aset', prototype=array5by7, backend=aset1_backend)
        aset2 = co.add_ndarray_column('arange', prototype=np.arange(50), backend=aset2_backend)
        aset['1'] = array5by7
        aset2['0'] = np.arange(50)
        co.commit('hi')

        # verifications before reset
        assert np.allclose(aset2['0'], np.arange(50))
        assert len(co.columns) == 2
        assert co.columns['arange'].iswriteable

        with pytest.raises(RuntimeError, match='No changes made'):
            co.reset_staging_area()

        # verifications after reset
        assert np.allclose(aset2['0'], np.arange(50))
        assert len(co.columns) == 2
        assert co.columns['arange'].iswriteable
        co.close()

    @pytest.mark.parametrize("aset1_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset2_backend", fixed_shape_backend_params)
    def test_reset_staging_area_clears_arraysets(self, aset1_backend, aset2_backend, repo, array5by7):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('aset', prototype=array5by7, backend=aset1_backend)
        aset['1'] = array5by7
        co.commit('hi')

        aset2 = co.add_ndarray_column('arange', prototype=np.arange(50), backend=aset2_backend)
        aset2['0'] = np.arange(50)
        # verifications before reset
        assert np.allclose(aset2['0'], np.arange(50))
        assert len(co.columns) == 2
        assert co.columns['arange'].iswriteable

        co.reset_staging_area()
        # behavior expected after reset
        assert len(co.columns) == 1
        with pytest.raises(PermissionError):
            aset2['0']
        with pytest.raises(KeyError):
            co.columns['arange']
        co.close()

    @pytest.mark.parametrize('write', [True, False])
    def test_checkout_dunder_contains_method(self, repo_20_filled_samples, write):
        co = repo_20_filled_samples.checkout(write=write)
        assert 'writtenaset' in co
        assert 'second_aset' in co
        assert 'doesnotexist' not in co
        co.close()

    @pytest.mark.parametrize('write', [True, False])
    def test_checkout_dunder_len_method(self, repo_20_filled_samples, write):
        co = repo_20_filled_samples.checkout(write=write)
        assert len(co) == 2
        co.close()

    @pytest.mark.parametrize('write', [True, False])
    def test_checkout_dunder_iter_method(self, repo_20_filled_samples, write):
        from typing import Iterable
        co = repo_20_filled_samples.checkout(write=write)
        it = iter(co)
        assert isinstance(it, Iterable)
        icount = 0
        for k in it:
            assert k in ['writtenaset', 'second_aset']
            icount += 1
        assert icount == 2
        co.close()

    @pytest.mark.parametrize('write', [True, False])
    def test_checkout_keys_method(self, repo_20_filled_samples, write):
        co = repo_20_filled_samples.checkout(write=write)
        keys = list(co.keys())
        assert len(keys) == 2
        for k in ['writtenaset', 'second_aset']:
            assert k in keys
        co.close()

    @pytest.mark.parametrize('write', [True, False])
    def test_checkout_values_method(self, repo_20_filled_samples, write):
        from hangar.columns.layout_nested import NestedSampleWriter, NestedSampleReader
        from hangar.columns.layout_flat import FlatSampleWriter, FlatSampleReader
        possible_classes = (
            NestedSampleWriter, NestedSampleReader, FlatSampleReader, FlatSampleWriter)

        co = repo_20_filled_samples.checkout(write=write)
        icount = 0
        for col in co.values():
            assert isinstance(col, possible_classes)
            icount += 1
        assert icount == 2
        co.close()

    @pytest.mark.parametrize('write', [True, False])
    def test_checkout_items_method(self, repo_20_filled_samples, write):
        from hangar.columns.layout_nested import NestedSampleWriter, NestedSampleReader
        from hangar.columns.layout_flat import FlatSampleWriter, FlatSampleReader
        possible_classes = (
            NestedSampleWriter, NestedSampleReader, FlatSampleReader, FlatSampleWriter)

        co = repo_20_filled_samples.checkout(write=write)
        icount = 0
        for k, col in co.items():
            assert k in ['writtenaset', 'second_aset']
            assert isinstance(col, possible_classes)
            icount += 1
        assert icount == 2
        co.close()

    @pytest.mark.parametrize('write', [True, False])
    def test_checkout_log_method(self, repo_20_filled_samples, write):
        repo_log = repo_20_filled_samples.log(return_contents=True)
        co = repo_20_filled_samples.checkout(write=write)
        co_log = co.log(return_contents=True)
        co.close()
        assert repo_log == co_log


class TestBranchingMergingInCheckout(object):

    def test_merge(self, aset_samples_initialized_repo, array5by7):
        branch = aset_samples_initialized_repo.create_branch('testbranch')
        assert isinstance(branch.name, str)
        assert isinstance(branch.digest, str)
        co = aset_samples_initialized_repo.checkout(write=True, branch=branch.name)
        assert co._branch_name == branch.name
        co.add_str_column('test_meta')
        co.columns['writtenaset']['1'] = array5by7
        co['test_meta'].update({'a': 'b'})
        co.commit('this is a commit message')
        co.close()
        aset_samples_initialized_repo.merge('test merge', 'master', branch.name)
        co = aset_samples_initialized_repo.checkout()
        assert (co.columns['writtenaset']['1'] == array5by7).all()
        assert co['test_meta'].get('a') == 'b'
        co.close()

    def test_merge_without_closing_previous_checkout(self, aset_samples_initialized_repo, array5by7):
        branch = aset_samples_initialized_repo.create_branch('testbranch')
        co = aset_samples_initialized_repo.checkout(write=True, branch=branch.name)
        co.columns['writtenaset']['1'] = array5by7
        co.commit('this is a commit message')
        with pytest.raises(PermissionError):
            aset_samples_initialized_repo.merge('test merge', 'master', branch.name)
        # unregister the close operation since conftest will close the env before atexit runs it.
        atexit.unregister(co.close)

    def test_merge_multiple_checkouts_same_aset(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.add_str_column('test_meta')
        co.commit('test meta commit')
        co.close()
        branch1 = aset_samples_initialized_repo.create_branch('testbranch1')
        co = aset_samples_initialized_repo.checkout(write=True, branch=branch1.name)
        co.columns['writtenaset']['1'] = array5by7
        co['test_meta'].update({'a1': 'b1'})
        co.commit('this is a commit message')
        co.close()

        branch2 = aset_samples_initialized_repo.create_branch('testbranch2')
        co = aset_samples_initialized_repo.checkout(write=True, branch=branch2.name)
        co.columns['writtenaset']['2'] = array5by7
        co['test_meta'].update({'a2': 'b2'})
        co.commit('this is a commit message')
        co.close()

        aset_samples_initialized_repo.merge('test merge 1', 'master', branch1.name)
        aset_samples_initialized_repo.merge('test merge 2', 'master', branch2.name)

        co = aset_samples_initialized_repo.checkout(branch='master')
        assert len(co.columns) == 2
        assert len(co.columns['writtenaset']) == 2
        assert list(co['test_meta'].keys()) == ['a1', 'a2']
        co.close()

    def test_merge_multiple_checkouts_multiple_aset(self, aset_samples_initialized_repo, array5by7):
        branch1 = aset_samples_initialized_repo.create_branch('testbranch1')
        co = aset_samples_initialized_repo.checkout(write=True, branch=branch1.name)
        co.columns['writtenaset']['1'] = array5by7
        co.commit('this is a commit message')
        co.close()

        branch2 = aset_samples_initialized_repo.create_branch('testbranch2')
        co = aset_samples_initialized_repo.checkout(write=True, branch=branch2.name)
        second_aset = co.add_ndarray_column(name='second_aset', prototype=array5by7)
        second_aset['1'] = array5by7
        co.commit('this is a commit message')
        co.close()

        aset_samples_initialized_repo.merge('test merge 1', 'master', branch1.name)
        aset_samples_initialized_repo.merge('test merge 2', 'master', branch2.name)

        co = aset_samples_initialized_repo.checkout(branch='master')
        assert len(co.columns) == 2
        assert len(co.columns['writtenaset']) == 1
        assert len(co.columns['second_aset']) == 1
        co.close()

    def test_merge_diverged_conflict(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.add_str_column('test_meta')
        co.commit('test meta commit')
        co.close()
        branch1 = aset_samples_initialized_repo.create_branch('testbranch1')
        branch2 = aset_samples_initialized_repo.create_branch('testbranch2')

        co = aset_samples_initialized_repo.checkout(write=True, branch=branch1.name)
        co.columns['writtenaset']['1'] = array5by7
        co['test_meta'].update({'a': 'b'})
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout(write=True, branch=branch2.name)
        newarray = np.zeros_like(array5by7)
        co.columns['writtenaset']['1'] = newarray
        co['test_meta'].update({'a': 'c'})
        co.commit('this is a commit message')
        co.close()

        aset_samples_initialized_repo.merge('commit message', 'master', branch1.name)

        with pytest.raises(ValueError):
            aset_samples_initialized_repo.merge('commit message', 'master', branch2.name)

    def test_new_branch_from_where(self, aset_samples_initialized_repo, array5by7):
        branch1 = aset_samples_initialized_repo.create_branch('testbranch1')
        branch2 = aset_samples_initialized_repo.create_branch('testbranch2')
        co1 = aset_samples_initialized_repo.checkout(write=True, branch=branch1.name)
        h1 = aset_samples_initialized_repo.log(branch=co1.branch_name, return_contents=True)
        co1.close()

        co2 = aset_samples_initialized_repo.checkout(write=True, branch=branch2.name)
        co2.add_ndarray_column('aset2', prototype=array5by7)
        co2.columns['aset2']['2'] = array5by7
        co2.commit('this is a merge message')
        co2.close()
        h2 = aset_samples_initialized_repo.log(branch=branch2.name, return_contents=True)

        branch3 = aset_samples_initialized_repo.create_branch('testbranch3')
        co3 = aset_samples_initialized_repo.checkout(write=True, branch=branch3.name)
        h3 = aset_samples_initialized_repo.log(branch=co3.branch_name, return_contents=True)
        co3.close()

        assert h2['head'] == h3['head']
        assert h2['ancestors'][h2['head']] == h3['ancestors'][h3['head']]
        assert h1['head'] in h2['ancestors'][h2['head']]

    def test_cannot_checkout_branch_with_staged_changes(self, aset_samples_initialized_repo, array5by7):
        branch1 = aset_samples_initialized_repo.create_branch('testbranch1')
        branch2 = aset_samples_initialized_repo.create_branch('testbranch2')
        co1 = aset_samples_initialized_repo.checkout(write=True, branch=branch1.name)
        initial_cmt = co1.commit_hash
        co1.add_ndarray_column('aset2', prototype=array5by7)
        co1.columns['aset2']['2'] = array5by7
        co1.close()

        with pytest.raises(ValueError):
            con = aset_samples_initialized_repo.checkout(write=True, branch=branch2.name)

        co1 = aset_samples_initialized_repo.checkout(write=True, branch=branch1.name)
        co1.commit('hi')
        assert co1.commit_hash != initial_cmt
        assert co1.branch_name == branch1.name
        co1.close()

        co2 = aset_samples_initialized_repo.checkout(write=True, branch=branch2.name)
        assert co2.branch_name == branch2.name
        assert co2.commit_hash == initial_cmt
        co2.close()


def test_full_from_short_commit_digest(two_commit_filled_samples_repo):
    from hangar.records.commiting import expand_short_commit_digest

    repo = two_commit_filled_samples_repo
    history = repo.log(branch='master', return_contents=True)
    commits = history['order']
    for full_cmt in commits:
        short_cmt = full_cmt[:18]
        found_cmt = expand_short_commit_digest(repo._env.refenv, short_cmt)
        assert found_cmt == full_cmt

    with pytest.raises(KeyError, match='No matching commit hash found starting with'):
        expand_short_commit_digest(repo._env.refenv, 'zzzzzzzzzzzzzzzzzzzzzzzzzzzz')


def test_writer_context_manager_objects_are_gc_removed_after_co_close(two_commit_filled_samples_repo):

    repo = two_commit_filled_samples_repo
    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    with co['test_meta'] as m:
        m['aa'] = 'bb'
        cmt1 = co.commit('here is the first commit')
        with co.columns['writtenaset'] as d:
            d['2422'] = d['0'] + 213
            cmt2 = co.commit('here is the second commit')

    assert co.close() is None
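    # every handle that referenced the closed checkout must now refuse access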
    with pytest.raises(PermissionError):
        _ = m.__dict__
    with pytest.raises(PermissionError):
        _ = d.column
    with pytest.raises(PermissionError):
        _ = co.columns
    assert co.__dict__ == {}

    co = repo.checkout(commit=cmt1)
    assert 'aa' in co['test_meta']
    assert co['test_meta']['aa'] == 'bb'
    co.close()

    co = repo.checkout(commit=cmt2)
    assert 'aa' in co['test_meta']
    assert co['test_meta']['aa'] == 'bb'
    assert '2422' in co.columns['writtenaset']
    assert np.allclose(co.columns['writtenaset']['2422'],
                       co.columns['writtenaset']['0'] + 213)
    co.close()


def test_reader_context_manager_objects_are_gc_removed_after_co_close(two_commit_filled_samples_repo):

    repo = two_commit_filled_samples_repo
    co = repo.checkout(write=False)
    with co.columns['writtenaset'] as d:
        ds = d['2']

    assert d.iswriteable is False
    assert np.allclose(ds, d.get('2'))
    assert np.allclose(ds, co.columns['writtenaset'].get('2'))

    assert co.close() is None

    with pytest.raises(PermissionError):
        d.column
    with pytest.raises(AttributeError):
        co._columns
    with pytest.raises(PermissionError):
        str(co.columns.get('writtenaset'))
    with pytest.raises(PermissionError):
        co.columns
    with pytest.raises(PermissionError):
        repr(co)
    assert co.__dict__ == {}


def test_checkout_branch_not_existing_does_not_hold_writer_lock(two_commit_filled_samples_repo):
    repo = two_commit_filled_samples_repo
    assert 'doesnotexist' not in repo.list_branches()
    assert repo.writer_lock_held is False
    with pytest.raises(ValueError):
        co = repo.checkout(write=True, branch='doesnotexist')
    assert repo.writer_lock_held is False
    with pytest.raises(NameError):
        co.branch_name  # `co` was never bound because the failed checkout raised before assignment


================================================
FILE: tests/test_checkout_arrayset_access.py
================================================
import pytest
import numpy as np


# -------------------------- Writer Checkout ----------------------------------


@pytest.mark.parametrize('write', [True, False])
def test_arrayset_getattr_does_not_raise_permission_error_if_alive(write, aset_samples_initialized_repo):
    co = aset_samples_initialized_repo.checkout(write=write)
    asets = co.columns

    assert hasattr(asets, 'doesnotexist') is False  # does not raise error
    assert hasattr(asets, '_mode') is True
    with pytest.raises(AttributeError):
        assert getattr(asets, 'doesnotexist')
    assert getattr(asets, '_mode') == ('a' if write else 'r')  # 'a' for writer, 'r' for reader

    co.close()
    with pytest.raises(PermissionError):
        hasattr(asets, 'doesnotexist')
    with pytest.raises(PermissionError):
        hasattr(asets, '_mode')


def test_write_in_context_manager_no_loop(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    with wco:
        assert wco._is_conman is True
        wco['writtenaset']['0'] = array5by7
        wco['newaset']['0'] = array10
    assert wco._is_conman is False

    assert np.allclose(array5by7, wco.columns['writtenaset']['0'])
    assert np.allclose(array10, wco.columns['newaset']['0'])
    wco.commit('init')
    assert np.allclose(array5by7, wco.columns['writtenaset']['0'])
    assert np.allclose(array10, wco.columns['newaset']['0'])
    wco.close()

    rco = aset_samples_initialized_repo.checkout()
    assert np.allclose(array5by7, rco.columns['writtenaset']['0'])
    assert np.allclose(array10, rco.columns['newaset']['0'])
    rco.close()


def test_write_in_context_manager_many_samples_looping(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    with wco:
        assert wco._is_conman is True
        for idx in range(100):
            array10[:] = idx
            array5by7[:] = idx
            wco['writtenaset'][idx] = array5by7
            wco['newaset'][idx] = array10
    assert wco._is_conman is False

    for idx in range(100):
        array10[:] = idx
        array5by7[:] = idx
        assert np.allclose(array5by7, wco.columns['writtenaset'][idx])
        assert np.allclose(array10, wco.columns['newaset'][idx])
    wco.commit('init')
    for idx in range(100):
        array10[:] = idx
        array5by7[:] = idx
        assert np.allclose(array5by7, wco.columns['writtenaset'][idx])
        assert np.allclose(array10, wco.columns['newaset'][idx])
    wco.close()

    rco = aset_samples_initialized_repo.checkout()
    for idx in range(100):
        array10[:] = idx
        array5by7[:] = idx
        assert np.allclose(array5by7, rco.columns['writtenaset'][idx])
        assert np.allclose(array10, rco.columns['newaset'][idx])
    rco.close()


def test_write_fails_if_checkout_closed(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)
    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    wco['writtenaset'][0] = array5by7
    wco['newaset'][0] = array10
    wco.close()
    with pytest.raises((PermissionError, UnboundLocalError)):
        wco['writtenaset'][1] = array5by7
        wco['newaset'][1] = array10

    wco2 = aset_samples_initialized_repo.checkout(write=True)
    assert 0 in wco2.columns['writtenaset']
    assert 0 in wco2.columns['newaset']
    assert 1 not in wco2.columns['writtenaset']
    assert 1 not in wco2.columns['newaset']
    wco2.close()


def test_write_context_manager_fails_if_checkout_closed(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)
    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    wco['writtenaset'][0] = array5by7
    wco['newaset'][0] = array10
    wco.close()
    with pytest.raises(PermissionError):
        with wco:
            wco['writtenaset'][1] = array5by7
    with pytest.raises(PermissionError):
        with wco:
            wco['newaset'][1] = array10

    wco2 = aset_samples_initialized_repo.checkout(write=True)
    assert 0 in wco2.columns['writtenaset']
    assert 0 in wco2.columns['newaset']
    assert 1 not in wco2.columns['writtenaset']
    assert 1 not in wco2.columns['newaset']
    wco2.close()


def test_writer_co_read_single_aset_single_sample(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.columns['writtenaset'][1] = array5by7 + 1
    wco.columns['writtenaset'][2] = array5by7 + 2

    assert np.allclose(wco['writtenaset', 0], array5by7)
    assert np.allclose(wco['writtenaset', 1], array5by7 + 1)
    assert np.allclose(wco['writtenaset', 2], array5by7 + 2)
    wco.close()


def test_writer_co_read_single_aset_multiple_samples(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.columns['writtenaset'][1] = array5by7 + 1
    wco.columns['writtenaset'][2] = array5by7 + 2

    res = wco[('writtenaset', 0), ('writtenaset', 1), ('writtenaset', 2)]
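    # multi-key reads return a list of arrays in the same order the keys were given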
    assert np.allclose(res[0], array5by7)
    assert np.allclose(res[1], array5by7 + 1)
    assert np.allclose(res[2], array5by7 + 2)
    wco.close()


def test_writer_co_read_multiple_aset_single_samples(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.columns['writtenaset'][1] = array5by7 + 1
    wco.columns['writtenaset'][2] = array5by7 + 2

    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    array10[:] = 0
    wco.columns['newaset'][0] = array10
    wco.columns['newaset'][1] = array10 + 1
    wco.columns['newaset'][2] = array10 + 2

    res = wco[('writtenaset', 0), ('newaset', 0)]
    assert np.allclose(res[0], array5by7)
    assert np.allclose(res[1], array10)
    res = wco[('writtenaset', 1), ('newaset', 1)]
    assert np.allclose(res[0], array5by7 + 1)
    assert np.allclose(res[1], array10 + 1)
    wco.close()


def test_writer_co_read_multiple_aset_multiple_samples(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.columns['writtenaset'][1] = array5by7 + 1
    wco.columns['writtenaset'][2] = array5by7 + 2

    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    array10[:] = 0
    wco.columns['newaset'][0] = array10
    wco.columns['newaset'][1] = array10 + 1
    wco.columns['newaset'][2] = array10 + 2

    res = wco[('writtenaset', 0), ('newaset', 0), ('writtenaset', 1), ('newaset', 1)]
    assert isinstance(res, list)
    assert len(res) == 4
    assert np.allclose(res[0], array5by7)
    assert np.allclose(res[1], array10)
    assert np.allclose(res[2], array5by7 + 1)
    assert np.allclose(res[3], array10 + 1)
    wco.close()


def test_writer_co_read_fails_nonexistant_aset_name(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    with pytest.raises(KeyError):
        _ = wco['doesnotexist', 0]
    wco.close()


def test_writer_co_read_fails_nonexistant_sample_name(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    with pytest.raises(KeyError):
        _ = wco['doesnotexist', 124]
    wco.close()


def test_writer_co_get_returns_none_on_nonexistant_sample_name(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    out = wco.get(('writtenaset', 124))
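    # get() returns None for a missing sample instead of raising KeyError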
    assert out is None
    wco.close()


def test_writer_co_read_in_context_manager_no_loop(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    wco['writtenaset']['0'] = array5by7
    wco['newaset']['0'] = array10
    with wco:
        assert wco._is_conman is True
        assert np.allclose(wco['writtenaset', '0'], array5by7)
    wco.close()


def test_writer_co_read_in_context_manager_many_samples_looping(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    with wco:
        for idx in range(100):
            array10[:] = idx
            array5by7[:] = idx
            wco['writtenaset'][idx] = array5by7
            wco['newaset'][idx] = array10

    with wco:
        waset_keys = [('writtenaset', i) for i in range(100)]
        naset_keys = [('newaset', i) for i in range(100)]
        writtenasetOut = wco[waset_keys]
        newasetOut = wco[naset_keys]
        for idx in range(100):
            array10[:] = idx
            array5by7[:] = idx
            assert np.allclose(array5by7, wco['writtenaset', idx])
            assert np.allclose(array10, wco['newaset', idx])

            o = wco[('writtenaset', idx), ('newaset', idx)]
            assert np.allclose(o[0], array5by7)
            assert np.allclose(o[1], array10)

            assert np.allclose(writtenasetOut[idx], array5by7)
            assert np.allclose(newasetOut[idx], array10)
    wco.close()


@pytest.mark.parametrize('write', [True, False])
def test_co_read_dunder_getitem_excepts_missing_sample(aset_samples_initialized_repo, write):
    co = aset_samples_initialized_repo.checkout(write=write)
    with pytest.raises(KeyError):
        res = co['writtenaset', 0]
    co.close()


@pytest.mark.parametrize('write', [True, False])
def test_co_read_get_except_missing_true_excepts_missing_sample(aset_samples_initialized_repo, write):
    co = aset_samples_initialized_repo.checkout(write=write)
    with pytest.raises(KeyError):
        res = co.get(('writtenaset', 0), except_missing=True)
    co.close()


@pytest.mark.parametrize('write', [True, False])
def test_co_read_get_except_missing_false_returns_none_on_missing_sample(aset_samples_initialized_repo, write):
    co = aset_samples_initialized_repo.checkout(write=write)
    res_1 = co.get(('writtenaset', 0))
    assert res_1 is None
    res_2 = co.get(('writtenaset', 0), except_missing=False)
    assert res_2 is None
    co.close()


def test_writer_co_aset_finds_connection_manager_of_any_aset_in_cm(aset_samples_initialized_repo):
    wco = aset_samples_initialized_repo.checkout(write=True)
    wco.add_ndarray_column('second', shape=(20,), dtype=np.uint8)
    asets = wco.columns
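    # _any_is_conman() should report True while any single column handle is inside
    # its context manager, and False once all of them have exited.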

    with wco.columns['second'] as second_aset:
        assert wco.columns['second']._is_conman is True
        assert second_aset._is_conman is True
        assert asets._any_is_conman() is True

    with wco.columns['writtenaset'] as written_aset:
        assert wco.columns['writtenaset']._is_conman is True
        assert written_aset._is_conman is True
        assert asets._any_is_conman() is True

    assert wco.columns['writtenaset']._is_conman is False
    assert wco.columns['second']._is_conman is False
    assert asets._any_is_conman() is False
    wco.close()


def test_writer_co_aset_cm_not_allow_remove_aset(aset_samples_initialized_repo, array5by7):

    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.columns['writtenaset'][1] = array5by7 + 1
    wco.columns['writtenaset'][2] = array5by7 + 2

    asets = wco.columns
    with asets as cm_asets:
        with pytest.raises(PermissionError):
            cm_asets.delete('writtenaset')
        with pytest.raises(PermissionError):
            asets.delete('writtenaset')
        with pytest.raises(PermissionError):
            wco.columns.delete('writtenaset')

        with pytest.raises(PermissionError):
            del cm_asets['writtenaset']
        with pytest.raises(PermissionError):
            del asets['writtenaset']
        with pytest.raises(PermissionError):
            del wco.columns['writtenaset']

    assert len(wco['writtenaset']) == 3
    assert np.allclose(wco['writtenaset', 0], array5by7)
    assert np.allclose(wco['writtenaset', 1], array5by7 + 1)
    assert np.allclose(wco['writtenaset', 2], array5by7 + 2)
    wco.close()


def test_writer_co_column_instance_cm_not_allow_any_column_removal(repo_20_filled_samples):

    wco = repo_20_filled_samples.checkout(write=True)
    columns = wco.columns
    writtenaset = wco.columns['writtenaset']
    second_aset = wco.columns['second_aset']
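    # while any column handle (or the columns proxy itself) is an active context
    # manager, deleting columns must be refused with PermissionError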

    with second_aset:
        with pytest.raises(PermissionError):
            columns.delete('writtenaset')
        with pytest.raises(PermissionError):
            columns.delete('second_aset')
        with pytest.raises(PermissionError):
            wco.columns.delete('writtenaset')
        with pytest.raises(PermissionError):
            wco.columns.delete('second_aset')
        with pytest.raises(PermissionError):
            del columns['writtenaset']
        with pytest.raises(PermissionError):
            del columns['second_aset']
        with pytest.raises(PermissionError):
            del wco.columns['second_aset']
        with pytest.raises(PermissionError):
            del wco.columns['written_aset']

    with writtenaset:
        with pytest.raises(PermissionError):
            columns.delete('writtenaset')
        with pytest.raises(PermissionError):
            columns.delete('second_aset')
        with pytest.raises(PermissionError):
            wco.columns.delete('writtenaset')
        with pytest.raises(PermissionError):
            wco.columns.delete('second_aset')
        with pytest.raises(PermissionError):
            del columns['writtenaset']
        with pytest.raises(PermissionError):
            del columns['second_aset']
        with pytest.raises(PermissionError):
            del wco.columns['second_aset']
        with pytest.raises(PermissionError):
            del wco.columns['written_aset']

    with columns:
        with pytest.raises(PermissionError):
            columns.delete('writtenaset')
        with pytest.raises(PermissionError):
            columns.delete('second_aset')
        with pytest.raises(PermissionError):
            wco.columns.delete('writtenaset')
        with pytest.raises(PermissionError):
            wco.columns.delete('second_aset')
        with pytest.raises(PermissionError):
            del columns['writtenaset']
        with pytest.raises(PermissionError):
            del columns['second_aset']
        with pytest.raises(PermissionError):
            del wco.columns['second_aset']
        with pytest.raises(PermissionError):
            del wco.columns['written_aset']

    wco.close()


def test_writer_co_aset_removes_all_samples_and_arrayset_still_exists(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)
    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.columns['writtenaset'][1] = array5by7 + 1
    wco.columns['writtenaset'][2] = array5by7 + 2
    assert len(wco.columns) == 1
    assert len(wco.columns['writtenaset']) == 3

    with wco.columns['writtenaset'] as wset:
        del wset[0]
        del wset[1]
        del wset[2]
        # All samples removed, but the column itself still exists
        assert len(wset) == 0
        assert len(wco.columns) == 1
    assert len(wco.columns) == 1

    del wco.columns['writtenaset']

    assert len(wco.columns) == 0
    with pytest.raises(KeyError):
        len(wco.columns['writtenaset'])
    wco.close()


# -------------------------- Reader Checkout ----------------------------------


def test_reader_co_read_single_aset_single_sample(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.columns['writtenaset'][1] = array5by7 + 1
    wco.columns['writtenaset'][2] = array5by7 + 2
    wco.commit('first')
    wco.close()

    rco = aset_samples_initialized_repo.checkout()
    assert np.allclose(rco['writtenaset', 0], array5by7)
    assert np.allclose(rco['writtenaset', 1], array5by7 + 1)
    assert np.allclose(rco['writtenaset', 2], array5by7 + 2)
    rco.close()


def test_reader_co_read_single_aset_multiple_samples(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.columns['writtenaset'][1] = array5by7 + 1
    wco.columns['writtenaset'][2] = array5by7 + 2
    wco.commit('first')
    wco.close()

    rco = aset_samples_initialized_repo.checkout()
    res = rco[('writtenaset', 0), ('writtenaset', 1), ('writtenaset', 2)]
    assert np.allclose(res[0], array5by7)
    assert np.allclose(res[1], array5by7 + 1)
    assert np.allclose(res[2], array5by7 + 2)
    rco.close()


def test_reader_co_read_multiple_aset_single_samples(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.columns['writtenaset'][1] = array5by7 + 1
    wco.columns['writtenaset'][2] = array5by7 + 2

    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    array10[:] = 0
    wco.columns['newaset'][0] = array10
    wco.columns['newaset'][1] = array10 + 1
    wco.columns['newaset'][2] = array10 + 2
    wco.commit('first')
    wco.close()

    rco = aset_samples_initialized_repo.checkout()
    res = rco[('writtenaset', 0), ('newaset', 0)]
    assert np.allclose(res[0], array5by7)
    assert np.allclose(res[1], array10)
    res = rco[('writtenaset', 1), ('newaset', 1)]
    assert np.allclose(res[0], array5by7 + 1)
    assert np.allclose(res[1], array10 + 1)
    rco.close()


def test_reader_co_read_multiple_aset_multiple_samples(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.columns['writtenaset'][1] = array5by7 + 1
    wco.columns['writtenaset'][2] = array5by7 + 2

    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    array10[:] = 0
    wco.columns['newaset'][0] = array10
    wco.columns['newaset'][1] = array10 + 1
    wco.columns['newaset'][2] = array10 + 2
    wco.commit('first')
    wco.close()

    rco = aset_samples_initialized_repo.checkout()
    res = rco[('writtenaset', 0), ('newaset', 0), ('writtenaset', 1), ('newaset', 1)]
    assert isinstance(res, list)
    assert len(res) == 4
    assert np.allclose(res[0], array5by7)
    assert np.allclose(res[1], array10)
    assert np.allclose(res[2], array5by7 + 1)
    assert np.allclose(res[3], array10 + 1)
    rco.close()


def test_reader_co_read_fails_nonexistant_aset_name(aset_samples_initialized_repo, array5by7):
    rco = aset_samples_initialized_repo.checkout()
    with pytest.raises(KeyError):
        _ = rco['doesnotexist', 0]
    rco.close()


def test_reader_co_read_fails_nonexistant_sample_name(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)
    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.commit('first')
    wco.close()

    rco = aset_samples_initialized_repo.checkout()
    with pytest.raises(KeyError):
        _ = rco['doesnotexist', 124]
    rco.close()


def test_reader_co_get_read_returns_none_nonexistant_sample_name(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)
    array5by7[:] = 0
    wco.columns['writtenaset'][0] = array5by7
    wco.commit('first')
    wco.close()

    rco = aset_samples_initialized_repo.checkout()
    out = rco.get(('writtenaset', 124))
    assert out is None
    rco.close()


def test_reader_co_read_in_context_manager_no_loop(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    wco['writtenaset']['0'] = array5by7
    wco['newaset']['0'] = array10
    wco.commit('first')
    wco.close()

    rco = aset_samples_initialized_repo.checkout()
    with rco:
        assert rco._is_conman is True
        assert np.allclose(rco['writtenaset', '0'], array5by7)
    rco.close()


def test_reader_co_read_in_context_manager_many_samples_looping(aset_samples_initialized_repo, array5by7):
    wco = aset_samples_initialized_repo.checkout(write=True)

    array10 = np.arange(10, dtype=np.float32)
    wco.add_ndarray_column('newaset', prototype=array10)
    with wco:
        for idx in range(100):
            array10[:] = idx
            array5by7[:] = idx
            wco['writtenaset'][idx] = array5by7
            wco['newaset'][idx] = array10
    wco.commit('first')
    wco.close()

    rco = aset_samples_initialized_repo.checkout()
    with rco:
        waset_keys = [('writtenaset', i) for i in range(100)]
        naset_keys = [('newaset', i) for i in range(100)]
        writtenasetOut = rco[waset_keys]
        newasetOut = rco[naset_keys]
        for idx in range(100):
            array10[:] = idx
            array5by7[:] = idx
            assert np.allclose(array5by7, rco['writtenaset', idx])
            assert np.allclose(array10, rco['newaset', idx])

            o = rco[('writtenaset', idx), ('newaset', idx)]
            assert np.allclose(o[0], array5by7)
            assert np.allclose(o[1], array10)
            assert np.allclose(writtenasetOut[idx], array5by7)
            assert np.allclose(newasetOut[idx], array10)
    rco.close()


================================================
FILE: tests/test_cli.py
================================================
from os import getcwd
import os
from pathlib import Path

import numpy as np
import pytest
from click.testing import CliRunner

from hangar import Repository
from hangar.cli import cli
from hangar.external import PluginManager
from conftest import fixed_shape_backend_params


# -------------------------------- test data ----------------------------------


help_res = 'Usage: main [OPTIONS] COMMAND [ARGS]...\n'\
           '\n'\
           'Options:\n'\
           '  --version  display current Hangar Version\n'\
           '  --help     Show this message and exit.\n'\
           '\n'\
           'Commands:\n'\
           '  branch       Operate on and list branch pointers.\n'\
           '  checkout     Checkout writer head branch at BRANCHNAME.\n'\
           '  clone        Initialize a repository at the current path and fetch updated...\n'\
           '  column       Operations for working with columns in the writer checkout.\n'\
           '  commit       Commits outstanding changes.\n'\
           '  diff         Display diff of DEV commit/branch to MASTER commit/branch.\n'\
           '  export       Export COLUMN sample data as it existed a STARTPOINT to some...\n'\
           '  fetch        Retrieve the commit history from REMOTE for BRANCH.\n'\
           '  fetch-data   Get data from REMOTE referenced by STARTPOINT (short-commit or...\n'\
           '  import       Import file or directory of files at PATH to COLUMN in the...\n'\
           '  init         Initialize an empty repository at the current path.\n'\
           '  log          Display commit graph starting at STARTPOINT (short-digest or...\n'\
           '  push         Upload local BRANCH commit history / data to REMOTE server.\n'\
           '  remote       Operations for working with remote server references\n'\
           '  server       Start a hangar server, initializing one if does not exist.\n'\
           '  status       Display changes made in the staging area compared to its base...\n'\
           '  summary      Display content summary at STARTPOINT (short-digest or branch).\n'\
           '  view         Use a plugin to view the data of some SAMPLE in COLUMN at...\n'\
           '  writer-lock  Determine if the writer lock is held for a repository.\n'
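
# The expected help text above must track the CLI's own --help output; the tests
# below render it with terminal_width=80, which keeps the '...' truncation points stable.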


# ------------------------------- begin tests ---------------------------------


def test_help_option():
    runner = CliRunner()
    with runner.isolated_filesystem():
        res = runner.invoke(cli.main, ['--help'], terminal_width=80)
        assert res.exit_code == 0
        assert res.stdout == help_res


def test_help_no_args_option():
    runner = CliRunner()
    with runner.isolated_filesystem():
        res = runner.invoke(cli.main, terminal_width=80)
        assert res.exit_code == 0
        assert res.stdout == help_res


def test_version_long_option():
    import hangar
    runner = CliRunner()
    with runner.isolated_filesystem():
        res = runner.invoke(cli.main, ['--version'])
        assert res.exit_code == 0
        assert res.stdout == f'main, version {hangar.__version__}\n'


def test_init_repo(managed_tmpdir):
    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        try:
            repo = Repository(P, exists=False)
            res = runner.invoke(cli.init, ['--name', 'test', '--email', 'test@foo.com'], obj=repo)
            assert res.exit_code == 0
            assert repo._Repository__verify_repo_initialized() is None
        finally:
            repo._env._close_environments()


def test_writer_lock_is_held_check(repo_20_filled_samples2):
    runner = CliRunner()
    res = runner.invoke(cli.writer_lock_held, obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    assert res.stdout == 'Writer lock is available.\n'
    co = repo_20_filled_samples2.checkout(write=True)
    res = runner.invoke(cli.writer_lock_held, obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    assert res.stdout == 'Writer lock is held.\n'
    co.close()


def test_writer_lock_force_release(repo_20_filled_samples2):
    runner = CliRunner()
    res = runner.invoke(cli.writer_lock_held, ['--force-release'], obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    assert res.stdout == 'Success force release of writer lock.\n'
    co = repo_20_filled_samples2.checkout(write=True)
    res = runner.invoke(cli.writer_lock_held, ['--force-release'], obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    assert res.stdout == 'Success force release of writer lock.\n'
    assert repo_20_filled_samples2.writer_lock_held is False
    nco = repo_20_filled_samples2.checkout(write=True)
    with pytest.raises(PermissionError):
        print(co.columns)
    nco.close()


def test_checkout_writer_branch_works(repo_20_filled_samples2):
    from hangar.records.heads import get_staging_branch_head
    repo_20_filled_samples2.create_branch('dev')
    runner = CliRunner()
    res = runner.invoke(cli.checkout, ['dev'], obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    assert res.stdout == 'Writer checkout head set to branch: dev\n'
    recorded_branch = get_staging_branch_head(repo_20_filled_samples2._env.branchenv)
    assert recorded_branch == 'dev'
    assert repo_20_filled_samples2.writer_lock_held is False


def test_checkout_writer_branch_nonexistant_branch_errors(repo_20_filled_samples2):
    from hangar.records.heads import get_staging_branch_head
    runner = CliRunner()
    res = runner.invoke(cli.checkout, ['doesnotexist'], obj=repo_20_filled_samples2)
    assert res.exit_code == 1
    assert res.stdout == 'Error: branch with name: doesnotexist does not exist. cannot get head.\n'
    recorded_branch = get_staging_branch_head(repo_20_filled_samples2._env.branchenv)
    assert recorded_branch == 'master'
    assert repo_20_filled_samples2.writer_lock_held is False


def test_checkout_writer_branch_lock_held_errors(repo_20_filled_samples2):
    from hangar.records.heads import get_staging_branch_head
    repo_20_filled_samples2.create_branch('testbranch')
    co = repo_20_filled_samples2.checkout(write=True, branch='master')
    try:
        runner = CliRunner()
        res = runner.invoke(cli.checkout, ['testbranch'], obj=repo_20_filled_samples2)
        assert res.exit_code == 1
        msg = res.stdout
        assert msg.startswith('Error: Cannot acquire the writer lock.') is True
        recorded_branch = get_staging_branch_head(repo_20_filled_samples2._env.branchenv)
        assert recorded_branch == 'master'
        assert repo_20_filled_samples2.writer_lock_held is True
        assert co.branch_name == 'master'
    finally:
        co.close()
    assert repo_20_filled_samples2.writer_lock_held is False


def test_diff_command(repo_2_br_no_conf):
    runner = CliRunner()
    res = runner.invoke(cli.diff, ['master', 'testbranch'], obj=repo_2_br_no_conf)
    assert res.exit_code == 0


def test_commit_cli_message(repo_20_filled_samples2):
    co = repo_20_filled_samples2.checkout(write=True)
    co.add_str_column('test_meta')
    base_digest = co.commit_hash
    base_branch = co.branch_name
    co.close()
    assert base_branch == 'master'

    runner = CliRunner()
    res = runner.invoke(cli.commit, ['-m', 'this is my commit message'], obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    out = res.stdout
    assert out.startswith('Commit message:\nthis is my commit message\nCommit Successful') is True
    new_digest = out.split(' ')[-1].rstrip('\n')
    assert new_digest != base_digest

    nco = repo_20_filled_samples2.checkout(write=True)
    try:
        assert nco.commit_hash == new_digest
        assert nco.branch_name == base_branch
    finally:
        nco.close()


def test_commit_cli_message_with_no_changes(repo_20_filled_samples2):
    co = repo_20_filled_samples2.checkout(write=True)
    base_digest = co.commit_hash
    base_branch = co.branch_name
    co.close()
    assert base_branch == 'master'

    runner = CliRunner()
    res = runner.invoke(cli.commit, ['-m', 'this is my commit message'], obj=repo_20_filled_samples2)
    assert res.exit_code == 1
    assert res.stdout.endswith('Error: No changes made in staging area. Cannot commit.\n')

    co = repo_20_filled_samples2.checkout(write=True)
    try:
        assert co.branch_name == base_branch
        assert co.commit_hash == base_digest
    finally:
        co.close()


def substitute_editor_commit_message(hint):
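    # Stand-in for ``click.edit``: prepends a fixed commit message to the
    # editor hint text. The editor-based commit tests below monkeypatch this
    # over ``click.edit`` so no real editor is launched.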
    return 'this is my commit message\n' + hint


def test_commit_editor_message(monkeypatch, repo_20_filled_samples2):
    import click
    monkeypatch.setattr(click, 'edit', substitute_editor_commit_message)

    co = repo_20_filled_samples2.checkout(write=True)
    co.add_str_column('test_meta')
    base_digest = co.commit_hash
    base_branch = co.branch_name
    co.close()
    assert base_branch == 'master'

    runner = CliRunner()
    res = runner.invoke(cli.commit, obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    out = res.stdout
    assert out.startswith('Commit message:\nthis is my commit message\nCommit Successful') is True
    new_digest = out.split(' ')[-1].rstrip('\n')
    assert new_digest != base_digest

    nco = repo_20_filled_samples2.checkout(write=True)
    try:
        assert nco.commit_hash == new_digest
        assert nco.branch_name == base_branch
    finally:
        nco.close()


def substitute_editor_empty_commit_message(hint):
    return hint


def test_commit_editor_empty_message(monkeypatch, repo_20_filled_samples2):
    import click
    monkeypatch.setattr(click, 'edit', substitute_editor_empty_commit_message)

    co = repo_20_filled_samples2.checkout(write=True)
    co.add_str_column('test_meta')
    base_digest = co.commit_hash
    base_branch = co.branch_name
    co.close()
    assert base_branch == 'master'

    runner = CliRunner()
    res = runner.invoke(cli.commit, obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    assert res.stdout == 'Aborted! Empty commit message\n'
    nco = repo_20_filled_samples2.checkout(write=True)
    try:
        assert nco.commit_hash == base_digest
        assert nco.branch_name == base_branch
    finally:
        nco.close()


def test_clone(written_two_cmt_server_repo):
    server, base_repo = written_two_cmt_server_repo
    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        try:
            new_repo = Repository(P, exists=False)
            res = runner.invoke(
                cli.clone,
                ['--name', 'Foo Tester', '--email', 'foo@email.com', f'{server}'], obj=new_repo)

            assert res.exit_code == 0

            newLog = new_repo.log(return_contents=True)
            baseLog = base_repo.log(return_contents=True)
            assert newLog == baseLog
            assert new_repo.summary() == base_repo.summary()
        finally:
            new_repo._env._close_environments()


@pytest.mark.parametrize('backend', fixed_shape_backend_params)
def test_push_fetch_records(server_instance, backend):

    runner = CliRunner()
    with runner.isolated_filesystem():
        repo = Repository(getcwd(), exists=False)
        try:
            repo.init('foo', 'bar')
            dummyData = np.arange(50)
            co1 = repo.checkout(write=True, branch='master')
            co1.add_ndarray_column(name='dummy', prototype=dummyData, backend=backend)
            for idx in range(10):
                dummyData[:] = idx
                co1.columns['dummy'][str(idx)] = dummyData
            cmt1 = co1.commit('first commit adding dummy data')
            co1.close()

            repo.create_branch('testbranch')
            co2 = repo.checkout(write=True, branch='testbranch')
            for idx in range(10, 20):
                dummyData[:] = idx
                co2.columns['dummy'][str(idx)] = dummyData
            cmt2 = co2.commit('first commit on test branch adding non-conflict data')
            co2.close()

            repo.remote.add('origin', server_instance)

            res = runner.invoke(cli.push, ['origin', 'master'], obj=repo)
            assert res.exit_code == 0
            res = runner.invoke(cli.push, ['origin', 'testbranch'], obj=repo)
            assert res.exit_code == 0
        finally:
            repo._env._close_environments()


@pytest.mark.parametrize('backend', fixed_shape_backend_params)
@pytest.mark.parametrize('options', [
    ['origin', 'testbranch'],
    ['origin', 'master'],
    ['origin', 'testbranch', '--all-history'],
    ['origin', 'master', '--all-history'],
    ['origin', 'testbranch', '--column', 'data'],
    ['origin', 'master', '--column', 'data'],
    ['origin', 'testbranch', '--column', 'data', '--all-history'],
    ['origin', 'master', '--column', 'data', '--all-history'],
    ['origin', 'testbranch', '--column', 'data', '--all-history'],
])
def test_fetch_records_and_data(server_instance, backend, options):
    runner = CliRunner()
    with runner.isolated_filesystem():
        repo = Repository(getcwd(), exists=False)
        try:
            repo.init('foo', 'bar')
            dummyData = np.arange(50)
            co1 = repo.checkout(write=True, branch='master')
            co1.add_ndarray_column(name='dummy', prototype=dummyData, backend=backend)
            for idx in range(10):
                dummyData[:] = idx
                co1.columns['dummy'][str(idx)] = dummyData
            cmt1 = co1.commit('first commit adding dummy data')
            co1.close()

            repo.create_branch('testbranch')
            co2 = repo.checkout(write=True, branch='testbranch')
            for idx in range(10, 20):
                dummyData[:] = idx
                co2.columns['dummy'][str(idx)] = dummyData
            cmt2 = co2.commit('first commit on test branch adding non-conflict data')
            co2.close()

            repo.remote.add('origin', server_instance)

            res = runner.invoke(cli.push, ['origin', 'master'], obj=repo)
            assert res.exit_code == 0
            res = runner.invoke(cli.push, ['origin', 'testbranch'], obj=repo)
            assert res.exit_code == 0
        finally:
            repo._env._close_environments()

    with runner.isolated_filesystem():
        repo = Repository(getcwd(), exists=False)
        try:
            res = runner.invoke(
                cli.clone,
                ['--name', 'Foo Tester', '--email', 'foo@email.com', f'{server_instance}'], obj=repo)
            assert res.exit_code == 0

            res = runner.invoke(cli.fetch_records, ['origin', 'testbranch'], obj=repo)
            assert res.exit_code == 0
            res = runner.invoke(cli.branch_create, ['testbranch', 'origin/testbranch'], obj=repo)
            assert res.exit_code == 0
            res = runner.invoke(cli.fetch_data, options, obj=repo)
            assert res.exit_code == 0
        finally:
            repo._env._close_environments()


def test_add_remote(managed_tmpdir):
    from hangar.remotes import RemoteInfo

    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        repo = Repository(P, exists=False)
        try:
            res = runner.invoke(cli.init, ['--name', 'test', '--email', 'test@foo.com'], obj=repo)
            assert res.exit_code == 0

            res = runner.invoke(cli.add_remote, ['origin', 'localhost:50051'], obj=repo)
            assert res.exit_code == 0
            assert res.stdout == "RemoteInfo(name='origin', address='localhost:50051')\n"

            remote_list = repo.remote.list_all()
            assert remote_list == [RemoteInfo(name='origin', address='localhost:50051')]
        finally:
            repo._env._close_environments()


def test_remove_remote(managed_tmpdir):
    from hangar.remotes import RemoteInfo

    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        repo = Repository(P, exists=False)
        try:
            res = runner.invoke(cli.init, ['--name', 'test', '--email', 'test@foo.com'], obj=repo)
            assert res.exit_code == 0

            res = runner.invoke(cli.add_remote, ['origin', 'localhost:50051'], obj=repo)
            assert res.exit_code == 0
            assert res.stdout == "RemoteInfo(name='origin', address='localhost:50051')\n"

            remote_list = repo.remote.list_all()
            assert remote_list == [RemoteInfo(name='origin', address='localhost:50051')]

            res = runner.invoke(cli.remove_remote, ['origin'], obj=repo)
            assert res.exit_code == 0
            assert res.stdout == "RemoteInfo(name='origin', address='localhost:50051')\n"
            assert repo.remote.list_all() == []
        finally:
            repo._env._close_environments()


def test_list_all_remotes(managed_tmpdir):
    from hangar.remotes import RemoteInfo

    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        repo = Repository(P, exists=False)
        try:
            res = runner.invoke(cli.init, ['--name', 'test', '--email', 'test@foo.com'], obj=repo)
            assert res.exit_code == 0

            res = runner.invoke(cli.add_remote, ['origin', 'localhost:50051'], obj=repo)
            assert res.exit_code == 0
            assert res.stdout == "RemoteInfo(name='origin', address='localhost:50051')\n"
            res = runner.invoke(cli.add_remote, ['upstream', 'foo:ip'], obj=repo)
            assert res.exit_code == 0
            assert res.stdout == "RemoteInfo(name='upstream', address='foo:ip')\n"

            remote_list = repo.remote.list_all()
            assert remote_list == [
                RemoteInfo(name='origin', address='localhost:50051'),
                RemoteInfo(name='upstream', address='foo:ip')
            ]

            res = runner.invoke(cli.list_remotes, obj=repo)
            assert res.exit_code == 0
            expected_stdout = "[RemoteInfo(name='origin', address='localhost:50051'), "\
                              "RemoteInfo(name='upstream', address='foo:ip')]\n"
            assert res.stdout == expected_stdout
        finally:
            repo._env._close_environments()


def test_summary(written_two_cmt_server_repo, capsys):
    server, base_repo = written_two_cmt_server_repo
    runner = CliRunner()
    with runner.isolated_filesystem():
        try:
            with capsys.disabled():
                P = getcwd()
                new_repo = Repository(P, exists=False)
                res = runner.invoke(
                    cli.clone,
                    ['--name', 'Foo Tester', '--email', 'foo@email.com', f'{server}'], obj=new_repo)

                assert res.exit_code == 0
                assert new_repo.summary() == base_repo.summary()

            new_repo.summary()

            with capsys.disabled():
                res = runner.invoke(cli.summary, obj=new_repo)
                assert res.stdout == f"{capsys.readouterr().out}\n"
        finally:
            new_repo._env._close_environments()


def test_summary_before_commit_made(managed_tmpdir):
    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        new_repo = Repository(P, exists=False)
        new_repo.init('Test User', 'Test@test.com')
        try:
            res = runner.invoke(cli.summary, obj=new_repo)
            assert res.exit_code == 0
            assert 'No commits have been made in the repository' in res.stdout
        finally:
            new_repo._env._close_environments()


def test_log(written_two_cmt_server_repo, capsys):
    server, base_repo = written_two_cmt_server_repo
    runner = CliRunner()
    with runner.isolated_filesystem():
        try:
            with capsys.disabled():
                P = getcwd()
                new_repo = Repository(P, exists=False)
                res = runner.invoke(
                    cli.clone,
                    ['--name', 'Foo Tester', '--email', 'foo@email.com', f'{server}'], obj=new_repo)

                assert res.exit_code == 0
                assert new_repo.log() == base_repo.log()

            new_repo.log()

            with capsys.disabled():
                res = runner.invoke(cli.log, ['master'], obj=new_repo)
                assert res.stdout == f"{capsys.readouterr().out}\n"
        finally:
            new_repo._env._close_environments()


def test_status(repo_20_filled_samples2):
    from hangar.records.summarize import status
    repo = repo_20_filled_samples2

    dummyData = np.arange(50).astype(np.int64)
    co2 = repo.checkout(write=True)
    for idx in range(10, 20):
        dummyData[:] = idx
        co2.columns['dummy'][str(idx)] = dummyData
        co2.columns['dummy'][idx] = dummyData
    df = co2.diff.staged()
    co2.close()
    expected = status(repo._env.hashenv, 'master', df.diff).getvalue()
    runner = CliRunner()
    res = runner.invoke(cli.status, obj=repo)
    assert res.exit_code == 0
    assert res.stdout == expected


def test_arrayset_create_uint8(repo_20_filled_samples2):
    runner = CliRunner()
    res = runner.invoke(
        cli.create_column,
        ['train_images', 'UINT8', '256', '256', '3'], obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    assert res.stdout == 'Initialized Column: train_images\n'
    co = repo_20_filled_samples2.checkout(write=True)
    try:
        assert 'train_images' in co.columns
        assert co.columns['train_images'].shape == (256, 256, 3)
        assert co.columns['train_images'].dtype == np.uint8
        assert co.columns['train_images'].schema_type == 'fixed_shape'
        assert len(co.columns['train_images']) == 0
    finally:
        co.close()


def test_arrayset_create_float32(repo_20_filled_samples2):
    runner = CliRunner()
    res = runner.invoke(
        cli.create_column,
        ['train_images', 'FLOAT32', '256'], obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    assert res.stdout == 'Initialized Column: train_images\n'
    co = repo_20_filled_samples2.checkout(write=True)
    try:
        assert 'train_images' in co.columns
        assert co.columns['train_images'].shape == (256,)
        assert co.columns['train_images'].dtype == np.float32
        assert co.columns['train_images'].schema_type == 'fixed_shape'
        assert len(co.columns['train_images']) == 0
    finally:
        co.close()


def test_arrayset_create_invalid_dtype_fails(repo_20_filled_samples2):
    runner = CliRunner()
    res = runner.invoke(
        cli.create_column,
        ['train_images', 'FLOAT7', '256'], obj=repo_20_filled_samples2)
    assert res.exit_code == 2
    expected = ('invalid choice: FLOAT7. (choose from UINT8, '
                'INT8, UINT16, INT16, UINT32, INT32, UINT64, '
                'INT64, FLOAT16, FLOAT32, FLOAT64, STR)\n')
    assert res.stdout.endswith(expected) is True
    co = repo_20_filled_samples2.checkout(write=True)
    try:
        assert 'train_images' not in co.columns
    finally:
        co.close()


def test_arrayset_create_invalid_name_fails(repo_20_filled_samples2):
    runner = CliRunner()
    res = runner.invoke(cli.create_column, ['tra#in', 'FLOAT32', '256'], obj=repo_20_filled_samples2)
    assert res.exit_code == 1
    msg = res.stdout
    assert msg.startswith('Error: Column name provided: `tra#in` is invalid.') is True
    co = repo_20_filled_samples2.checkout(write=True)
    try:
        assert 'tra#in' not in co.columns
        assert 'dummy' in co.columns
        assert len(co.columns) == 1
    finally:
        co.close()


def test_arrayset_create_variable_shape(repo_20_filled_samples2):
    runner = CliRunner()
    res = runner.invoke(
        cli.create_column,
        ['train_images', 'FLOAT32', '256', '--variable-shape'], obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    assert res.stdout == 'Initialized Column: train_images\n'
    co = repo_20_filled_samples2.checkout(write=True)
    try:
        assert 'train_images' in co.columns
        assert co.columns['train_images'].shape == (256,)
        assert co.columns['train_images'].dtype == np.float32
        assert co.columns['train_images'].schema_type == 'variable_shape'
        assert co.columns['train_images'].contains_subsamples is False
        assert len(co.columns['train_images']) == 0
    finally:
        co.close()


def test_arrayset_create_contains_subsamples(repo_20_filled_samples2):
    runner = CliRunner()
    res = runner.invoke(
        cli.create_column,
        ['train_images', 'FLOAT32', '256', '--contains-subsamples'], obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    assert res.stdout == 'Initialized Column: train_images\n'
    co = repo_20_filled_samples2.checkout(write=True)
    try:
        assert 'train_images' in co.columns
        assert co.columns['train_images'].shape == (256,)
        assert co.columns['train_images'].dtype == np.float32
        assert co.columns['train_images'].schema_type == 'fixed_shape'
        assert co.columns['train_images'].contains_subsamples is True
        assert len(co.columns['train_images']) == 0
    finally:
        co.close()


def test_remove_arrayset(repo_20_filled_samples2):
    runner = CliRunner()
    res = runner.invoke(cli.remove_column, ['dummy'], obj=repo_20_filled_samples2)
    assert res.exit_code == 0
    assert res.stdout == 'Successfully removed column: dummy\n'
    co = repo_20_filled_samples2.checkout(write=True)
    try:
        assert 'dummy' not in co.columns
        assert len(co.columns) == 0
    finally:
        co.close()


def test_remove_non_existing_arrayset(repo_20_filled_samples2):
    runner = CliRunner()
    res = runner.invoke(cli.remove_column, ['doesnotexist'], obj=repo_20_filled_samples2)
    assert res.exit_code == 1
    assert res.stdout == "Error: 'Cannot remove: doesnotexist. Key does not exist.'\n"
    co = repo_20_filled_samples2.checkout(write=True)
    try:
        assert 'doesnotexist' not in co.columns
        assert 'dummy' in co.columns
        assert len(co.columns) == 1
        assert len(co.columns['dummy']) == 10
    finally:
        co.close()


def test_branch_create_and_list(written_two_cmt_server_repo):
    server, base_repo = written_two_cmt_server_repo

    co = base_repo.checkout(write=True)
    cmt = co.commit_hash
    co.close()

    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        new_repo = Repository(P, exists=False)
        try:
            res = runner.invoke(
                cli.clone,
                ['--name', 'Foo Tester', '--email', 'foo@email.com', f'{server}'], obj=new_repo)
            assert res.exit_code == 0

            res = runner.invoke(cli.branch_create, ['testbranch'], obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == f"Created BRANCH: testbranch HEAD: {cmt}\n"

            branches = new_repo.list_branches()
            assert branches == ['master', 'origin/master', 'testbranch']

            res = runner.invoke(cli.branch_list, obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == "['master', 'origin/master', 'testbranch']\n"
        finally:
            new_repo._env._close_environments()


@pytest.mark.filterwarnings("ignore:Column.* contains `reference-only` samples")
def test_branch_create_and_delete(written_two_cmt_server_repo):
    server, base_repo = written_two_cmt_server_repo

    co = base_repo.checkout(write=True)
    cmt = co.commit_hash
    co.close()

    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        new_repo = Repository(P, exists=False)
        try:
            res = runner.invoke(
                cli.clone,
                ['--name', 'Foo Tester', '--email', 'foo@email.com', f'{server}'], obj=new_repo)
            assert res.exit_code == 0

            res = runner.invoke(cli.branch_create, ['testbranch'], obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == f"Created BRANCH: testbranch HEAD: {cmt}\n"

            branches = new_repo.list_branches()
            assert branches == ['master', 'origin/master', 'testbranch']

            res = runner.invoke(cli.branch_remove, ['testbranch'], obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == f"Deleted BRANCH: testbranch HEAD: {cmt}\n"

            branches = new_repo.list_branches()
            assert branches == ['master', 'origin/master']

            new_repo.create_branch('secondtest')
            co = new_repo.checkout(write=True, branch='secondtest')
            co.add_str_column('test_meta')
            newDigest = co.commit('dummy commit')
            co.close()

            # re-open with staging set to master so we can try to delete secondtest
            co = new_repo.checkout(write=True, branch='master')
            co.close()

            res = runner.invoke(cli.branch_remove, ['secondtest'], obj=new_repo)
            assert res.exit_code == 1

            res = runner.invoke(cli.branch_remove, ['secondtest', '-f'], obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == f"Deleted BRANCH: secondtest HEAD: {newDigest}\n"

            res = runner.invoke(cli.branch_list, obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == "['master', 'origin/master']\n"
        finally:
            new_repo._env._close_environments()


def test_start_server(managed_tmpdir):
    import time
    runner = CliRunner()
    with runner.isolated_filesystem():
        startTime = time.time()
        res = runner.invoke(cli.server, ['--ip', 'localhost', '--port', '50111', '--timeout', '1'])
        assert time.time() - startTime <= 1.8  # buffer to give it time to stop
        assert res.exit_code == 0
        assert 'Hangar Server Started' in res.stdout


# ------------------------ Developer Commands --------------------------------


def test_db_view_command(repo_20_filled_samples):
    repo = repo_20_filled_samples
    runner = CliRunner()
    res = runner.invoke(cli.lmdb_record_details, ['-a'], obj=repo)

    dbs_queried = 0
    assert res.exit_code == 0
    for line in res.stdout.splitlines():
        if '.lmdb' in line:
            dbs_queried += 1
    assert dbs_queried == 5

    res = runner.invoke(cli.lmdb_record_details, ['-a', '--limit', '10'], obj=repo)
    assert res.exit_code == 0


# =========================== External Plugin =================================


def monkeypatch_scan(provides, accepts, attribute, func):
    def wrapper(self):
        from hangar.external import BasePlugin

        plugin = BasePlugin(provides, accepts)
        plugin.__dict__[attribute] = func

        self._plugin_store['myplugin'] = plugin
    return wrapper
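# Illustrative usage (mirrors the calls in the plugin tests below): the helper
# above is swapped in for ``PluginManager._scan_plugins`` so the CLI
# import/export/view commands resolve a synthetic plugin named 'myplugin', e.g.
#   m.setattr(PluginManager, "_scan_plugins",
#             monkeypatch_scan(['load'], ['ext'], 'load', TestImport.load))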


@pytest.fixture()
def written_repo_with_1_sample(aset_samples_initialized_repo):
    aset_name = 'writtenaset'
    shape = (5, 7)
    co = aset_samples_initialized_repo.checkout(write=True)
    aset = co.columns[aset_name]
    aset['data'] = np.random.random(shape)
    aset['123'] = np.random.random(shape)
    aset[123] = np.random.random(shape)
    co.commit('added')
    co.close()
    yield aset_samples_initialized_repo


class TestImport(object):

    @staticmethod
    def load(fpath, *args, **kwargs):
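        # Fake loader wired in through the monkeypatched plugin scan: it
        # ignores the file contents and returns random (5, 7) float64 data
        # plus the file name, which the CLI import command uses as sample key.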
        data = np.random.random((5, 7)).astype(np.float64)
        if isinstance(fpath, Path):
            fpath = fpath.name
        return data, fpath

    def test_import(self, monkeypatch, written_repo_with_1_sample):
        repo = written_repo_with_1_sample
        runner = CliRunner()
        shape = (5, 7)
        fpath = 'data.ext'
        aset_name = 'writtenaset'

        with monkeypatch.context() as m, runner.isolated_filesystem():
            with open('data.ext', 'w') as f:
                f.write('test')

            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['load'], ['ext'], 'load', self.load))
            # adding data
            res = runner.invoke(cli.import_data, [aset_name, fpath], obj=repo)
            assert res.exit_code == 0
            co = repo.checkout(write=True)
            co.commit('added data')
            d1 = co.columns[aset_name][fpath]
            co.close()

            # without overwrite
            res = runner.invoke(cli.import_data, [aset_name, fpath], obj=repo)
            assert res.exit_code == 0
            co = repo.checkout()
            d2 = co.columns[aset_name][fpath]
            co.close()
            assert np.allclose(d1, d2)

            # with overwrite
            res = runner.invoke(cli.import_data, [aset_name, fpath, '--overwrite'], obj=repo)
            assert res.exit_code == 0
            co = repo.checkout(write=True)
            co.commit('added data')
            d3 = co.columns[aset_name][fpath]
            co.close()
            assert not np.allclose(d1, d3)
        assert d1.shape == d2.shape == d3.shape == shape

    def test_import_wrong_args(self, monkeypatch, written_repo_with_1_sample):
        repo = written_repo_with_1_sample
        runner = CliRunner()

        aset_name = 'writtenaset'

        with monkeypatch.context() as m:
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['load'], ['ext'], 'load', self.load))

            with runner.isolated_filesystem():

                # invalid file
                res = runner.invoke(cli.import_data, [aset_name, 'valid.ext'], obj=repo)
                assert res.exit_code == 2
                assert "Invalid value for" in res.stdout
                assert "PATH" in res.stdout
                assert "valid.ext" in res.stdout
                assert "does not exist." in res.stdout

                with open('valid.ext', 'w') as f:
                    f.write('empty')

                with open('valid.ext.bz2', 'w') as f:
                    f.write('empty')

                res = runner.invoke(cli.import_data, [aset_name, 'valid.ext.bz2'], obj=repo)
                assert res.exit_code == 1
                assert res.stdout.endswith('No plugins found for the file extension ext.bz2 that could do load\n')

                # invalid branch
                res = runner.invoke(cli.import_data, [aset_name, 'valid.ext', '--branch', 'invalid'], obj=repo)
                assert res.exit_code == 1
                assert res.stdout.endswith('Branch name: invalid does not exist, Exiting.\n')

                # invalid plugin
                res = runner.invoke(cli.import_data, [aset_name, 'valid.ext', '--plugin', 'invalid'], obj=repo)
                assert res.exit_code == 1
                assert res.stdout.endswith('Plugin invalid not found\n')

    def test_import_generator_on_load(self, monkeypatch, written_repo_with_1_sample):
        repo = written_repo_with_1_sample
        runner = CliRunner()
        fpath = 'data.ext'
        aset_name = 'writtenaset'

        def load(fpath, *args, **kwargs):
            for i in range(10):
                data, name = self.load(fpath, *args, **kwargs)
                if isinstance(name, Path):
                    name = name.name
                yield data, f"{i}_{name}"

        with monkeypatch.context() as m, runner.isolated_filesystem():
            with open('data.ext', 'w') as f:
                f.write('test')
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['load'], ['ext'], 'load', load))
            res = runner.invoke(cli.import_data, [aset_name, fpath], obj=repo)
            assert res.exit_code == 0
            co = repo.checkout(write=True)
            co.commit('added data')
            aset = co.columns[aset_name]
            for i in range(10):
                assert f"{i}_{fpath}" in aset.keys()
            co.close()


class TestExport(object):
    save_msg = "Data saved from custom save function"

    @classmethod
    def save(cls, data, outdir, sampleN, extension, *args, **kwargs):
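        # Fake saver: prints a marker message and the would-be output path
        # instead of writing a file, so the export tests assert on CLI stdout.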
        print(cls.save_msg)
        fpath = os.path.join(outdir, f"{sampleN}.{extension}")
        print(fpath)

    def test_export_success(self, monkeypatch, written_repo_with_1_sample, tmp_path):
        repo = written_repo_with_1_sample
        runner = CliRunner()
        aset_name = 'writtenaset'

        with monkeypatch.context() as m:
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['save'], ['ext'], 'save', self.save))

            # single sample
            res = runner.invoke(
                cli.export_data, [aset_name, '-o', str(tmp_path), '--sample', 'data', '--format', 'ext'], obj=repo)
            assert res.exit_code == 0
            assert self.save_msg in res.output

            # with sample name and sample type
            res = runner.invoke(
                cli.export_data, [aset_name, '-o', str(tmp_path), '--sample', 'int:123', '--format', 'ext'], obj=repo)
            assert res.exit_code == 0
            assert os.path.join(tmp_path, 'int:123.ext') in res.output
            res = runner.invoke(
                cli.export_data, [aset_name, '-o', str(tmp_path), '--sample', 'str:123', '--format', 'ext'], obj=repo)
            assert res.exit_code == 0
            assert os.path.join(tmp_path, 'str:123.ext') in res.output
            res = runner.invoke(
                cli.export_data, [aset_name, '-o', str(tmp_path), '--sample', '123', '--format', 'ext'], obj=repo)
            assert res.exit_code == 0
            assert os.path.join(tmp_path, 'str:123.ext') in res.output

            # whole column
            res = runner.invoke(
                cli.export_data, [aset_name, '-o', str(tmp_path), '--format', 'ext'], obj=repo)
            assert res.exit_code == 0
            assert os.path.join(tmp_path, 'str:data.ext') in res.output
            assert os.path.join(tmp_path, 'str:123.ext') in res.output
            assert os.path.join(tmp_path, 'int:123.ext') in res.output

    def test_export_wrong_out_location(self, monkeypatch, written_repo_with_1_sample):
        repo = written_repo_with_1_sample
        runner = CliRunner()
        aset_name = 'writtenaset'

        with monkeypatch.context() as m:
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['save'], ['ext'], 'save', self.save))

            # single sample
            res = runner.invoke(
                cli.export_data, [aset_name, '-o', 'wrongpath', '--sample', 'data', '--format', 'ext'], obj=repo)
            assert res.exit_code == 2
            assert "Invalid value for" in res.stdout
            assert "-o" in res.stdout
            assert "--out" in res.stdout

    def test_export_wrong_arg(self, monkeypatch, written_repo_with_1_sample, tmp_path):
        repo = written_repo_with_1_sample
        runner = CliRunner()
        aset_name = 'writtenaset'

        with monkeypatch.context() as m:
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['save'], ['ext'], 'save', self.save))
            res = runner.invoke(
                cli.export_data, [aset_name, '-o', str(tmp_path), '--plugin', 'invalid'], obj=repo)
            assert res.exit_code == 1
            assert 'Plugin invalid not found' in res.stdout

    def test_export_without_specifying_out(self, monkeypatch, written_repo_with_1_sample):
        import os
        repo = written_repo_with_1_sample
        runner = CliRunner()
        aset_name = 'writtenaset'

        with monkeypatch.context() as m:
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['save'], ['ext'], 'save', self.save))
            res = runner.invoke(
                cli.export_data, [aset_name, '--sample', 'data', '--format', 'ext'], obj=repo)
            assert os.getcwd() in res.output

    def test_export_for_non_existent_sample(self, monkeypatch, written_repo_with_1_sample):
        repo = written_repo_with_1_sample
        runner = CliRunner()
        aset_name = 'writtenaset'

        with monkeypatch.context() as m:
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['save'], ['ext'], 'save', self.save))
            res = runner.invoke(
                cli.export_data, [aset_name, '--sample', 'wrongname', '--format', 'ext'], obj=repo)
            assert res.exit_code == 1
            assert 'wrongname' in res.output

    def test_export_for_specified_branch(self, monkeypatch, written_repo_with_1_sample):
        repo = written_repo_with_1_sample
        runner = CliRunner()
        aset_name = 'writtenaset'

        with monkeypatch.context() as m:
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['save'], ['ext'], 'save', self.save))
            res = runner.invoke(
                cli.export_data, [aset_name, 'master', '--sample', 'data', '--format', 'ext'], obj=repo)
            assert res.exit_code == 0


class TestShow(object):
    show_msg = "Data is displayed from custom show function"

    @classmethod
    def show(cls, fpath, *args, **kwargs):
        print(cls.show_msg)

    def test_show_success(self, monkeypatch, written_repo_with_1_sample):
        repo = written_repo_with_1_sample
        runner = CliRunner()
        aset_name = 'writtenaset'

        with monkeypatch.context() as m:
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['show'], ['ext'], 'show', self.show))
            res = runner.invoke(
                cli.view_data, [aset_name, 'data', '--format', 'ext'], obj=repo)
            assert res.exit_code == 0
            assert self.show_msg in res.output

    def test_show_on_startpoint(self, monkeypatch, written_repo_with_1_sample):
        repo = written_repo_with_1_sample
        runner = CliRunner()
        aset_name = 'writtenaset'

        with monkeypatch.context() as m:
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['show'], ['ext'], 'show', self.show))
            res = runner.invoke(
                cli.view_data, [aset_name, 'data', 'master', '--format', 'ext'], obj=repo)
            assert res.exit_code == 0
            res = runner.invoke(
                cli.view_data, [aset_name, 'data', 'wrongstartpoint', '--format', 'ext'], obj=repo)
            assert "No matching commit hash found" in str(res.exception)

    def test_show_with_wrong_arg(self, monkeypatch, written_repo_with_1_sample):
        repo = written_repo_with_1_sample
        runner = CliRunner()
        aset_name = 'writtenaset'

        with monkeypatch.context() as m:
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['show'], ['ext'], 'show', self.show))
            res = runner.invoke(
                cli.view_data, [aset_name, 'data', '--format', 'wrong'], obj=repo)
            assert res.exit_code == 1
            assert 'No plugins found' in res.stdout

    def test_wrong_sample_name(self, monkeypatch, written_repo_with_1_sample):
        repo = written_repo_with_1_sample
        runner = CliRunner()
        aset_name = 'writtenaset'
        with monkeypatch.context() as m:
            m.setattr(PluginManager, "_scan_plugins", monkeypatch_scan(['show'], ['ext'], 'show', self.show))
            res = runner.invoke(
                cli.view_data, [aset_name, 'wrongsample', '--format', 'ext'], obj=repo)
            assert res.exit_code == 1
            assert "wrongsample" in res.stdout


================================================
FILE: tests/test_column.py
================================================
import pytest
import numpy as np
from conftest import fixed_shape_backend_params, variable_shape_backend_params
from itertools import permutations


def assert_equal(arr, arr2):
    assert np.array_equal(arr, arr2)
    assert arr.dtype == arr2.dtype


class TestColumn(object):

    @pytest.mark.parametrize('name', [
        'invalid\n', '\ninvalid', 'inv name', 'inva@lid', 12, ' try', 'andthis ',
        'VeryLongNameIsInvalidOver64CharactersNotAllowedVeryLongNameIsInva'])
    def test_invalid_column_name(self, repo, randomsizedarray, name):
        co = repo.checkout(write=True)
        with pytest.raises(ValueError):
            co.add_ndarray_column(name=name, prototype=randomsizedarray)
        with pytest.raises(ValueError):
            co.add_str_column(name=name)
        co.close()

    def test_read_only_mode(self, aset_samples_initialized_repo):
        import hangar
        co = aset_samples_initialized_repo.checkout()
        assert isinstance(co, hangar.checkout.ReaderCheckout)
        with pytest.raises(AttributeError):
            assert co.add_ndarray_column('foo')
        with pytest.raises(AttributeError):
            assert co.add_str_column('foo')
        with pytest.raises(PermissionError):
            del co.columns['foo']
        with pytest.raises(PermissionError):
            del co.columns['foo']
        assert len(co.columns['writtenaset']) == 0
        co.close()

    def test_get_column(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)

        # getting the column with `get`
        asetOld = co.columns.get('writtenaset')
        asetOldPath = asetOld._path
        asetOldAsetn = asetOld.column
        asetOldDefaultSchemaHash = asetOld._schema.schema_hash_digest()

        asetOld['1'] = array5by7
        co.commit('this is a commit message')
        co.close()
        co = aset_samples_initialized_repo.checkout()

        # getting column with dictionary like style method
        asetNew = co.columns['writtenaset']
        assert_equal(asetNew['1'], array5by7)
        assert asetOldPath == asetNew._path
        assert asetOldAsetn == asetNew.column
        assert asetOldDefaultSchemaHash == asetNew._schema.schema_hash_digest()
        co.close()

    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_remove_column(self, aset_backend, aset_samples_initialized_repo):
        co = aset_samples_initialized_repo.checkout(write=True)
        del co.columns['writtenaset']
        with pytest.raises(KeyError):
            del co.columns['writtenaset']

        co.add_ndarray_column('writtenaset', shape=(5, 7), dtype=np.float64, backend=aset_backend)
        assert len(co.columns) == 1
        del co.columns['writtenaset']
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout(write=True)
        assert len(co.columns) == 0

        co.add_ndarray_column('writtenaset', shape=(5, 7), dtype=np.float64, backend=aset_backend)
        co.commit('this is a commit message')
        co.close()
        co = aset_samples_initialized_repo.checkout(write=True)
        assert len(co.columns) == 1
        del co.columns['writtenaset']
        assert len(co.columns) == 0
        co.commit('this is a commit message')
        co.close()

    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_init_again(self, aset_backend, repo, randomsizedarray):
        co = repo.checkout(write=True)
        co.add_ndarray_column('aset', prototype=randomsizedarray, backend=aset_backend)
        with pytest.raises(LookupError):
            co.add_ndarray_column('aset', prototype=randomsizedarray, backend=aset_backend)
        co.close()

    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_column_with_more_dimension(self, aset_backend, repo):
        co = repo.checkout(write=True)
        shape = (0, 1, 2)
        with pytest.raises(ValueError):
            co.add_ndarray_column('aset', shape=shape, dtype=np.int, backend=aset_backend)
        shape = [1] * 31
        aset = co.add_ndarray_column('aset1', shape=shape, dtype=np.int, backend=aset_backend)
        assert len(aset.shape) == 31
        shape = [1] * 32
        with pytest.raises(ValueError):
            # maximum tensor rank must be <= 31
            co.add_ndarray_column('aset2', shape=shape, dtype=np.int, backend=aset_backend)
        co.close()

    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_column_with_empty_dimension(self, aset_backend, repo):
        co = repo.checkout(write=True)
        arr = np.array(1, dtype=np.int64)
        aset = co.add_ndarray_column('aset1', shape=(), dtype=np.int64, backend=aset_backend)
        aset['1'] = arr
        co.commit('this is a commit message')
        aset = co.add_ndarray_column('aset2', prototype=arr)
        aset['1'] = arr
        co.commit('this is a commit message')
        co.close()
        co = repo.checkout()
        aset1 = co.columns['aset1']
        aset2 = co.columns['aset2']
        assert_equal(aset1['1'], arr)
        assert_equal(aset2['1'], arr)
        co.close()

    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_column_with_int_specifier_as_dimension(self, aset_backend, repo):
        co = repo.checkout(write=True)
        arr = np.arange(10, dtype=np.int64)
        aset = co.add_ndarray_column('aset1', shape=10, dtype=np.int64, backend=aset_backend)
        aset['1'] = arr
        co.commit('this is a commit message')
        arr2 = np.array(53, dtype=np.int64)
        aset = co.add_ndarray_column('aset2', prototype=arr2)
        aset['1'] = arr2
        co.commit('this is a commit message')
        co.close()
        co = repo.checkout()
        aset1 = co.columns['aset1']
        aset2 = co.columns['aset2']
        assert_equal(aset1['1'], arr)
        assert_equal(aset2['1'], arr2)
        co.close()

    @pytest.mark.parametrize('write', [True, False])
    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_getattr_does_not_raise_permission_error_if_alive(self, aset_backend, write, repo):
        co = repo.checkout(write=True)
        arr = np.arange(10, dtype=np.int64)
        aset = co.add_ndarray_column('aset1', shape=10, dtype=np.int64, backend=aset_backend)
        aset['1'] = arr
        co.commit('hello')
        co.close()
        co = repo.checkout(write=write)
        aset = co.columns['aset1']

        assert hasattr(aset, 'doesnotexist') is False  # does not raise error
        assert hasattr(aset, '_mode') is True
        with pytest.raises(AttributeError):
            assert getattr(aset, 'doesnotexist')
        assert getattr(aset, '_mode') == ('a' if write else 'r')

        co.close()
        with pytest.raises(PermissionError):
            hasattr(aset, 'doesnotexist')
        with pytest.raises(PermissionError):
            hasattr(aset, '_mode')


class TestDataWithFixedSizedColumn(object):

    @pytest.mark.parametrize("aset1_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset2_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset3_backend", fixed_shape_backend_params)
    def test_column_remote_references_property_with_none(
            self, aset1_backend, aset2_backend, aset3_backend, repo, randomsizedarray
    ):
        co = repo.checkout(write=True)
        aset1 = co.add_ndarray_column('aset1', prototype=randomsizedarray, backend=aset1_backend)
        aset2 = co.add_ndarray_column('aset2', shape=(2, 2), dtype=np.int, backend=aset2_backend)
        aset3 = co.add_ndarray_column('aset3', shape=(3, 4), dtype=np.float32, backend=aset3_backend)

        with aset1 as d1, aset2 as d2, aset3 as d3:
            d1[1] = randomsizedarray
            d2[1] = np.ones((2, 2), dtype=np.int)
            d3[1] = np.ones((3, 4), dtype=np.float32)

        assert co.columns.contains_remote_references == {'aset1': False, 'aset2': False, 'aset3': False}
        assert co.columns.remote_sample_keys == {'aset1': (), 'aset2': (), 'aset3': ()}
        co.close()

    @pytest.mark.parametrize("aset1_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset2_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset3_backend", fixed_shape_backend_params)
    def test_column_remote_references_property_with_remotes(
            self, aset1_backend, aset2_backend, aset3_backend, repo, randomsizedarray
    ):
        co = repo.checkout(write=True)
        aset1 = co.add_ndarray_column('aset1', prototype=randomsizedarray, backend=aset1_backend)
        aset2 = co.add_ndarray_column('aset2', shape=(2, 2), dtype=np.int, backend=aset2_backend)
        aset3 = co.add_ndarray_column('aset3', shape=(3, 4), dtype=np.float32, backend=aset3_backend)

        with aset1 as d1, aset2 as d2, aset3 as d3:
            d1[1] = randomsizedarray
            d2[1] = np.ones((2, 2), dtype=np.int)
            d3[1] = np.ones((3, 4), dtype=np.float32)

        assert co.columns.contains_remote_references == {'aset1': False, 'aset2': False, 'aset3': False}
        assert co.columns.remote_sample_keys == {'aset1': (), 'aset2': (), 'aset3': ()}
        co.commit('hello')
        co.close()
        co = repo.checkout()

        # mock in a remote reference record: backend code ``50`` is the remote
        # backend, so the injected samples appear as unfetched remote data
        from hangar.backends import backend_decoder
        template = backend_decoder(b'50:daeaaeeaebv')
        co._columns._columns['aset1']._samples[12] = template
        co._columns._columns['aset2']._samples[22] = template

        assert co.columns.contains_remote_references == {'aset1': True, 'aset2': True, 'aset3': False}
        assert co.columns.remote_sample_keys == {'aset1': (12,), 'aset2': (22,), 'aset3': ()}
        co.close()

    @pytest.mark.parametrize("aset1_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset2_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset3_backend", fixed_shape_backend_params)
    def test_iterating_over(self, aset1_backend, aset2_backend, aset3_backend, repo, randomsizedarray):
        co = repo.checkout(write=True)
        all_tensors = []
        aset1 = co.add_ndarray_column('aset1', prototype=randomsizedarray, backend=aset1_backend)
        aset2 = co.add_ndarray_column('aset2', shape=(2, 2), dtype=np.int, backend=aset2_backend)
        aset3 = co.add_ndarray_column('aset3', shape=(3, 4), dtype=np.float32, backend=aset3_backend)

        with aset1 as d1, aset2 as d2, aset3 as d3:
            d1['1'] = randomsizedarray
            d1['2'] = np.zeros_like(randomsizedarray)
            d1['3'] = np.zeros_like(randomsizedarray) + 5

            d2['1'] = np.ones((2, 2), dtype=np.int)
            d2['2'] = np.ones((2, 2), dtype=np.int) * 5
            d2['3'] = np.zeros((2, 2), dtype=np.int)

            d3['1'] = np.ones((3, 4), dtype=np.float32)
            d3['2'] = np.ones((3, 4), dtype=np.float32) * 7
            d3['3'] = np.zeros((3, 4), dtype=np.float32)

        all_tensors.extend([aset1['1'], aset1['2'], aset1['3']])
        all_tensors.extend([aset2['1'], aset2['2'], aset2['3']])
        all_tensors.extend([aset3['1'], aset3['2'], aset3['3']])

        co.commit('this is a commit message')
        co.close()
        co = repo.checkout()
        # iterating over .items()
        tensors_in_the_order = iter(all_tensors)
        for dname, aset in co.columns.items():
            assert aset._column_name == dname
            for sname, sample in aset.items():
                assert_equal(sample, next(tensors_in_the_order))

        # iterating over .keys()
        tensors_in_the_order = iter(all_tensors)
        for dname in co.columns.keys():
            for sname in co.columns[dname].keys():
                assert_equal(co.columns[dname][sname], next(tensors_in_the_order))

        # iterating over .values()
        tensors_in_the_order = iter(all_tensors)
        for aset in co.columns.values():
            for sample in aset.values():
                assert_equal(sample, next(tensors_in_the_order))
        co.close()

    @pytest.mark.parametrize("aset1_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset2_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset3_backend", fixed_shape_backend_params)
    def test_iterating_over_local_only(self, aset1_backend, aset2_backend, aset3_backend, repo, randomsizedarray):
        co = repo.checkout(write=True)
        all_tensors = []
        aset1 = co.add_ndarray_column('aset1', prototype=randomsizedarray, backend=aset1_backend)
        aset2 = co.add_ndarray_column('aset2', shape=(2, 2), dtype=np.int, backend=aset2_backend)
        aset3 = co.add_ndarray_column('aset3', shape=(3, 4), dtype=np.float32, backend=aset3_backend)

        with aset1 as d1, aset2 as d2, aset3 as d3:
            d1['1'] = randomsizedarray
            d1['2'] = np.zeros_like(randomsizedarray)
            d1['3'] = np.zeros_like(randomsizedarray) + 5

            d2['1'] = np.ones((2, 2), dtype=np.int)
            d2['2'] = np.ones((2, 2), dtype=np.int) * 5
            d2['3'] = np.zeros((2, 2), dtype=np.int)

            d3['1'] = np.ones((3, 4), dtype=np.float32)
            d3['2'] = np.ones((3, 4), dtype=np.float32) * 7
            d3['3'] = np.zeros((3, 4), dtype=np.float32)

        all_tensors.extend([aset1['1'], aset1['2'], aset1['3']])
        all_tensors.extend([aset2['1'], aset2['2'], aset2['3']])
        all_tensors.extend([aset3['1'], aset3['2'], aset3['3']])

        co.commit('this is a commit message')
        co.close()
        co = repo.checkout()

        # mock in remote-only samples (backend code ``50``) so key '4' exists
        # in the records for aset1/aset2 but has no data available locally
        from hangar.backends import backend_decoder
        template = backend_decoder(b'50:daeaaeeaebv')
        co._columns._columns['aset1']._samples['4'] = template
        co._columns._columns['aset2']._samples['4'] = template

        # iterating over .items()
        tensors_in_the_order = iter(all_tensors)
        for dname in ['aset1', 'aset2', 'aset3']:
            aset = co.columns[dname]
            count = 0
            for sname, sample in aset.items(local=True):
                count += 1
                assert_equal(sample, next(tensors_in_the_order))
                assert '4' != sname
            assert count == 3

        # iterating over .keys()
        tensors_in_the_order = iter(all_tensors)
        for dname in ['aset1', 'aset2', 'aset3']:
            aset = co.columns[dname]
            count = 0
            for sname in aset.keys(local=True):
                count += 1
                assert_equal(aset[sname], next(tensors_in_the_order))
                assert '4' != sname
            assert count == 3

        # iterating over .values()
        tensors_in_the_order = iter(all_tensors)
        for dname in ['aset1', 'aset2', 'aset3']:
            aset = co.columns[dname]
            count = 0
            for sample in aset.values(local=True):
                count += 1
                assert_equal(sample, next(tensors_in_the_order))
            assert count == 3

        assert list(co['aset1'].keys()) == ['1', '2', '3', '4']
        with pytest.raises((FileNotFoundError, KeyError)):
            list(co['aset1'].values())
        with pytest.raises((FileNotFoundError, KeyError)):
            list(co['aset1'].items())

        assert list(co['aset2'].keys()) == ['1', '2', '3', '4']
        with pytest.raises((FileNotFoundError, KeyError)):
            list(co['aset2'].values())
        with pytest.raises((FileNotFoundError, KeyError)):
            list(co['aset2'].items())

        assert list(co['aset3'].keys()) == ['1', '2', '3']
        assert len(list(co['aset3'].values())) == 3
        assert len(list(co['aset3'].items())) == 3
        co.close()

    def test_get_data(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        co.commit('this is a commit message')
        co.close()
        co = aset_samples_initialized_repo.checkout()
        assert np.allclose(co.columns['writtenaset']['1'], array5by7)
        assert np.allclose(co.columns.get('writtenaset').get('1'), array5by7)
        co.close()

    def test_get_sample_with_default_works(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        res = co.columns['writtenaset'].get('doesnotexist', default=500)
        assert res == 500
        res = co.columns['writtenaset'].get('doesnotexist', 500)
        assert res == 500
        co.close()

    def test_get_multiple_samples_fails(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        co.columns['writtenaset']['2'] = array5by7 + 1
        co.columns['writtenaset']['3'] = array5by7 + 2
        co.commit('this is a commit message')
        co.close()

        nco = aset_samples_initialized_repo.checkout()
        with pytest.raises(TypeError):
            res = nco.columns['writtenaset'].get(['1', '2'])
        res = nco.columns['writtenaset'].get(('1', '2'))
        assert res is None

        aset = nco.columns['writtenaset']
        with pytest.raises(TypeError):
            res = aset.get(*('1', '2', '3'))
        nco.close()

    def test_getitem_multiple_samples_missing_key(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        co.commit('this is a commit message')
        co.close()

        nco = aset_samples_initialized_repo.checkout()
        with pytest.raises(KeyError):
            nco.columns['writtenaset'][('1', '2')]
        with pytest.raises(KeyError):
            aset = nco.columns['writtenaset']
            aset[('1', '2')]
        nco.close()

    def test_get_multiple_samples_missing_key(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        co.commit('this is a commit message')
        co.close()

        nco = aset_samples_initialized_repo.checkout()
        aset = nco.columns['writtenaset']
        res = aset.get(('1', '2'))
        assert res is None
        nco.close()

    def test_add_data_str_keys(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        aset = co.columns['writtenaset']
        with pytest.raises(KeyError):
            aset['somerandomkey']

        aset['1'] = array5by7
        aset['2'] = array5by7
        co.commit('this is a commit message')
        co.close()
        co = aset_samples_initialized_repo.checkout()
        assert_equal(co.columns['writtenaset']['1'], array5by7)
        assert_equal(co.columns['writtenaset']['2'], array5by7)
        co.close()

    def test_add_data_int_keys(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        aset = co.columns['writtenaset']

        aset[1] = array5by7
        secondArray = array5by7 + 1
        aset[2] = secondArray
        co.commit('this is a commit message')
        co.close()
        co = aset_samples_initialized_repo.checkout()
        assert_equal(co.columns['writtenaset'][1], array5by7)
        assert_equal(co.columns['writtenaset'][2], secondArray)
        co.close()

    def test_cannot_add_data_negative_int_key(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        aset = co.columns['writtenaset']
        with pytest.raises(ValueError):
            aset[-1] = array5by7
        assert len(co.columns['writtenaset']) == 0
        co.close()

    def test_cannot_add_data_float_key(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        aset = co.columns['writtenaset']
        with pytest.raises(ValueError):
            aset[2.1] = array5by7
        with pytest.raises(ValueError):
            aset[0.0] = array5by7
        assert len(co.columns['writtenaset']) == 0
        co.close()

    def test_add_data_mixed_int_str_keys(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        aset = co.columns['writtenaset']

        aset[1] = array5by7
        newFirstArray = array5by7 + 1
        aset['1'] = newFirstArray
        secondArray = array5by7 + 2
        aset[2] = secondArray
        thirdArray = array5by7 + 3
        aset['2'] = thirdArray
        co.commit('this is a commit message')
        co.close()
        co = aset_samples_initialized_repo.checkout()
        assert_equal(co.columns['writtenaset'][1], array5by7)
        assert_equal(co.columns['writtenaset']['1'], newFirstArray)
        assert_equal(co.columns['writtenaset'][2], secondArray)
        assert_equal(co.columns['writtenaset']['2'], thirdArray)
        co.close()

    def test_cannot_add_data_sample_name_longer_than_64_characters(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        aset = co.columns['writtenaset']
        with pytest.raises(ValueError):
            aset['VeryLongNameIsInvalidOver64CharactersNotAllowedVeryLongNameIsInva'] = array5by7
        assert len(co.columns['writtenaset']) == 0
        co.close()

    def test_add_with_wrong_argument_order(self, aset_samples_initialized_w_checkout, array5by7):
        aset = aset_samples_initialized_w_checkout.columns['writtenaset']
        with pytest.raises(ValueError):
            aset[array5by7] = '1'

    def test_update_with_dict_single_item(self, aset_samples_initialized_w_checkout, array5by7):
        aset = aset_samples_initialized_w_checkout.columns['writtenaset']
        data_map = {'foo': array5by7}
        aset.update(data_map)
        assert_equal(aset['foo'], array5by7)

    def test_update_with_dict_multiple_items(self, aset_samples_initialized_w_checkout, array5by7):
        aset = aset_samples_initialized_w_checkout.columns['writtenaset']
        data_map = {
            'foo': array5by7,
            1: array5by7+1
        }
        aset.update(data_map)
        assert_equal(aset['foo'], array5by7)
        assert_equal(aset[1], array5by7+1)

    def test_update_with_list_single_item(self, aset_samples_initialized_w_checkout, array5by7):
        aset = aset_samples_initialized_w_checkout.columns['writtenaset']
        data_map = ['foo', array5by7]
        with pytest.raises(ValueError, match='dictionary update sequence'):
            aset.update(data_map)
        assert 'foo' not in aset

        aset.update((data_map,))  # try again while contained in iterable
        assert_equal(aset['foo'], array5by7)

    def test_update_with_list_multiple_items(self, aset_samples_initialized_w_checkout, array5by7):
        aset = aset_samples_initialized_w_checkout.columns['writtenaset']
        data_map = [
            ('foo', array5by7),
            (1, array5by7+1),
        ]
        aset.update(data_map)
        assert_equal(aset['foo'], array5by7)
        assert_equal(aset[1], array5by7+1)

    def test_update_with_only_kwargs_single_item(self, aset_samples_initialized_w_checkout, array5by7):
        aset = aset_samples_initialized_w_checkout.columns['writtenaset']
        aset.update(foo=array5by7)
        assert_equal(aset['foo'], array5by7)

    def test_update_with_only_kwargs_multiple_items(self, aset_samples_initialized_w_checkout, array5by7):
        aset = aset_samples_initialized_w_checkout.columns['writtenaset']
        aset.update(foo=array5by7, bar=array5by7+1)
        assert_equal(aset['foo'], array5by7)
        assert_equal(aset['bar'], array5by7+1)

    def test_update_with_list_and_kwargs(self, aset_samples_initialized_w_checkout, array5by7):
        aset = aset_samples_initialized_w_checkout.columns['writtenaset']
        data_map = [
            ('foo', array5by7),
            (1, array5by7+1),
        ]
        aset.update(data_map, bar=array5by7+2)
        assert_equal(aset['foo'], array5by7)
        assert_equal(aset[1], array5by7+1)
        assert_equal(aset['bar'], array5by7 + 2)

    def test_update_with_dict_and_kwargs(self, aset_samples_initialized_w_checkout, array5by7):
        aset = aset_samples_initialized_w_checkout.columns['writtenaset']
        data_map = {
            'foo': array5by7,
            1: array5by7+1,
        }
        aset.update(data_map, bar=array5by7+2)
        assert_equal(aset['foo'], array5by7)
        assert_equal(aset[1], array5by7+1)
        assert_equal(aset['bar'], array5by7 + 2)

    def test_update_with_dict_and_kwargs_does_not_modify_input_in_calling_scope(
        self, aset_samples_initialized_w_checkout, array5by7
    ):
        """ensure bug does not revert.

        Had a case where if dict was passed as ``other`` along with kwargs, the operation
        would complete as normally, but when control returned to the caller the original
        dict passed in as ``other`` would have been silently merged with the kwargs.
        """
        aset = aset_samples_initialized_w_checkout.columns['writtenaset']
        data_map = {
            'foo': array5by7,
            1: array5by7+1,
        }
        data_map_before = list(data_map.keys())
        aset.update(data_map, bar=array5by7+2)
        # in bug case, would now observe that data_map would have been
        # silently modified in a method analogous to calling:
        #
        #   ``data_map.update({'bar': np.array})``
        #
        assert list(data_map.keys()) == data_map_before
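        # A non-mutating merge avoids this regression (illustrative sketch only,
        # not Hangar's actual internals): copy ``other`` before folding in the
        # kwargs, e.g. ``merged = dict(other); merged.update(kwargs)``, so the
        # caller's mapping is never touched.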

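    # Each ``data_map`` below is a malformed update payload: wrong tuple arity,
    # an array used where a key is expected, or nested containers in place of a
    # flat (key, value) pair.  ``update`` must reject every one with ValueError.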
    @pytest.mark.parametrize('data_map', [
        ['foo', {'bar': np.random.random((5, 7))}],
        ['foo', 'bar', np.random.random((5, 7))],
        [('foo', 'bar', np.random.random((5, 7)))],
        [{('foo', 'bar'): np.random.random((5, 7))}],
        [('foo', 'bar', np.random.random((5, 7)))],
        [('foo', np.random.random((5, 7)), 'bar')],
        [(np.random.random((5, 7)), 'foo', 'bar')],
        [('foo', np.random.random((5, 7)), 'bar'), ('valid', np.random.random((5, 7)))],
        [('valid', np.random.random((5, 7))), ('foo', np.random.random((5, 7)), 'bar')],
        {'foo': np.random.random((5, 7)), 'bar': (np.random.random((5, 7)), np.random.random((5, 7)))},
    ])
    def test_update_with_invalid_data_map_fails(self, aset_samples_initialized_w_checkout, data_map):
        aset = aset_samples_initialized_w_checkout['writtenaset']
        with pytest.raises(ValueError):
            aset.update(data_map)

    @pytest.mark.parametrize('key,value', [
        ['foo', {'bar': np.random.random((5, 7))}],
        ['foo', ('bar', np.random.random((5, 7)))],
        [('foo', 'bar'), np.random.random((5, 7))],
        ['foo', {('foo', 'bar'): np.random.random((5, 7))}],
        ['foo', ('bar', np.random.random((5, 7)))],
        ['foo', (np.random.random((5, 7)), 'bar')],
        [np.random.random((5, 7)), ('foo', 'bar')],
        [('foo', np.random.random((5, 7)), 'bar'), ('valid', np.random.random((5, 7)))],
        [('valid', np.random.random((5, 7))), ('foo', np.random.random((5, 7)), 'bar')],
        [('valid', np.random.random((5, 7))), ('valid2', np.random.random((5, 7)))],
    ])
    def test_setitem_with_invalid_data_map_fails(self, aset_samples_initialized_w_checkout, key, value):
        aset = aset_samples_initialized_w_checkout['writtenaset']
        with pytest.raises(ValueError):
            aset[key] = value

    def test_add_multiple_data_single_commit(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        new_array = np.zeros_like(array5by7)
        co.columns['writtenaset']['2'] = new_array
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout()
        aset = co.columns['writtenaset']
        assert len(aset) == 2
        assert list(aset.keys()) == ['1', '2']
        assert_equal(aset['1'], array5by7)
        co.close()

    def test_add_same_data_same_key_does_not_duplicate_hash(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        aset = co.columns['writtenaset']
        aset['1'] = array5by7
        old_spec = aset._samples['1']
        aset['1'] = array5by7
        new_spec = aset._samples['1']
        assert old_spec == new_spec
        assert len(aset) == 1
        assert len(aset._samples) == 1
        co.close()

    def test_multiple_data_multiple_commit(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        co.commit('this is a commit message')
        new_array = np.zeros_like(array5by7)
        co.columns['writtenaset']['2'] = new_array
        co.close()

        new_new_array = new_array + 5
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['3'] = new_new_array
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout()
        aset = co.columns['writtenaset']
        assert_equal(aset['1'], array5by7)
        assert_equal(aset['2'], new_array)
        assert_equal(aset['3'], new_new_array)
        co.close()

    def test_added_but_not_committed(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        co.close()

        with pytest.raises(PermissionError):
            co.commit('this is a commit message')

        co = aset_samples_initialized_repo.checkout()
        aset = co.columns['writtenaset']
        with pytest.raises(KeyError):
            aset['1']
        co.close()

    def test_remove_data(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        co.columns['writtenaset']['2'] = array5by7 + 1
        co.columns['writtenaset']['3'] = array5by7 + 2
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 3
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout(write=True)
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 3
        del co.columns['writtenaset']['1']
        del co.columns['writtenaset']['3']
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 1
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout()
        with pytest.raises(KeyError):
            co.columns['writtenaset']['1']
        with pytest.raises(KeyError):
            co.columns['writtenaset']['3']
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 1
        assert_equal(co.columns['writtenaset']['2'], array5by7 + 1)
        co.close()

    def test_remove_data_multiple_items(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        co.columns['writtenaset']['2'] = array5by7 + 1
        co.columns['writtenaset']['3'] = array5by7 + 2
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 3
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout(write=True)
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 3
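        # deleting with a tuple does not fan out over multiple keys; it raises
        # KeyError and leaves '1' and '3' untouched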
        with pytest.raises(KeyError):
            del co.columns['writtenaset'][('1', '3')]
        assert '1' in co.columns['writtenaset']
        assert '3' in co.columns['writtenaset']
        del co.columns['writtenaset']['1']
        del co.columns['writtenaset']['3']
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 1
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout()
        with pytest.raises(KeyError):
            co.columns['writtenaset']['1']
        with pytest.raises(KeyError):
            co.columns['writtenaset']['3']
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 1
        assert_equal(co.columns['writtenaset']['2'], array5by7 + 1)
        co.close()

    def test_pop_data(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        co.columns['writtenaset']['2'] = array5by7 + 1
        co.columns['writtenaset']['3'] = array5by7 + 2
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 3
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout(write=True)
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 3
        res = co.columns['writtenaset'].pop('1')
        assert_equal(res, array5by7)

        aset = co.columns['writtenaset']
        res = aset.pop('3')
        assert_equal(res, array5by7 + 2)

        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 1
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout()
        with pytest.raises(KeyError):
            co.columns['writtenaset']['1']
        with pytest.raises(KeyError):
            co.columns['writtenaset']['3']
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 1
        assert_equal(co.columns['writtenaset']['2'], array5by7 + 1)
        co.close()

    def test_pop_data_multiple_items(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        co.columns['writtenaset']['2'] = array5by7 + 1
        co.columns['writtenaset']['3'] = array5by7 + 2
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 3
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout(write=True)
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 3
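        # ``pop`` accepts a single sample key; two positional keys is a TypeError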
        with pytest.raises(TypeError):
            co.columns['writtenaset'].pop('1', '3')
        res = co.columns['writtenaset'].pop('1')
        assert_equal(res, array5by7)
        res = co.columns['writtenaset'].pop('3')
        assert_equal(res, array5by7 + 2)
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 1
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout()
        with pytest.raises(KeyError):
            co.columns['writtenaset']['1']
        with pytest.raises(KeyError):
            co.columns['writtenaset']['3']
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 1
        assert_equal(co.columns['writtenaset']['2'], array5by7 + 1)
        co.close()

    def test_remove_all_data(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        new_array = np.zeros_like(array5by7)
        co.columns['writtenaset']['2'] = new_array
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 2
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout(write=True)
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 2
        del co.columns['writtenaset']['1']
        del co.columns['writtenaset']['2']
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 0

        wset = co.columns['writtenaset']
        del co.columns['writtenaset']

        assert len(co.columns) == 0
        with pytest.raises(KeyError):
            len(co.columns['writtenaset'])
        co.commit('this is a commit message')
        co.close()

        # recreating same and verifying
        co = aset_samples_initialized_repo.checkout(write=True)
        assert len(co.columns) == 0
        co.add_ndarray_column('writtenaset', prototype=array5by7)
        co.columns['writtenaset']['1'] = array5by7
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 1
        co.commit('this is a commit message')
        co.close()

        co = aset_samples_initialized_repo.checkout()
        assert_equal(co.columns['writtenaset']['1'], array5by7)
        assert len(co.columns) == 1
        assert len(co.columns['writtenaset']) == 1
        co.close()

    def test_remove_data_nonexistent_sample_key_raises(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset']['1'] = array5by7
        new_array = np.zeros_like(array5by7)
        co.columns['writtenaset']['2'] = new_array
        co.columns['writtenaset']['3'] = new_array + 5
        with pytest.raises(KeyError):
            del co.columns['writtenaset']['doesnotexist']
        co.commit('this is a commit message')
        co.close()

    @pytest.mark.parametrize("aset1_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset2_backend", fixed_shape_backend_params)
    def test_multiple_columns_single_commit(
            self, aset1_backend, aset2_backend, aset_samples_initialized_repo, randomsizedarray
    ):
        co = aset_samples_initialized_repo.checkout(write=True)
        aset1 = co.add_ndarray_column('aset1', prototype=randomsizedarray, backend=aset1_backend)
        aset2 = co.add_ndarray_column('aset2', prototype=randomsizedarray, backend=aset2_backend)
        aset1['arr'] = randomsizedarray
        aset2['arr'] = randomsizedarray
        co.commit('this is a commit message')
        co.close()
        co = aset_samples_initialized_repo.checkout()
        assert_equal(co.columns['aset1']['arr'], randomsizedarray)
        assert_equal(co.columns['aset2']['arr'], randomsizedarray)
        co.close()

    @pytest.mark.parametrize("aset1_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset2_backend", fixed_shape_backend_params)
    def test_prototype_and_shape(self, aset1_backend, aset2_backend, repo, randomsizedarray):
        co = repo.checkout(write=True)
        aset1 = co.add_ndarray_column('aset1', prototype=randomsizedarray, backend=aset1_backend)
        aset2 = co.add_ndarray_column('aset2', shape=randomsizedarray.shape, dtype=randomsizedarray.dtype, backend=aset2_backend)

        newarray = np.random.random(randomsizedarray.shape).astype(randomsizedarray.dtype)
        aset1['arr1'] = newarray
        aset2['arr'] = newarray
        co.commit('this is a commit message')
        co.close()

        co = repo.checkout()
        assert_equal(co.columns['aset1']['arr1'], newarray)
        assert_equal(co.columns['aset2']['arr'], newarray)
        co.close()

    def test_samples_without_name(self, repo, randomsizedarray):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('aset', prototype=randomsizedarray)
        with pytest.raises(TypeError):
            aset[randomsizedarray]

        aset_no_name = co.add_ndarray_column('aset_no_name', prototype=randomsizedarray)
        added = aset_no_name.append(randomsizedarray)
        assert_equal(next(aset_no_name.values()), randomsizedarray)
        assert_equal(aset_no_name[added], randomsizedarray)
        co.close()

    def test_append_samples(self, repo, randomsizedarray):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('aset', prototype=randomsizedarray)
        with pytest.raises((ValueError, TypeError)):
            aset[randomsizedarray]

        aset_no_name = co.add_ndarray_column('aset_no_name', prototype=randomsizedarray)
        generated_key = aset_no_name.append(randomsizedarray)
        assert generated_key in aset_no_name
        assert len(aset_no_name) == 1
        assert_equal(aset_no_name[generated_key], randomsizedarray)
        co.close()

    def test_different_data_types_and_shapes(self, repo):
        co = repo.checkout(write=True)
        shape = (2, 3)
        dtype = np.int64
        another_dtype = np.float64
        another_shape = (3, 4)
        arr = np.random.random(shape).astype(dtype)
        aset = co.add_ndarray_column('aset', shape=shape, dtype=dtype)
        aset['1'] = arr

        newarr = np.random.random(shape).astype(another_dtype)
        with pytest.raises(ValueError):
            aset['2'] = newarr

        newarr = np.random.random(another_shape).astype(dtype)
        with pytest.raises(ValueError):
            aset['3'] = newarr
        co.close()

    def test_add_sample_with_non_numpy_array_data_fails(self, aset_samples_initialized_repo):
        co = aset_samples_initialized_repo.checkout(write=True)
        with pytest.raises(ValueError, match='`data` argument type'):
            co.columns['writtenaset'][1] = [[1, 2, 3, 4, 5, 6, 7] for i in range(5)]
        co.close()

    def test_add_sample_with_fortran_order_data_fails(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        with pytest.raises(ValueError, match='`data` must be "C" contiguous array.'):
            co.columns['writtenaset'][1] = np.asfortranarray(array5by7)
        co.close()

    def test_add_sample_with_dimension_rank_fails(self, repo):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('aset', shape=(2, 3), dtype=np.float32, variable_shape=True)
        arr = np.random.randn(2, 3, 2).astype(np.float32)
        with pytest.raises(ValueError, match='data rank 3 != aset rank 2'):
            aset[1] = arr
        co.close()

    def test_add_sample_with_dimension_exceeding_max_fails(self, repo):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('aset', shape=(2, 3), dtype=np.float32, variable_shape=True)
        arr = np.random.randn(2, 4).astype(np.float32)
        with pytest.raises(ValueError, match='exceeds schema max'):
            aset[1] = arr
        co.close()

    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_writer_context_manager_column_add_sample(self, aset_backend, repo, randomsizedarray):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('aset', prototype=randomsizedarray, backend=aset_backend)
        with co.columns['aset'] as aset:
            aset['1'] = randomsizedarray
        co.commit('this is a commit message')
        co.close()
        co = repo.checkout()
        assert_equal(co.columns['aset']['1'], randomsizedarray)
        co.close()

    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_column_context_manager_aset_sample_add(self, aset_backend, repo, randomsizedarray):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('aset', prototype=randomsizedarray, backend=aset_backend)
        with co.columns['aset'] as aset:
            aset['1'] = randomsizedarray
            aset['2'] = randomsizedarray + 1
        co.commit('this is a commit message')
        co.close()

        co = repo.checkout()
        assert_equal(co.columns['aset']['1'], randomsizedarray)
        assert np.allclose(co.columns['aset'].get('2'), randomsizedarray + 1)
        co.close()

    def test_writer_column_properties_are_correct(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        assert co.columns.iswriteable is True
        d = co.columns['writtenaset']
        assert d.column == 'writtenaset'
        assert d.dtype == array5by7.dtype
        assert np.allclose(d.shape, array5by7.shape) is True
        assert d.schema_type == 'fixed_shape'
        assert d.iswriteable is True
        assert d.backend == '01'
        assert isinstance(d.backend_options, dict)
        assert len(d.backend_options) > 0
        assert d.contains_subsamples is False
        assert d.remote_reference_keys == ()
        assert d.contains_remote_references is False
        co.close()

    def test_reader_column_properties_are_correct(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=False)
        assert co.columns.iswriteable is False
        d = co.columns['writtenaset']
        assert d.column == 'writtenaset'
        assert d.dtype == array5by7.dtype
        assert np.allclose(d.shape, array5by7.shape) is True
        assert d.schema_type == 'fixed_shape'
        assert d.iswriteable is False
        assert d.backend == '01'
        assert isinstance(d.backend_options, dict)
        assert len(d.backend_options) > 0
        assert d.contains_subsamples is False
        assert d.remote_reference_keys == ()
        assert d.contains_remote_references is False

    def test_iter_column_samples_yields_keys(self, aset_samples_initialized_repo, array5by7):
        co = aset_samples_initialized_repo.checkout(write=True)
        co.columns['writtenaset'][0] = array5by7
        new_array = np.zeros_like(array5by7)
        co.columns['writtenaset'][1] = new_array
        co.columns['writtenaset'][2] = new_array + 5

        for idx, sname in enumerate(iter(co.columns['writtenaset'])):
            assert sname == idx
        assert idx == 2
        co.close()

    def test_iter_columns_yields_aset_names(self, repo_20_filled_samples):
        co = repo_20_filled_samples.checkout(write=True)
        for k in iter(co.columns):
            assert k in ['second_aset', 'writtenaset']
        co.close()

    def test_set_item_column_fails(self, aset_samples_initialized_repo):
        co = aset_samples_initialized_repo.checkout(write=True)
        with pytest.raises(AttributeError):
            co.columns['newaset'] = co.columns['writtenaset']
        co.close()


class TestVariableSizedColumn(object):
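    """Columns created with ``variable_shape=True`` accept samples of any shape
    whose rank matches the schema and whose dimensions do not exceed the
    declared maximum shape.
    """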

    @pytest.mark.parametrize(
        'test_shapes,max_shape',
        [[[(2, 5), (1, 10), (10, 1), (5, 2)], (10, 10)],
         [[(10,), (10,)], (10,)],
         [[(3, 3, 3), (27, 1, 1), (1, 27, 1), (1, 1, 27), (3, 9, 1), (9, 3, 1), (1, 3, 9), (1, 9, 3)], (27, 27, 27)]])
    @pytest.mark.parametrize("dtype1", [np.uint8, np.float32, np.int32])
    @pytest.mark.parametrize("dtype2", [np.uint8, np.float32, np.int32])
    @pytest.mark.parametrize('backend1', variable_shape_backend_params)
    @pytest.mark.parametrize('backend2', variable_shape_backend_params)
    def test_write_all_zeros_same_size_different_shape_does_not_store_as_identical_hashs(
        self, repo, test_shapes, max_shape, dtype1, dtype2, backend1, backend2
    ):
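        # Zero-filled arrays of different shapes can share identical raw bytes;
        # this guards against the backends deduplicating them purely by content
        # and handing back the wrong shape on read.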
        wco = repo.checkout(write=True)
        aset1 = wco.add_ndarray_column('aset1', shape=max_shape, dtype=dtype1, variable_shape=True, backend=backend1)
        aset2 = wco.add_ndarray_column('aset2', shape=max_shape, dtype=dtype2, variable_shape=True, backend=backend2)

        arrdict1, arrdict2 = {}, {}
        for idx, shape in enumerate(test_shapes):
            arr1 = np.zeros(shape, dtype=dtype1)
            arr2 = np.zeros(shape, dtype=dtype2)
            arrdict1[idx] = arr1
            arrdict2[idx] = arr2
            aset1[idx] = arr1
            aset2[idx] = arr2

        for k, v in arrdict1.items():
            # make sure they are good before committed
            res = aset1[k]
            assert res.dtype == v.dtype
            assert res.shape == v.shape
            assert_equal(res, v)
        for k, v in arrdict2.items():
            # make sure they are good before committed
            res = aset2[k]
            assert res.dtype == v.dtype
            assert res.shape == v.shape
            assert_equal(res, v)

        wco.commit('first')

        for k, v in arrdict1.items():
            # make sure they are still good after commit
            res = aset1[k]
            assert res.dtype == v.dtype
            assert res.shape == v.shape
            assert_equal(res, v)
        for k, v in arrdict2.items():
            # make sure they are still good after commit
            res = aset2[k]
            assert res.dtype == v.dtype
            assert res.shape == v.shape
            assert_equal(res, v)

        wco.close()
        rco = repo.checkout()
        naset1 = rco.columns['aset1']
        naset2 = rco.columns['aset2']

        for k, v in arrdict1.items():
            # make sure they read back correctly from a new read checkout
            res = naset1[k]
            assert res.dtype == v.dtype
            assert res.shape == v.shape
            assert_equal(res, v)
        for k, v in arrdict2.items():
            # make sure they read back correctly from a new read checkout
            res = naset2[k]
            assert res.dtype == v.dtype
            assert res.shape == v.shape
            assert_equal(res, v)
        rco.close()

    @pytest.mark.parametrize(
        'test_shapes,shape',
        [[[(10, 10), (1, 10), (2, 2), (3, 5), (1, 1), (10, 1)], (10, 10)],
         [[(10,), (1,), (5,)], (10,)],
         [[(100, 100, 100), (100, 100, 1), (100, 1, 100), (1, 100, 100), (1, 1, 1), (34, 6, 3)], (100, 100, 100)]])
    @pytest.mark.parametrize("dtype", [np.uint8, np.float32])
    @pytest.mark.parametrize('backend', variable_shape_backend_params)
    def test_writer_can_create_variable_size_column(
        self, aset_samples_initialized_repo, dtype, test_shapes, shape, backend
    ):
        repo = aset_samples_initialized_repo
        wco = repo.checkout(write=True)
        wco.add_ndarray_column('varaset', shape=shape, dtype=dtype, variable_shape=True, backend=backend)
        d = wco.columns['varaset']

        arrdict = {}
        for idx, shape in enumerate(test_shapes):
            arr = (np.random.random_sample(shape) * 10).astype(dtype)
            arrdict[str(idx)] = arr
            d[str(idx)] = arr

        for k, v in arrdict.items():
            # make sure they are good before committed
            assert_equal(d[k], v)

        wco.commit('first')

        for k, v in arrdict.items():
            # make sure they can work after commit
            assert_equal(d[k], v)
        wco.close()

    @pytest.mark.parametrize('test_shapes,shape', [
        [[(10, 10), (1, 10), (2, 2), (3, 5), (1, 1), (10, 1)], (10, 10)],
        [[(10,), (1,), (5,)], (10,)],
        [[(100, 100, 100), (100, 100, 1), (100, 1, 100), (1, 100, 100), (1, 1, 1), (34, 6, 3)], (100, 100, 100)]
    ])
    @pytest.mark.parametrize("dtype", [np.uint8, np.float32])
    @pytest.mark.parametrize('backend', variable_shape_backend_params)
    def test_reader_receives_expected_values_for_variable_size_column(
        self, aset_samples_initialized_repo, dtype, test_shapes, shape, backend
    ):
        repo = aset_samples_initialized_repo
        wco = repo.checkout(write=True)
        wco.add_ndarray_column('varaset', shape=shape, dtype=dtype, variable_shape=True, backend=backend)
        wd = wco.columns['varaset']

        arrdict = {}
        for idx, shape in enumerate(test_shapes):
            arr = (np.random.random_sample(shape) * 10).astype(dtype)
            arrdict[str(idx)] = arr
            wd[str(idx)] = arr

        for k, v in arrdict.items():
            # make sure they are good before committed
            assert_equal(wd[k], v)

        wco.commit('first')
        rco = repo.checkout()
        rd = rco.columns['varaset']

        for k, v in arrdict.items():
            # make sure they can work after commit
            assert_equal(wd[k], v)
            assert_equal(rd[k], v)
        wco.close()
        rco.close()

    @pytest.mark.parametrize('aset_specs', [
        [['aset1', [(10, 10), (1, 10), (2, 2), (3, 5), (1, 1), (10, 1)], (10, 10)],
         ['aset2', [(10,), (1,), (5,)], (10,)]],
        [['aset1', [(100, 100), (1, 100), (20, 20), (30, 50), (1, 10), (10, 1)], (100, 100)],
         ['aset2', [(100,), (1,), (50,)], (100,)]]])
    @pytest.mark.parametrize('backends', permutations(variable_shape_backend_params, 2))
    @pytest.mark.parametrize('dtype', [np.float32, np.uint8])
    def test_writer_reader_can_create_read_multiple_variable_size_column(
        self, aset_samples_initialized_repo, aset_specs, backends, dtype
    ):
        repo = aset_samples_initialized_repo
        wco = repo.checkout(write=True)
        arrdict = {}
        for backend, aset_spec in zip(backends, aset_specs):
            aset_name, test_shapes, max_shape = aset_spec
            wco.add_ndarray_column(
                aset_name, shape=max_shape, dtype=dtype, variable_shape=True, backend=backend)

            arrdict[aset_name] = {}
            for idx, shape in enumerate(test_shapes):
                arr = (np.random.random_sample(shape) * 10).astype(dtype)
                arrdict[aset_name][str(idx)] = arr
                wco.columns[aset_name][str(idx)] = arr

        for aset_k in arrdict.keys():
            for samp_k, v in arrdict[aset_k].items():
                # make sure they are good before committed
                assert_equal(wco.columns[aset_k][samp_k], v)

        wco.commit('first')
        rco = repo.checkout()

        for aset_k in arrdict.keys():
            for samp_k, v in arrdict[aset_k].items():
                # make sure they are good after commit, from both checkouts
                assert_equal(wco.columns[aset_k][samp_k], v)
                assert_equal(rco.columns[aset_k][samp_k], v)
        wco.close()
        rco.close()

    def test_writer_column_properties_are_correct(self, aset_samples_var_shape_initialized_repo):
        co = aset_samples_var_shape_initialized_repo.checkout(write=True)
        d = co.columns['writtenaset']
        assert d.column == 'writtenaset'
        assert d.dtype == np.float64
        assert np.allclose(d.shape, (10, 10))
        assert d.schema_type == 'variable_shape'
        assert d.iswriteable is True
        assert d.backend in variable_shape_backend_params
        assert isinstance(d.backend_options, dict)
        assert d.contains_subsamples is False
        assert d.remote_reference_keys == ()
        assert d.contains_remote_references is False
        co.close()

    def test_reader_column_properties_are_correct(self, aset_samples_var_shape_initialized_repo):
        co = aset_samples_var_shape_initialized_repo.checkout(write=False)
        d = co.columns['writtenaset']
        assert d.column == 'writtenaset'
        assert d.dtype == np.float64
        assert np.allclose(d.shape, (10, 10))
        assert d.schema_type == 'variable_shape'
        assert d.iswriteable is False
        assert d.backend in variable_shape_backend_params
        assert isinstance(d.backend_options, dict)
        assert d.contains_subsamples is False
        assert d.remote_reference_keys == ()
        assert d.contains_remote_references is False
        co.close()


class TestMultiprocessColumnReads(object):
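    """Read-only checkouts can hand column ``get`` out to worker processes;
    write-enabled checkouts must refuse this with PermissionError (see below).
    """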

    @pytest.mark.parametrize('backend', fixed_shape_backend_params)
    def test_external_multi_process_pool(self, repo, backend):
        from multiprocessing import get_context

        masterCmtList = []
        co = repo.checkout(write=True)
        co.add_ndarray_column(name='writtenaset', shape=(20, 20), dtype=np.float32, backend=backend)
        masterSampList = []
        for cIdx in range(2):
            if cIdx != 0:
                co = repo.checkout(write=True)
            with co.columns['writtenaset'] as d:
                kstart = 20 * cIdx
                for sIdx in range(20):
                    arr = np.random.randn(20, 20).astype(np.float32) * 100
                    sName = str(sIdx + kstart)
                    d[sName] = arr
                    masterSampList.append(arr)
            assert d.backend == backend
            cmt = co.commit(f'master commit number: {cIdx}')
            masterCmtList.append((cmt, list(masterSampList)))
            co.close()

        cmtIdx = 0
        for cmt, sampList in masterCmtList:
            nco = repo.checkout(write=False, commit=cmt)
            ds = nco.columns['writtenaset']
            keys = [str(i) for i in range(20 + (20*cmtIdx))]
            with get_context().Pool(2) as P:
                cmtData = P.map(ds.get, keys)
            for data, sampData in zip(cmtData, sampList):
                assert_equal(data, sampData)
            cmtIdx += 1
            nco.close()

    @pytest.mark.parametrize('backend', fixed_shape_backend_params)
    def test_external_multi_process_pool_fails_on_write_enabled_checkout(self, repo, backend):
        from multiprocessing import get_context

        co = repo.checkout(write=True)
        co.add_ndarray_column(name='writtenaset', shape=(20, 20), dtype=np.float32, backend=backend)
        with co.columns['writtenaset'] as d:
            for sIdx in range(20):
                d[sIdx] = np.random.randn(20, 20).astype(np.float32) * 100
        assert d.backend == backend
        co.commit('master commit number 1')
        co.close()

        nco = repo.checkout(write=True)
        ds = nco.columns['writtenaset']
        keys = [i for i in range(20)]
        with pytest.raises(PermissionError):
            with get_context().Pool(2) as P:
                cmtData = P.map(ds.get, keys)
        nco.close()


    @pytest.mark.parametrize('backend', fixed_shape_backend_params)
    def test_multiprocess_get_succeeds_on_superset_and_subset_of_keys(self, repo, backend):
        from multiprocessing import get_context

        co = repo.checkout(write=True)
        co.add_ndarray_column(name='writtenaset', shape=(20, 20), dtype=np.float32, backend=backend)
        masterSampList = []
        with co.columns['writtenaset'] as d:
            for sIdx in range(20):
                arr = np.random.randn(20, 20).astype(np.float32) * 100
                d[sIdx] = arr
                masterSampList.append(arr)
            assert d.backend == backend
        cmt = co.commit('master commit number one')
        co.close()

        nco = repo.checkout(write=False, commit=cmt)
        ds = nco.columns['writtenaset']

        # superset of keys: missing entries come back as None
        keys = [i for i in range(24)]
        with get_context().Pool(2) as P:
            cmtData = P.map(ds.get, keys)
        for idx, data in enumerate(cmtData):
            if idx >= 20:
                assert data is None
            else:
                assert_equal(data, masterSampList[idx])

        # subset of keys works
        keys = [i for i in range(10, 20)]
        with get_context().Pool(2) as P:
            cmtData = P.map(ds.get, keys)
        for idx, data in enumerate(cmtData):
            assert_equal(data, masterSampList[10+idx])
        nco.close()

    def test_writer_iterating_over_keys_can_have_additions_made_no_error(self, two_commit_filled_samples_repo):
        # do not want ``RuntimeError dictionary changed size during iteration``

        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=True)
        aset = co.columns['writtenaset']
        with aset as ds:
            for idx, k in enumerate(ds.keys()):
                if idx == 0:
                    ds['1232'] = np.random.randn(5, 7).astype(np.float32)
                assert '1232' != k

        added_key_exists_on_later_iteration = False
        for k in aset.keys():
            if k == '1232':
                added_key_exists_on_later_iteration = True
                break
        assert added_key_exists_on_later_iteration is True
        co.close()

    def test_writer_iterating_over_values_can_have_additions_made_no_error(self, two_commit_filled_samples_repo):
        # do not want ``RuntimeError dictionary changed size during iteration``

        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=True)
        aset = co.columns['writtenaset']
        mysample = np.random.randn(5, 7).astype(np.float32)
        with aset as ds:
            for idx, v in enumerate(ds.values()):
                if idx == 0:
                    ds['1232'] = mysample
                assert not np.allclose(v, mysample)

        added_value_exists_on_later_iteration = False
        for v in aset.values():
            if np.allclose(v, mysample):
                added_value_exists_on_later_iteration = True
                break
        assert added_value_exists_on_later_iteration is True
        co.close()

    def test_writer_iterating_over_items_can_have_additions_made_no_error(self, two_commit_filled_samples_repo):
        # do not want ``RuntimeError dictionary changed size during iteration``

        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=True)
        aset = co.columns['writtenaset']
        mysample = np.random.randn(5, 7).astype(np.float32)
        with aset as ds:
            for idx, kv in enumerate(ds.items()):
                if idx == 0:
                    ds['1232'] = mysample
                k, v = kv
                assert not np.allclose(v, mysample)
                assert k != '1232'

        added_value_exists_on_later_iteration = False
        for k, v in aset.items():
            if (k == '1232') and np.allclose(v, mysample):
                added_value_exists_on_later_iteration = True
                break
        assert added_value_exists_on_later_iteration is True
        co.close()

    def test_reader_iterating_over_items_can_not_make_additions(self, two_commit_filled_samples_repo):
        # do not want ``RuntimeError dictionary changed size during iteration``

        repo = two_commit_filled_samples_repo
        co = repo.checkout(write=False)
        aset = co.columns['writtenaset']
        mysample = np.random.randn(5, 7).astype(np.float32)
        with aset as ds:
            for idx, kv in enumerate(ds.items()):
                if idx == 0:
                    with pytest.raises(TypeError):
                        ds['1232'] = mysample
                k, v = kv
                assert not np.allclose(v, mysample)
                assert k != '1232'

        assert '1232' not in aset
        co.close()


================================================
FILE: tests/test_column_backends.py
================================================
import pytest
import numpy as np
from conftest import fixed_shape_backend_params


@pytest.mark.parametrize('backend', fixed_shape_backend_params)
def test_backend_property_reports_correct_backend(repo, array5by7, backend):

    wco = repo.checkout(write=True)
    aset = wco.add_ndarray_column('aset', prototype=array5by7, backend=backend)
    assert aset.backend == backend
    aset[0] = array5by7
    wco.commit('first')
    wco.close()

    rco = repo.checkout()
    naset = rco.columns['aset']
    assert naset.backend == backend
    rco.close()


@pytest.mark.parametrize('backend', fixed_shape_backend_params)
def test_setting_backend_property_cannot_change_backend(repo, array5by7, backend):

    wco = repo.checkout(write=True)
    aset = wco.add_ndarray_column('aset', prototype=array5by7, backend=backend)
    assert aset.backend == backend
    aset[0] = array5by7
    with pytest.raises(AttributeError):
        aset.backend = 'foo'
    wco.commit('first')
    wco.close()

    rco = repo.checkout()
    naset = rco.columns['aset']
    assert naset.backend == backend
    with pytest.raises(AttributeError):
        naset.backend = 'foo'
    rco.close()


@pytest.mark.parametrize('subsamples', [True, False])
@pytest.mark.parametrize('backend', fixed_shape_backend_params)
def test_setting_backend_opts_property_cannot_change_backend_opts(repo, array5by7, backend, subsamples):

    wco = repo.checkout(write=True)
    aset = wco.add_ndarray_column(
        'aset', prototype=array5by7, backend=backend, contains_subsamples=subsamples)
    if subsamples:
        aset.update({0: {0: array5by7}})
    else:
        aset[0] = array5by7
    with pytest.raises(AttributeError):
        aset.backend_options = {'foo': 'bar'}
    wco.commit('first')
    wco.close()

    rco = repo.checkout()
    naset = rco.columns['aset']
    assert naset.backend == backend
    with pytest.raises(AttributeError):
        naset.backend_options = {'foo': 'bar'}
    rco.close()


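# The expected format codes correspond to storage backends shipped in
# ``hangar.backends``: '00' and '01' are the HDF5 backends, '10' is the numpy
# backend.  The heuristic picks one from the schema's size, dimensionality and
# whether its shape is fixed or variable.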
@pytest.mark.parametrize('shape,dtype,variable_shape,expected_backend', [
    [(10,), np.uint16, True, '10'],
    [(1000,), np.uint16, True, '00'],
    [(1000,), np.uint16, False, '00'],
    [(9_999_999,), np.uint8, False, '00'],
    [(10_000_000,), np.uint8, False, '00'],
    [(10_000_001,), np.uint8, False, '01'],
    [(10_000_001,), np.uint8, True, '00'],
    [(2, 2), np.uint16, True, '00'],
    [(2, 2), np.uint16, False, '01'],
    [(5, 2), np.uint16, True, '00'],
    [(5, 2), np.uint16, False, '01'],
])
@pytest.mark.parametrize('subsamples', [True, False])
def test_heuristics_select_backend(repo, shape, dtype, variable_shape, expected_backend, subsamples):
    wco = repo.checkout(write=True)
    prototype = np.ones(shape, dtype=dtype)
    aset = wco.add_ndarray_column(
        'aset', prototype=prototype, variable_shape=variable_shape, contains_subsamples=subsamples)
    assert aset.backend == expected_backend
    if subsamples:
        aset.update({'0': {'0': prototype}})
    else:
        aset['0'] = prototype
    wco.commit('first commit')
    assert aset.backend == expected_backend
    if subsamples:
        assert np.allclose(prototype, aset['0']['0'])
    else:
        assert np.allclose(prototype, aset['0'])
    wco.close()

    nwco = repo.checkout(write=True)
    naset = nwco.columns['aset']
    assert naset.backend == expected_backend
    if subsamples:
        assert np.allclose(prototype, naset['0']['0'])
    else:
        assert np.allclose(prototype, naset['0'])
    nwco.close()


@pytest.mark.parametrize('prototype', [np.random.randn(10), np.random.randn(1000), np.random.randn(2, 2)])
@pytest.mark.parametrize('backend', fixed_shape_backend_params)
@pytest.mark.parametrize('subsamples', [True, False])
def test_manual_override_heuristics_select_backend(repo, prototype, backend, subsamples):

    wco = repo.checkout(write=True)
    aset = wco.add_ndarray_column(
        'aset', prototype=prototype, backend=backend, contains_subsamples=subsamples)
    assert aset.backend == backend
    if subsamples:
        aset.update({'0': {'0': prototype}})
    else:
        aset['0'] = prototype
    wco.commit('first commit')
    assert aset.backend == backend
    if subsamples:
        assert np.allclose(prototype, aset['0']['0'])
    else:
        assert np.allclose(prototype, aset['0'])
    wco.close()

    nwco = repo.checkout(write=True)
    naset = nwco.columns['aset']
    assert naset.backend == backend
    if subsamples:
        assert np.allclose(prototype, naset['0']['0'])
    else:
        assert np.allclose(prototype, naset['0'])
    nwco.close()


def test_manual_override_heuristics_invalid_value_raises_error(repo):

    wco = repo.checkout(write=True)
    with pytest.raises(ValueError):
        wco.add_ndarray_column('aset', prototype=np.arange(10), backend='ERROR')
    wco.close()


@pytest.mark.parametrize('backendStart', fixed_shape_backend_params)
@pytest.mark.parametrize('backendEnd', fixed_shape_backend_params)
@pytest.mark.parametrize('subsamples', [True, False])
def test_manual_change_backends_after_write_works(repo, array5by7, backendStart, backendEnd, subsamples):

    wco = repo.checkout(write=True)
    aset = wco.add_ndarray_column(
        'aset', prototype=array5by7, backend=backendStart, contains_subsamples=subsamples)
    assert aset.backend == backendStart
    if subsamples:
        aset.update({0: {0: array5by7}})
    else:
        aset[0] = array5by7
    wco.commit('first commit')
    assert aset.backend == backendStart
    if subsamples:
        assert np.allclose(array5by7, aset[0][0])
    else:
        assert np.allclose(array5by7, aset[0])
    wco.close()

    nwco = repo.checkout(write=True)
    naset = nwco.columns['aset']
    assert naset.backend == backendStart

    naset.change_backend(backend=backendEnd)
    if subsamples:
        naset.update({1: {1: array5by7+1}})
    else:
        naset[1] = array5by7 + 1

    assert naset.backend == backendEnd
    if subsamples:
        assert np.allclose(array5by7, naset[0][0])
        assert np.allclose(array5by7+1, naset[1][1])
    else:
        assert np.allclose(array5by7, naset[0])
        assert np.allclose(array5by7+1, naset[1])
    nwco.commit('second')
    nwco.close()

    rco = repo.checkout()
    assert rco.columns['aset'].backend == backendEnd
    rco.close()


@pytest.mark.parametrize('backendStart', fixed_shape_backend_params)
@pytest.mark.parametrize('backendFail', ['lmao', '000'])
@pytest.mark.parametrize('subsamples', [True, False])
def test_manual_change_backend_to_invalid_fmt_code_fails(repo, array5by7, backendStart, backendFail, subsamples):

    wco = repo.checkout(write=True)
    aset = wco.add_ndarray_column(
        'aset', prototype=array5by7, backend=backendStart, contains_subsamples=subsamples)
    assert aset.backend == backendStart
    if subsamples:
        aset[0] = {0: array5by7}
    else:
        aset[0] = array5by7
    wco.commit('first commit')
    assert aset.backend == backendStart
    if subsamples:
        assert np.allclose(array5by7, aset[0][0])
    else:
        assert np.allclose(array5by7, aset[0])
    wco.close()

    nwco = repo.checkout(write=True)
    naset = nwco.columns['aset']
    assert naset.backend == backendStart

    with pytest.raises(ValueError):
        naset.change_backend(backend=backendFail)
    assert naset.backend == backendStart
    if subsamples:
        naset[1] = {1: array5by7+1}
    else:
        naset[1] = array5by7 + 1

    if subsamples:
        assert np.allclose(array5by7, naset[0][0])
        assert np.allclose(array5by7 + 1, naset[1][1])
    else:
        assert np.allclose(array5by7, naset[0])
        assert np.allclose(array5by7 + 1, naset[1])
    nwco.commit('second')
    nwco.close()


@pytest.mark.parametrize('backendStart', fixed_shape_backend_params)
@pytest.mark.parametrize('backendEnd', fixed_shape_backend_params)
@pytest.mark.parametrize('subsamples', [True, False])
def test_manual_change_backend_fails_while_in_cm(repo, array5by7, backendStart, backendEnd, subsamples):

    wco = repo.checkout(write=True)
    aset = wco.add_ndarray_column(
        'aset', prototype=array5by7, backend=backendStart, contains_subsamples=subsamples)
    assert aset.backend == backendStart
    if subsamples:
        aset[0] = {0: array5by7}
    else:
        aset[0] = array5by7
    wco.commit('first commit')
    assert aset.backend == backendStart
    if subsamples:
        assert np.allclose(array5by7, aset[0][0])
    else:
        assert np.allclose(array5by7, aset[0])
    wco.close()

    nwco = repo.checkout(write=True)
    naset = nwco.columns['aset']
    assert naset.backend == backendStart

    with nwco as c:
        with pytest.raises(RuntimeError):
            c['aset'].change_backend(backend=backendEnd)
        with pytest.raises(RuntimeError):
            naset.change_backend(backend=backendEnd)
        with pytest.raises(RuntimeError):
            c.columns['aset'].change_backend(backend=backendEnd)
        with pytest.raises(RuntimeError):
            nwco.columns['aset'].change_backend(backend=backendEnd)

    with naset as na:
        with pytest.raises(RuntimeError):
            na.change_backend(backend=backendEnd)
        with pytest.raises(RuntimeError):
            naset.change_backend(backend=backendEnd)
        with pytest.raises(RuntimeError):
            nwco.columns['aset'].change_backend(backend=backendEnd)

    assert naset.backend == backendStart
    if subsamples:
        naset[1] = {1: array5by7+1}
    else:
        naset[1] = array5by7 + 1

    if subsamples:
        assert np.allclose(array5by7, naset[0][0])
        assert np.allclose(array5by7 + 1, naset[1][1])
    else:
        assert np.allclose(array5by7, naset[0])
        assert np.allclose(array5by7 + 1, naset[1])
    nwco.commit('second')
    nwco.close()



@pytest.fixture(scope='class')
def dummy_writer_checkout(classrepo):
    wco = classrepo.checkout(write=True)
    yield wco
    wco.close()


class TestComplibRestrictions:
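    """Schemas whose per-sample size is under 16 bytes must reject blosc complibs."""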

    @pytest.mark.parametrize('backend', ['01', '00'])
    @pytest.mark.parametrize('subsamples', [True, False])
    @pytest.mark.parametrize('complib', [
        'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:zlib', 'blosc:zstd'
    ])
    @pytest.mark.parametrize('dtype,shape', [
        [np.float32, (1, 1, 1)],
        [np.float32, (3,)],
        [np.float64, (1,)],
        [np.uint8, (15,)],
        [np.uint8, (3, 2, 2)],
    ])
    def test_schema_smaller_16_bytes_cannot_select_blosc_backend(
        self, dummy_writer_checkout, backend, complib, dtype, shape, subsamples
    ):
        wco = dummy_writer_checkout
        be_opts = {'complib': complib, 'complevel': 3, 'shuffle': 'byte'}

        # prototype spec
        with pytest.raises(ValueError, match='blosc clib requires'):
            proto = np.zeros(shape, dtype=dtype)
            wco.add_ndarray_column(
                'aset', prototype=proto, backend=backend,
                backend_options=be_opts, contains_subsamples=subsamples)

        # shape and dtype spec
        with pytest.raises(ValueError, match='blosc clib requires'):
            wco.add_ndarray_column(
                'aset', shape=shape, dtype=dtype, backend=backend,
                backend_options=be_opts, contains_subsamples=subsamples)


@pytest.mark.parametrize('backend', ['01', '00'])
@pytest.mark.parametrize('subsamples', [True, False])
@pytest.mark.parametrize('dtype,shape', [
    [np.float32, (1, 1, 1)],
    [np.float32, (3,)],
    [np.float64, (1,)],
    [np.uint8, (15,)],
    [np.uint8, (3, 2, 2)],
])
def test_schema_smaller_16_bytes_does_not_use_heuristic_to_select_blosc(
    repo, backend, dtype, shape, subsamples
):
    wco = repo.checkout(write=True)
    proto = np.zeros(shape, dtype=dtype)
    aset = wco.add_ndarray_column(
        'aset', prototype=proto, backend=backend, contains_subsamples=subsamples)
    bad_clibs = ['blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:zlib', 'blosc:zstd']
    assert aset.backend_options['complib'] not in bad_clibs
    if subsamples:
        aset[0] = {0: proto}
    else:
        aset[0] = proto
    assert aset.backend_options['complib'] not in bad_clibs
    wco.close()


@pytest.mark.parametrize('backend', ['01', '00'])
@pytest.mark.parametrize('subsamples', [True, False])
@pytest.mark.parametrize('complib', [
    'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:zlib', 'blosc:zstd'
])
@pytest.mark.parametrize('dtype,shape', [
    [np.float32, (1, 1, 1)],
    [np.float32, (3,)],
    [np.float64, (1,)],
    [np.uint8, (15,)],
    [np.uint8, (3, 2, 2)],
])
def test_schema_smaller_16_bytes_cannot_change_to_blosc_backend(
    repo, backend, complib, shape, dtype, subsamples):

    wco = repo.checkout(write=True)
    aset = wco.add_ndarray_column(
        'aset', shape=shape, dtype=dtype, backend=backend, contains_subsamples=subsamples)
    proto = np.zeros(shape, dtype=dtype)
    if subsamples:
        aset[0] = {0: proto}
    else:
        aset[0] = proto

    be_opts = {'complib': complib, 'complevel': 3, 'shuffle': None}
    with pytest.raises(ValueError, match='blosc clib requires'):
        aset.change_backend(backend=backend, backend_options=be_opts)
    wco.close()


================================================
FILE: tests/test_column_definition_permutations.py
================================================
from collections import defaultdict
from functools import partial
import secrets
import string

import pytest
import numpy as np


def assert_equal(expected, actual):
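    """Compare expected vs. actual values, dispatching on type: exact equality
    for str/bytes, ``np.allclose`` plus a dtype check for ndarrays.
    """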
    if isinstance(expected, (str, bytes)):
        assert expected == actual
    elif isinstance(expected, np.ndarray):
        assert np.allclose(expected, actual)
        assert expected.dtype == actual.dtype
    else:
        raise TypeError(f'unknown type of data {type(expected)}')


def ndarray_generate_data_fixed_shape(shape, dtype, low=0, high=255):
    arr = np.random.randint(low, high, size=shape, dtype=dtype)
    return arr


def ndarray_generate_data_variable_shape(shape, dtype, low=0, high=255):
    arr_dims = []
    for dim in shape:
        valid_dim_shapes = [i for i in range(1, dim + 1)]
        dimsize = secrets.choice(valid_dim_shapes)
        arr_dims.append(dimsize)
    arr_dims = tuple(arr_dims)
    arr = np.random.randint(low, high, size=arr_dims, dtype=dtype)
    return arr


def str_generate_data_variable_shape(
        length=20, *, _ALPHABET=''.join([string.ascii_letters, string.digits, string.punctuation, ' '])
):
    tokens = [secrets.choice(_ALPHABET) for i in range(length)]
    res = ''.join(tokens)
    return res


def bytes_generate_data_variable_shape(
        length=20, *,
        _ALPHABET=''.join([string.printable, '\x01', '\x12', '\x25', '\x26', '\x27', '\x91'])
):
    tokens = [secrets.choice(_ALPHABET) for i in range(length)]
    res = ''.join(tokens).encode()
    return res


column_settings = {
    'ndarray': {
        'fixed_shape': ['00', '01', '10'],
        'variable_shape': ['00', '10'],
    },
    'str': {
        'variable_shape': ['30']
    },
    'bytes': {
        'variable_shape': ['31']
    }
}
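
# The two-character backend codes above select hangar's storage implementations
# ('00'/'01' for the HDF5 backends and '10' for the numpy backend holding ndarray
# data; '30' for the LMDB str store and '31' for the LMDB bytes store).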


column_data_generators = {
    'ndarray': {
        'fixed_shape': ndarray_generate_data_fixed_shape,
        'variable_shape': ndarray_generate_data_variable_shape,
    },
    'str': {
        'variable_shape': str_generate_data_variable_shape
    },
    'bytes': {
        'variable_shape': bytes_generate_data_variable_shape
    }
}


column_layouts = {
    'ndarray': ['flat', 'nested'],
    'str': ['flat', 'nested'],
    'bytes': ['flat', 'nested']
}


def add_data_to_column(col, data_gen, nsamples, nsubsamples=None):
    column_data = {}
    for samp in range(nsamples):
        if nsubsamples is None:
            data = data_gen()
            column_data[samp] = data
            col[samp] = data
        else:
            column_data[samp] = {}
            for subsamp in range(nsubsamples):
                data = data_gen()
                column_data[samp][subsamp] = data
            col[samp] = column_data[samp]
    return column_data
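

# A minimal usage sketch (hypothetical helper, never called by the tests below)
# showing how ``add_data_to_column`` combines with the generator partials defined
# above; the ``col.column_layout`` attribute is assumed to report 'flat' or
# 'nested' as it does for hangar columns elsewhere in this suite.
def _example_add_data_usage(col):
    gen = partial(ndarray_generate_data_fixed_shape, (4, 4), np.uint8)
    if col.column_layout == 'flat':
        # flat layout: one generated array per sample key -> {0: arr, 1: arr}
        return add_data_to_column(col, gen, nsamples=2)
    # nested layout: a dict of generated subsamples per sample key
    # -> {0: {0: arr, 1: arr}, 1: {0: arr, 1: arr}}
    return add_data_to_column(col, gen, nsamples=2, nsubsamples=2)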


@pytest.fixture(params=[1, 3])
def num_samples_gen(request):
    return request.param


@pytest.fixture(params=[1, 3])
def num_subsamples_gen(request):
    return request.param


@pytest.fixture()
def column_permutation_repo(repo, num_samples_gen, num_subsamples_gen):
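    # builds one column for every (dtype x layout x schema-type x backend)
    # combination declared in the mappings above, fills each with generated data,
    # and yields the repo together with an in-memory copy of everything written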
    co = repo.checkout(write=True)
    nsamp = num_samples_gen
    nsubs = num_subsamples_gen

    column_name_partials = {}
    column_data_copy = defaultdict(dict)
    shape = (4, 4)
    dtype = np.uint8
    for col_dtype, schema_settings in column_settings.items():
        for layout in column_layouts[col_dtype]:
            for schema_type, valid_backends in schema_settings.items():
                for backend in valid_backends:
                    name = f'{col_dtype}_{layout}_{schema_type}_{backend}'
                    generator = column_data_generators[col_dtype][schema_type]
                    has_subs = False if layout == 'flat' else True
                    is_var = False if schema_type == 'fixed_shape' else True

                    if col_dtype == 'ndarray':
                        col = co.add_ndarray_column(name,
                                                    shape=shape,
                                                    dtype=dtype,
                                                    variable_shape=is_var,
                                                    contains_subsamples=has_subs,
                                                    backend=backend)
                        data_partial = partial(generator, shape, dtype)
                        if layout == 'flat':
                            column_data_copy[name] = add_data_to_column(col, data_partial, nsamp)
                        elif layout == 'nested':
                            column_data_copy[name] = add_data_to_column(col, data_partial, nsamp, nsubs)
                        else:
                            raise ValueError(f'invalid layout {layout}')
                    elif col_dtype == 'str':
                        col = co.add_str_column(name, contains_subsamples=has_subs, backend=backend)
                        data_partial = partial(generator)
                        if layout == 'flat':
                            column_data_copy[name] = add_data_to_column(col, data_partial, nsamp)
                        elif layout == 'nested':
                            column_data_copy[name] = add_data_to_column(col, data_partial, nsamp, nsubs)
                    elif col_dtype == 'bytes':
                        col = co.add_bytes_column(name, contains_subsamples=has_subs, backend=backend)
                        data_partial = partial(generator)
                        if layout == 'flat':
                            column_data_copy[name] = add_data_to_column(col, data_partial, nsamp)
                        elif layout == 'nested':
                            column_data_copy[name] = add_data_to_column(col, data_partial, nsamp, nsubs)
                    else:
                        raise ValueError(f'column dtype {col_dtype} invalid')

                    column_name_partials[name] = data_partial

    co.commit('first')
    co.close()
    yield repo, column_data_copy, column_name_partials


@pytest.fixture(params=[True, False])
def column_permutations_read_write_checkout(request, column_permutation_repo):
    repo, column_data, column_data_partials = column_permutation_repo
    co = repo.checkout(write=request.param)
    yield co, column_data, column_data_partials
    co.close()


@pytest.fixture()
def column_permutations_write_checkout(column_permutation_repo):
    repo, column_data, column_data_partials = column_permutation_repo
    co = repo.checkout(write=True)
    yield co, column_data, column_data_partials
    co.close()


@pytest.mark.parametrize('column_type,column_kwargs', [
    ('ndarray', {'prototype': np.array([1, 2, 3])}),
    ('str', {}),
    ('bytes', {}),
])
@pytest.mark.parametrize('contains_subsamples', [True, False])
def test_cannot_create_column_within_cm(repo, column_type, column_kwargs, contains_subsamples):
    co = repo.checkout(write=True)
    with co:
        with pytest.raises(PermissionError):
            if column_type == 'ndarray':
                co.add_ndarray_column(
                    'testcol', contains_subsamples=contains_subsamples, **column_kwargs)
            elif column_type == 'str':
                co.add_str_column(
                    'testcol', contains_subsamples=contains_subsamples, **column_kwargs)
            elif column_type == 'bytes':
                co.add_bytes_column(
                    'testcol', contains_subsamples=contains_subsamples, **column_kwargs)
            else:
                raise ValueError(column_type)
    co.close()


@pytest.mark.parametrize('column_type,column_kwargs', [
    ('ndarray', {'prototype': np.array([1, 2, 3])}),
    ('str', {}),
    ('bytes', {}),
])
def test_contains_subsamples_non_bool_value_fails(repo, column_type, column_kwargs):
    co = repo.checkout(write=True)
    with pytest.raises(ValueError):
        if column_type == 'ndarray':
            co.add_ndarray_column(
                'testcol', contains_subsamples=None, **column_kwargs)
        elif column_type == 'str':
            co.add_str_column(
                'testcol', contains_subsamples=None, **column_kwargs)
        elif column_type == 'bytes':
            co.add_bytes_column(
                'testcol', contains_subsamples=None, **column_kwargs)
        else:
            raise ValueError(column_type)
    co.close()


@pytest.mark.parametrize('column_type,column_kwargs', [
    ('ndarray', {'prototype': np.array([1, 2, 3])}),
    ('str', {}),
    ('bytes', {}),
])
@pytest.mark.parametrize('contains_subsamples', [True, False])
def test_cannot_create_column_name_exists(repo, column_type, column_kwargs, contains_subsamples):
    co = repo.checkout(write=True)

    # setup so that a column already exists
    if column_type == 'ndarray':
        co.add_ndarray_column(
            'testcol', contains_subsamples=contains_subsamples, **column_kwargs)
    elif column_type == 'str':
        co.add_str_column(
            'testcol', contains_subsamples=contains_subsamples, **column_kwargs)
    elif column_type == 'bytes':
        co.add_bytes_column(
            'testcol', contains_subsamples=contains_subsamples, **column_kwargs)

    with pytest.raises(LookupError):
        if column_type == 'ndarray':
            co.add_ndarray_column(
                'testcol', contains_subsamples=contains_subsamples, **column_kwargs)
        elif column_type == 'str':
            co.add_str_column(
                'testcol', contains_subsamples=contains_subsamples, **column_kwargs)
        elif column_type == 'bytes':
            co.add_bytes_column(
                'testcol', contains_subsamples=contains_subsamples, **column_kwargs)
        else:
            raise ValueError(column_type)
    co.close()


def test_read_data_from_column_permutations(column_permutations_read_write_checkout):
    co, column_data, column_data_partials = column_permutations_read_write_checkout

    assert len(co.columns) == len(column_data)
    for column_name, column_samples in column_data.items():
        assert column_name in co.columns
        col = co[column_name]
        assert len(column_samples) == len(col)

        for sample_key, sample_value in column_samples.items():
            if not isinstance(sample_value, dict):
                recorded = col[sample_key]
                assert_equal(sample_value, recorded)
            else:
                col_samp = col[sample_key]
                assert len(sample_value) == len(col_samp)
                for subsample_key, subsample_value in sample_value.items():
                    recorded = col_samp[subsample_key]
                    assert_equal(subsample_value, recorded)


def test_write_data_to_column_permutations(
        column_permutations_write_checkout, num_samples_gen, num_subsamples_gen
):
    co, column_data, column_data_partials = column_permutations_write_checkout

    for column_name in co.columns:
        col = co[column_name]
        data_gen = column_data_partials[column_name]
        if col.column_layout == 'flat':
            for samp in range(num_samples_gen):
                data = data_gen()
                column_data[column_name][str(samp)] = data
                col[str(samp)] = data
        elif col.column_layout == 'nested':
            for samp in range(num_samples_gen):
                column_data[column_name][str(samp)] = {}
                for ssamp in range(num_subsamples_gen):
                    data = data_gen()
                    column_data[column_name][str(samp)][str(ssamp)] = data
                col[str(samp)] = column_data[column_name][str(samp)]
        else:
            raise ValueError(f'unknown layout option {col.column_layout}')

    assert len(co.columns) == len(column_data)
    for column_name, column_samples in column_data.items():
        assert column_name in co.columns
        col = co[column_name]
        assert len(column_samples) == len(col)

        for sample_key, sample_value in column_samples.items():
            if not isinstance(sample_value, dict):
                recorded = col[sample_key]
                assert_equal(sample_value, recorded)
            else:
                col_samp = col[sample_key]
                assert len(sample_value) == len(col_samp)
                for subsample_key, subsample_value in sample_value.items():
                    recorded = col_samp[subsample_key]
                    assert_equal(subsample_value, recorded)


def test_merge_write_data_to_column_permutations(
        column_permutation_repo, num_samples_gen, num_subsamples_gen
):
    repo, column_data, column_data_partials = column_permutation_repo
    repo.create_branch('testbranch')

    # Write new data to master branch
    co = repo.checkout(write=True, branch='master')
    for column_name in co.columns:
        col = co[column_name]
        data_gen = column_data_partials[column_name]
        if col.column_layout == 'flat':
            for samp in range(num_samples_gen):
                data = data_gen()
                column_data[column_name][str(samp)] = data
                col[str(samp)] = data
        elif col.column_layout == 'nested':
            for samp in range(num_samples_gen):
                column_data[column_name][str(samp)] = {}
                for ssamp in range(num_subsamples_gen):
                    data = data_gen()
                    column_data[column_name][str(samp)][str(ssamp)] = data
                col[str(samp)] = column_data[column_name][str(samp)]
        else:
            raise ValueError(f'unknown layout option {col.column_layout}')
    co.commit('commit on master adding data')
    co.close()

    # Write new data to testbranch branch
    co = repo.checkout(write=True, branch='testbranch')
    for column_name in co.columns:
        col = co[column_name]
        data_gen = column_data_partials[column_name]
        if col.column_layout == 'flat':
            for samp in range(num_samples_gen):
                data = data_gen()
                column_data[column_name][f'_{samp}'] = data
                col[f'_{samp}'] = data
        elif col.column_layout == 'nested':
            for samp in range(num_samples_gen):
                column_data[column_name][f'_{samp}'] = {}
                for ssamp in range(num_subsamples_gen):
                    data = data_gen()
                    column_data[column_name][f'_{samp}'][f'_{ssamp}'] = data
                col[f'_{samp}'] = column_data[column_name][f'_{samp}']
        else:
            raise ValueError(f'unknown layout option {col.column_layout}')
    co.commit('commit on testbranch adding data')
    co.close()

    # Merge and check that union of all data added is present
    repo.merge('merge commit', 'master', 'testbranch')

    co = repo.checkout(write=True, branch='master')
    assert len(co.columns) == len(column_data)
    for column_name, column_samples in column_data.items():
        assert column_name in co.columns
        col = co[column_name]
        assert len(column_samples) == len(col)

        for sample_key, sample_value in column_samples.items():
            if not isinstance(sample_value, dict):
                recorded = col[sample_key]
                assert_equal(sample_value, recorded)
            else:
                col_samp = col[sample_key]
                assert len(sample_value) == len(col_samp)
                for subsample_key, subsample_value in sample_value.items():
                    recorded = col_samp[subsample_key]
                    assert_equal(subsample_value, recorded)
    co.close()



================================================
FILE: tests/test_column_nested.py
================================================
"""Tests for the class methods contained in the nested subsample column accessor.
"""
import numpy as np
import pytest
from conftest import fixed_shape_backend_params, variable_shape_backend_params


# --------------------------- Setup ------------------------------


def assert_equal(arr, arr2):
    assert np.array_equal(arr, arr2)
    assert arr.dtype == arr2.dtype


# ------------------------ Tests ----------------------------------


class TestArraysetSetup:

    @pytest.mark.parametrize('name', [
        'invalid\n', '\ninvalid', 'inv name', 'inva@lid', 12, ' try', 'andthis ',
        'VeryLongNameIsInvalidOver64CharactersNotAllowedVeryLongNameIsInva'])
    def test_does_not_allow_invalid_arrayset_names(self, repo, randomsizedarray, name):
        co = repo.checkout(write=True)
        with pytest.raises(ValueError):
            co.add_ndarray_column(name, prototype=randomsizedarray, contains_subsamples=True)
        co.close()

    def test_read_only_mode_arrayset_methods_limited(self, aset_subsamples_initialized_repo):
        import hangar
        co = aset_subsamples_initialized_repo.checkout()
        assert isinstance(co, hangar.checkout.ReaderCheckout)
        with pytest.raises(AttributeError):
            assert co.add_ndarray_column('foo')
        with pytest.raises(AttributeError):
            assert co.add_str_column('foo')
        with pytest.raises(PermissionError):
            assert co.columns.delete('foo')
        assert len(co.columns['writtenaset']) == 0
        co.close()

    def test_get_arrayset_in_read_and_write_checkouts(self, aset_subsamples_initialized_repo, array5by7):
        co = aset_subsamples_initialized_repo.checkout(write=True)
        # getting the column with `get`
        asetOld = co.columns.get('writtenaset')
        asetOldPath = asetOld._path
        asetOldAsetn = asetOld.column
        asetOldDefaultSchemaHash = asetOld._schema.schema_hash_digest()
        co.close()

        co = aset_subsamples_initialized_repo.checkout()
        # getting the column with dict-style access
        asetNew = co.columns['writtenaset']
        assert asetOldPath == asetNew._path
        assert asetOldAsetn == asetNew.column
        assert asetOldDefaultSchemaHash == asetNew._schema.schema_hash_digest()
        co.close()

    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_delete_arrayset(self, aset_backend, aset_subsamples_initialized_repo):
        co = aset_subsamples_initialized_repo.checkout(write=True)
        co.columns.delete('writtenaset')
        assert 'writtenaset' not in co.columns
        with pytest.raises(KeyError):
            # cannot delete twice
            co.columns.delete('writtenaset')

        # init and immediate delete leaves no trace
        co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float64,
                              backend=aset_backend, contains_subsamples=True)
        assert len(co.columns) == 1
        co.columns.delete('writtenaset')
        assert len(co.columns) == 0
        co.commit('this is a commit message')
        co.close()

        # a column initialized in a checkout persists its records/accessor even if it contains no samples
        co = aset_subsamples_initialized_repo.checkout(write=True)
        assert len(co.columns) == 0
        co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float64,
                              backend=aset_backend, contains_subsamples=True)
        co.commit('this is a commit message')
        co.close()
        co = aset_subsamples_initialized_repo.checkout(write=True)
        assert len(co.columns) == 1

        # column can be deleted via the __delitem__ dict-style command.
        del co.columns['writtenaset']
        assert len(co.columns) == 0
        co.commit('this is a commit message')
        co.close()

    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_init_same_arrayset_twice_fails_again(self, aset_backend, repo, randomsizedarray):
        co = repo.checkout(write=True)
        co.add_ndarray_column('aset', prototype=randomsizedarray,
                              backend=aset_backend, contains_subsamples=True)
        with pytest.raises(LookupError):
            # re-adding a column identical to the initialized one fails
            co.add_ndarray_column('aset', prototype=randomsizedarray,
                                  backend=aset_backend, contains_subsamples=True)
        with pytest.raises(LookupError):
            # fails even when the container layout differs from the existing column (no subsamples)
            co.add_ndarray_column('aset', prototype=randomsizedarray,
                                  backend=aset_backend, contains_subsamples=False)
        co.close()

    @pytest.mark.parametrize("aset_backend", fixed_shape_backend_params)
    def test_arrayset_with_invalid_dimension_sizes_shapes(self, aset_backend, repo):
        co = repo.checkout(write=True)

        shape = (0, 1, 2)
        with pytest.raises(ValueError):
            # cannot have zero valued size for any dimension
            co.add_ndarray_column('aset', shape=shape, dtype=np.int,
                                  backend=aset_backend, contains_subsamples=True)

        shape = [1] * 31
        aset = co.add_ndarray_column('aset1', shape=shape, dtype=np.int,
                                     backend=aset_backend, contains_subsamples=True)
        assert len(aset.shape) == 31

        shape = [1] * 32
        with pytest.raises(ValueError):
            # maximum tensor rank must be <= 31
            co.add_ndarray_column('aset2', shape=shape, dtype=np.int,
                                  backend=aset_backend, contains_subsamples=True)
        co.close()


# ------------------------------ Add Data Tests --------------------------------------------

@pytest.fixture(params=[1, 3], scope='class')
def multi_item_generator(request):
    yield request.param


@pytest.fixture(params=[
    # specifies container types as a two-element pair: ['outer', 'inner']
    ['dict', None],
    ['list', 'tuple'],
    ['tuple', 'list'],
], scope='class')
def iterable_subsamples(request, multi_item_generator):
    outer, inner = request.param
    arrays = []
    for num_item in range(multi_item_generator):
        arr = np.arange(16, dtype=np.uint8).reshape(4, 4)
        arr += 1
        arrays.append(arr)

    components = []
    for idx, array in enumerate(arrays):
        if inner == 'list':
            component = [f'subsample{idx}', array]
        elif inner == 'tuple':
            component = (f'subsample{idx}', array)
        elif inner is None:
            component = {f'subsample{idx}': array}
        else:
            raise ValueError(
                f'unknown parameter of `inner` {inner} in test suite generation')
        components.append(component)

    if outer == 'dict':
        res = {}
        for part in components:
            res.update(part)
    elif outer == 'list':
        res = []
        for part in components:
            res.append(part)
    elif outer == 'tuple':
        res = []
        for part in components:
            res.append(part)
        res = tuple(res)
    else:
        raise ValueError(
            f'unknown parameter of `outer` {outer} in test suite generation')
    return res


@pytest.fixture(params=['dict', 'list', 'tuple'], scope='class')
def iterable_samples(request, multi_item_generator, iterable_subsamples):
    container = request.param

    if container == 'dict':
        res = {}
        for idx in range(multi_item_generator):
            res[f'sample{idx}'] = iterable_subsamples
    elif container == 'list':
        res = []
        for idx in range(multi_item_generator):
            res.append([f'sample{idx}', iterable_subsamples])
    elif container == 'tuple':
        res = []
        for idx in range(multi_item_generator):
            res.append([f'sample{idx}', iterable_subsamples])
        res = tuple(res)
    else:
        raise ValueError(
            f'unknown parameter of `container` {container} in test suite generation')
    return res
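
# For example, with ``multi_item_generator == 1`` and the ('list', 'tuple') params,
# ``iterable_subsamples`` is ``[('subsample0', arr)]`` and ``iterable_samples``
# (as a 'dict') is ``{'sample0': [('subsample0', arr)]}`` -- each shape being an
# input form the update/__setitem__ APIs are exercised against below.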


@pytest.fixture(params=fixed_shape_backend_params, scope='class')
def backend_params(request):
    return request.param


@pytest.fixture()
def subsample_writer_written_aset(backend_params, repo, monkeypatch):
    from hangar.backends import hdf5_00
    from hangar.backends import hdf5_01
    from hangar.backends import numpy_10
    monkeypatch.setattr(hdf5_00, 'COLLECTION_COUNT', 5)
    monkeypatch.setattr(hdf5_00, 'COLLECTION_SIZE', 10)
    monkeypatch.setattr(hdf5_01, 'COLLECTION_COUNT', 5)
    monkeypatch.setattr(hdf5_01, 'COLLECTION_SIZE', 10)
    monkeypatch.setattr(numpy_10, 'COLLECTION_SIZE', 10)
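    # the collection constants are shrunk so that just a handful of writes forces
    # the backends to roll over into new collections / files (presumably the point
    # of monkeypatching them for the tests below)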

    co = repo.checkout(write=True)
    aset = co.add_ndarray_column('foo', shape=(4, 4), dtype=np.uint8, variable_shape=False,
                                 backend=backend_params, contains_subsamples=True)
    yield aset
    co.close()


class TestAddData:

    def test_update_sample_subsamples_empty_arrayset(self, subsample_writer_written_aset, iterable_samples):
        aset = subsample_writer_written_aset
        added = aset.update(iterable_samples)
        assert added is None
        assert len(aset._samples) == len(iterable_samples)
        for sample_idx, sample_data in enumerate(iterable_samples):
            assert f'sample{sample_idx}' in aset._samples

    def test_update_sample_kwargs_only_empty_arrayset(self, subsample_writer_written_aset, iterable_subsamples):
        aset = subsample_writer_written_aset
        added = aset.update(fookwarg=iterable_subsamples)
        assert added is None
        assert len(aset._samples) == 1
        assert 'fookwarg' in aset._samples

        added = aset.update(bar=iterable_subsamples, baz=iterable_subsamples)
        assert added is None
        assert len(aset._samples) == 3
        assert 'bar' in aset._samples
        assert 'baz' in aset._samples
        for subsample_idx, _data in enumerate(iterable_subsamples):
            assert f'subsample{subsample_idx}' in aset._samples['fookwarg']._subsamples
            assert f'subsample{subsample_idx}' in aset._samples['bar']._subsamples
            assert f'subsample{subsample_idx}' in aset._samples['baz']._subsamples

    def test_update_sample_kwargs_and_other_dict_doesnt_modify_input_in_calling_scope(
            self, subsample_writer_written_aset, iterable_subsamples, iterable_samples
    ):
        """ensure bug does not revert.

        Had a case where if dict was passed as ``other`` along with kwargs, the operation
        would complete as normally, but when control returned to the caller the original
        dict passed in as ``other`` would have been silently merged with the kwargs.
        """
        aset = subsample_writer_written_aset
        if not isinstance(iterable_samples, dict):
            return
        iterable_samples_before = list(iterable_samples.items())

        aset.update(iterable_samples, kwargadded=iterable_subsamples)
        # in bug case, would now observe that iterable_samples would have been
        # silently modified in a method analogous to calling:
        #
        #   ``iterable_samples.update({'kwargadded': iterable_subsamples})``
        #
        assert list(iterable_samples.items()) == iterable_samples_before

    def test_update_sample_kwargs_and_iterably_empty_arrayset(
            self, subsample_writer_written_aset, iterable_subsamples, iterable_samples
    ):
        aset = subsample_writer_written_aset
        aset.update(iterable_samples, fookwarg=iterable_subsamples)
        assert len(aset._samples) == len(iterable_samples) + 1

        assert 'fookwarg' in aset._samples
        for sample_idx in range(len(iterable_samples)):
            assert f'sample{sample_idx}' in aset._samples

    def test_update_sample_subsamples_duplicate_data_does_not_save_new(
            self, subsample_writer_written_aset, iterable_samples
    ):
        aset = subsample_writer_written_aset
        aset.update(iterable_samples)
        old_specs = {}
        for sample_idx, sample_data in enumerate(iterable_samples):
            old_specs[f'sample{sample_idx}'] = aset._samples[f'sample{sample_idx}']._subsamples.copy()

        aset.update(iterable_samples)
        new_specs = {}
        for sample_idx, sample_data in enumerate(iterable_samples):
            new_specs[f'sample{sample_idx}'] = aset._samples[f'sample{sample_idx}']._subsamples.copy()
        assert old_specs == new_specs

    def test_update_sample_subsamples_context_manager(self, subsample_writer_written_aset, iterable_samples):
        aset = subsample_writer_written_aset
        assert aset._is_conman is False
        with aset as cm_aset:
            assert cm_aset._is_conman is True
            added = cm_aset.update(iterable_samples)
            assert added is None
        assert aset._is_conman is False

        assert len(aset._samples) == len(iterable_samples)
        for sample_idx, sample_data in enumerate(iterable_samples):
            assert f'sample{sample_idx}' in aset._samples

    def test_setitem_sample_subsamples_empty_arrayset(
            self, multi_item_generator, subsample_writer_written_aset, iterable_subsamples
    ):
        aset = subsample_writer_written_aset

        for sample_idx in range(multi_item_generator):
            aset[f'sample{sample_idx}'] = iterable_subsamples
        assert len(aset._samples) == len(iterable_subsamples)

        for sample_idx in range(multi_item_generator):
            assert f'sample{sample_idx}' in aset._samples
            assert len(aset._samples[f'sample{sample_idx}']._subsamples) == len(iterable_subsamples)
            for subsample_idx in range(len(iterable_subsamples)):
                assert f'subsample{subsample_idx}' in aset._samples[f'sample{sample_idx}']._subsamples

    def test_setitem_sample_subsamples_contextmanager(
            self, multi_item_generator, subsample_writer_written_aset, iterable_subsamples
    ):
        aset = subsample_writer_written_aset
        assert aset._is_conman is False
        with aset as aset_cm:
            assert aset_cm._is_conman is True
            for sample_idx in range(multi_item_generator):
                aset_cm[f'sample{sample_idx}'] = iterable_subsamples
            assert len(aset_cm._samples) == len(iterable_subsamples)
            assert aset_cm._samples[f'sample{sample_idx}']._is_conman is True
        assert aset._is_conman is False

        for sample_idx in range(multi_item_generator):
            assert f'sample{sample_idx}' in aset._samples
            assert len(aset._samples[f'sample{sample_idx}']._subsamples) == len(iterable_subsamples)
            for subsample_idx in range(len(iterable_subsamples)):
                assert f'subsample{subsample_idx}' in aset._samples[
                    f'sample{sample_idx}']._subsamples

    def test_update_subsamples_empty_arrayset(self, multi_item_generator, subsample_writer_written_aset,
                                              iterable_subsamples):
        aset = subsample_writer_written_aset
        for sample_idx in range(multi_item_generator):
            aset[f'sample{sample_idx}'] = {'foo': np.arange(16, dtype=np.uint8).reshape(4, 4) + 10}
            aset[f'sample{sample_idx}'].update(iterable_subsamples)
        assert len(aset._samples) == len(iterable_subsamples)

        for sample_idx in range(multi_item_generator):
            assert f'sample{sample_idx}' in aset._samples
            assert len(aset._samples[f'sample{sample_idx}']._subsamples) == len(iterable_subsamples) + 1
            assert 'foo' in aset._samples[f'sample{sample_idx}']._subsamples
            for subsample_idx in range(len(iterable_subsamples)):
                assert f'subsample{subsample_idx}' in aset._samples[f'sample{sample_idx}']._subsamples

    def test_update_subsamples_via_kwargs_empty_arrayset(self, multi_item_generator, subsample_writer_written_aset):
        aset = subsample_writer_written_aset
        for sample_idx in range(multi_item_generator):
            aset[f'sample{sample_idx}'] = {'foo': np.arange(16, dtype=np.uint8).reshape(4, 4) + 10}
            aset[f'sample{sample_idx}'].update(bar=np.arange(16, dtype=np.uint8).reshape(4, 4) + 20)
        assert len(aset._samples) == multi_item_generator

        for sample_idx in range(multi_item_generator):
            assert f'sample{sample_idx}' in aset._samples
            assert len(aset._samples[f'sample{sample_idx}']._subsamples) == 2
            assert 'foo' in aset._samples[f'sample{sample_idx}']._subsamples
            assert 'bar' in aset._samples[f'sample{sample_idx}']._subsamples

    def test_update_subsamples_kwargs_and_other_dict_doesnt_modify_input_in_calling_scope(
            self, multi_item_generator, subsample_writer_written_aset, iterable_subsamples
    ):
        """ensure bug does not revert.

        Had a case where if dict was passed as ``other`` along with kwargs, the operation
        would complete as normally, but when control returned to the caller the original
        dict passed in as ``other`` would have been silently merged with the kwargs.
        """
        aset = subsample_writer_written_aset
        if not isinstance(iterable_subsamples, dict):
            return
        iterable_subsamples_before = list(iterable_subsamples.keys())

        for sample_idx in range(multi_item_generator):
            aset[f'sample{sample_idx}'] = {'foo': np.arange(16, dtype=np.uint8).reshape(4, 4) + 10}
            aset[f'sample{sample_idx}'].update(iterable_subsamples,
                                               kwargadded=np.arange(16, dtype=np.uint8).reshape(4, 4))
            # in bug case, would now observe that iterable_subsamples would have been
            # silently modified in a method analogous to calling:
            #
            #   ``iterable_subsamples.update({'kwargadded': np.array})``
            #
            assert list(iterable_subsamples.keys()) == iterable_subsamples_before
        assert list(iterable_subsamples.keys()) == iterable_subsamples_before

    def test_update_subsamples_via_kwargs_and_iterable_empty_arrayset(
            self, multi_item_generator, subsample_writer_written_aset, iterable_subsamples
    ):
        aset = subsample_writer_written_aset
        for sample_idx in range(multi_item_generator):
            aset[f'sample{sample_idx}'] = {'foo': np.arange(16, dtype=np.uint8).reshape(4, 4) + 10}
            aset[f'sample{sample_idx}'].update(iterable_subsamples, bar=np.arange(16, dtype=np.uint8).reshape(4, 4))

        assert len(aset._samples) == multi_item_generator

        for sample_idx in range(multi_item_generator):
            assert f'sample{sample_idx}' in aset._samples
            assert len(aset._samples[f'sample{sample_idx}']._subsamples) == len(iterable_subsamples) + 2
            assert 'foo' in aset._samples[f'sample{sample_idx}']._subsamples
            assert 'bar' in aset._samples[f'sample{sample_idx}']._subsamples

    @pytest.mark.parametrize('backend', fixed_shape_backend_params)
    def test_update_subsamples_context_manager(
            self, backend, multi_item_generator, iterable_subsamples, repo
    ):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('foo', shape=(4, 4), dtype=np.uint8,
                                     backend=backend, contains_subsamples=True)

        for sample_idx in range(multi_item_generator):
            aset[f'sample{sample_idx}'] = {'foo': np.arange(16, dtype=np.uint8).reshape(4, 4) + 10}
            assert aset._is_conman is False
            with aset[f'sample{sample_idx}'] as sample_cm:
                assert sample_cm._is_conman is True
                assert aset._is_conman is True
                sample_cm.update(iterable_subsamples)
            assert aset._is_conman is False
        assert len(aset._samples) == len(iterable_subsamples)

        for sample_idx in range(multi_item_generator):
            assert f'sample{sample_idx}' in aset._samples
            assert len(aset._samples[f'sample{sample_idx}']._subsamples) == len(iterable_subsamples) + 1
            assert 'foo' in aset._samples[f'sample{sample_idx}']._subsamples
            for subsample_idx in range(len(iterable_subsamples)):
                assert f'subsample{subsample_idx}' in aset._samples[f'sample{sample_idx}']._subsamples
        co.close()

    def test_setitem_sample_empty_arrayset(
            self, multi_item_generator, iterable_subsamples, subsample_writer_written_aset
    ):
        aset = subsample_writer_written_aset

        subsamples_dict = dict(iterable_subsamples)
        for sample_idx in range(multi_item_generator):
            aset[f'sample{sample_idx}'] = {'foo': np.arange(16, dtype=np.uint8).reshape(4, 4) + 10}
            for subsample_key, subsample_val in subsamples_dict.items():
                aset[f'sample{sample_idx}'][subsample_key] = subsample_val
        assert len(aset._samples) == len(iterable_subsamples)

        for sample_idx in range(multi_item_generator):
            assert f'sample{sample_idx}' in aset._samples
            assert len(aset._samples[f'sample{sample_idx}']._subsamples) == len(subsamples_dict) + 1
            assert 'foo' in aset._samples[f'sample{sample_idx}']._subsamples
            for subkey in subsamples_dict.keys():
                assert subkey in aset._samples[f'sample{sample_idx}']._subsamples

    def test_setitem_sample_setitem_subsample_empty_arrayset_fails(self, subsample_writer_written_aset):
        """This should fail because __getitem___ raises keyerror when

        ``aset[foo-sample][subsample] = np.ndarray`` runs.

        The ``aset[foo-sample]`` part fails with KeyError, and no subsample
        accessor is returned for the __setitem__ call following __getitem__
        """
        aset = subsample_writer_written_aset
        with pytest.raises(KeyError, match='sample'):
            aset['sample']
        with pytest.raises(KeyError, match='sample'):
            aset['sample']['subsample'] = np.arange(16, dtype=np.uint8).reshape(4, 4)
        assert len(aset) == 0

    def test_setitem_subsamples_contextmanager(self, multi_item_generator, iterable_subsamples,
                                               subsample_writer_written_aset):
        aset = subsample_writer_written_aset
        subsamples_dict = dict(iterable_subsamples)
        for sample_idx in range(multi_item_generator):
            aset[f'sample{sample_idx}'] = {'foo': np.arange(16, dtype=np.uint8).reshape(4, 4) + 10}
            assert aset._is_conman is False
            with aset[f'sample{sample_idx}'] as sample_cm:
                assert sample_cm._is_conman is True
                assert aset._is_conman is True
                for subsample_key, subsample_val in subsamples_dict.items():
                    sample_cm[subsample_key] = subsample_val
            assert aset._is_conman is False
        assert len(aset._samples) == len(iterable_subsamples)

        for sample_idx in range(multi_item_generator):
            assert f'sample{sample_idx}' in aset._samples
            assert len(aset._samples[f'sample{sample_idx}']._subsamples) == len(subsamples_dict) + 1
            assert 'foo' in aset._samples[f'sample{sample_idx}']._subsamples
            for subkey in subsamples_dict.keys():
                assert subkey in aset._samples[f'sample{sample_idx}']._subsamples

    def test_append_subsamples_empty_arrayset(self, multi_item_generator, subsample_writer_written_aset):
        aset = subsample_writer_written_aset
        for sample_idx in range(multi_item_generator):
            aset[f'sample{sample_idx}'] = {
                'foo': np.arange(16, dtype=np.uint8).reshape(4, 4) + ((sample_idx * 2) + 1)
            }
            outkey = aset[f'sample{sample_idx}'].append(
                np.arange(16, dtype=np.uint8).reshape(4, 4) + sample_idx
            )
            assert 'foo' in aset._samples[f'sample{sample_idx}']._subsamples
            assert outkey in aset._samples[f'sample{sample_idx}']._subsamples
        assert len(aset._samples) == multi_item_generator

    def test_append_subsamples_contextmanager(self, multi_item_generator, subsample_writer_written_aset):
        aset = subsample_writer_written_aset
        for sample_idx in range(multi_item_generator):
            aset[f'sample{sample_idx}'] = {
                'foo': np.arange(16, dtype=np.uint8).reshape(4, 4) + ((sample_idx * 2) + 1)
            }
            assert aset._is_conman is False
            with aset[f'sample{sample_idx}'] as sample_cm:
                assert aset._is_conman is True
                assert sample_cm._is_conman is True
                outkey = sample_cm.append(np.arange(16, dtype=np.uint8).reshape(4, 4) + sample_idx)
            assert aset._is_conman is False
            assert 'foo' in aset._samples[f'sample{sample_idx}']._subsamples
            assert outkey in aset._samples[f'sample{sample_idx}']._subsamples
        assert len(aset._samples) == multi_item_generator

    @pytest.mark.parametrize('backend', fixed_shape_backend_params)
    @pytest.mark.parametrize('other', [
        [f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)],
        (f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)),
    ])
    def test_update_noniterable_subsample_iter_fails(self, backend, other, repo):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('foo', shape=(4, 4), dtype=np.uint8,
                                     backend=backend, contains_subsamples=True)
        aset[f'foo'] = {'foo': np.arange(16, dtype=np.uint8).reshape(4, 4) + 10}
        with pytest.raises(ValueError, match='dictionary update sequence'):
            aset['foo'].update(other)
        assert len(aset._samples) == 1
        assert len(aset._samples['foo']._subsamples) == 1
        assert 'foo' in aset._samples['foo']._subsamples
        assert 'subsample1' not in aset._samples['foo']._subsamples
        co.close()

    @pytest.mark.parametrize('backend', fixed_shape_backend_params)
    def test_update_subsamples_with_too_many_arguments_fails(self, backend, repo):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('foo', shape=(4, 4), dtype=np.uint8,
                                     backend=backend, contains_subsamples=True)
        arr = np.arange(16, dtype=np.uint8).reshape(4, 4)
        aset[f'foo'] = {'foo': arr + 10}
        with pytest.raises(TypeError, match='takes from 1 to 2 positional arguments'):
            aset['foo'].update('fail', arr)
        assert len(aset._samples) == 1
        assert len(aset._samples['foo']._subsamples) == 1
        assert 'foo' in aset._samples['foo']._subsamples
        co.close()

    @pytest.mark.parametrize('backend', fixed_shape_backend_params)
    def test_update_subsamples_with_too_few_arguments_fails(self, backend, repo):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('foo', shape=(4, 4), dtype=np.uint8,
                                     backend=backend, contains_subsamples=True)
        arr = np.arange(16, dtype=np.uint8).reshape(4, 4)
        aset[f'foo'] = {'foo': arr + 10}
        with pytest.raises(ValueError, match='dictionary update sequence element #0 has length 1; 2 is required'):
            aset['foo'].update('fail')
        assert len(aset._samples) == 1
        assert len(aset._samples['foo']._subsamples) == 1
        assert 'foo' in aset._samples['foo']._subsamples
        co.close()

    @pytest.mark.parametrize('other', [
        ['sample1', [[f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)]]],
        ['sample1', ((f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)),)],
        ('sample1', ((f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)),),),
        ('sample1', [[f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)]],),
        ['sample1', ([f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)])],
        ['sample1', [(f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4))]],
        ('sample1', ([f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)]),),
        ('sample1', [(f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4))],),
        ['sample1', [f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)]],
        ['sample1', (f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4))],
        ('sample1', [f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)],),
        ('sample1', (f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)),),
        ('sample1', {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4)},),
        ['sample1', {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4)}],
    ])
    def test_update_noniterable_samples_fails(self, other, subsample_writer_written_aset):
        aset = subsample_writer_written_aset
        with pytest.raises(ValueError, match='dictionary update sequence'):
            aset.update(other)
        assert len(aset._samples) == 0

    @pytest.mark.parametrize('other', [
        [['sample1', [f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)]]],
        [['sample1', (f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4),)]],
        (('sample1', (f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4))),),
        (('sample1', [f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)]),),
        {'sample1': [f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)]},
        {'sample1': (f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4))},
        {'sample1': (f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4))},
        {'sample1': [f'subsample1', np.arange(16, dtype=np.uint8).reshape(4, 4)]},
    ])
    def test_update_noniterable_subsamples_fails(self, other, subsample_writer_written_aset):
        aset = subsample_writer_written_aset
        with pytest.raises(ValueError, match='dictionary update sequence'):
            aset.update(other)
        assert len(aset._samples) == 0

    @pytest.mark.parametrize('other', [
        {'sample1!': {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {-2: {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {'lol cat': {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {'sample 1': {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {('sample', 'one'): {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {(1, 2): {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {('sample', 2): {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {(1, 'sample'): {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4)}},
    ])
    def test_update_invalid_sample_key_fails(self, other, subsample_writer_written_aset):
        aset = subsample_writer_written_aset
        with pytest.raises(ValueError, match='is not suitable'):
            aset.update(other)
        assert len(aset._samples) == 0

    @pytest.mark.parametrize('other', [
        {'sample': {f'subsample1!': np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {'sample': {f'subsample 1': np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {'sample': {-2: np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {'sample': {f'subsample1\n': np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {'sample': {(1, 2): np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {'sample': {('s1', 's2'): np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {'sample': {('s1', 1): np.arange(16, dtype=np.uint8).reshape(4, 4)}},
        {'sample': {(1, 's1'): np.arange(16, dtype=np.uint8).reshape(4, 4)}},
    ])
    def test_update_sample_invalid_subsample_key_fails(self, other, subsample_writer_written_aset):
        aset = subsample_writer_written_aset
        with pytest.raises(ValueError, match='is not suitable'):
            aset.update(other)
        assert len(aset._samples) == 0

    @pytest.mark.parametrize('variable_shape,backend', [
        *[[True, be] for be in variable_shape_backend_params],
        *[[False, be] for be in fixed_shape_backend_params],
    ])
    @pytest.mark.parametrize('other', [
        {'sample': {f'subsample1': np.arange(9, dtype=np.uint8).reshape(3, 3)}},
        {'sample': {f'subsample1': np.arange(8, dtype=np.uint8).reshape(2, 2, 2)}},
        {'sample': {f'subsample1': np.arange(4, dtype=np.float32).reshape(2, 2)}},
        {'sample': {f'subsample1': np.arange(4, dtype=np.uint8).reshape((2, 2), order='F')}},
        {'sample': {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4).tolist()}},
    ])
    def test_update_sample_invalid_array_fails_fixed_shape(self, backend, variable_shape, other, repo):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('foo',
                                     shape=(2, 2), dtype=np.uint8, variable_shape=variable_shape,
                                     backend=backend, contains_subsamples=True)
        with pytest.raises(ValueError):
            aset.update(other)
        assert len(aset._samples) == 0
        co.close()

    @pytest.mark.parametrize('other', [
        {f'subsample1!': np.arange(16, dtype=np.uint8).reshape(4, 4)},
        {f'subsample 1': np.arange(16, dtype=np.uint8).reshape(4, 4)},
        {-2: np.arange(16, dtype=np.uint8).reshape(4, 4)},
        {f'subsample1\n': np.arange(16, dtype=np.uint8).reshape(4, 4)},
        {(1, 2): np.arange(16, dtype=np.uint8).reshape(4, 4)},
        {('s1', 's2'): np.arange(16, dtype=np.uint8).reshape(4, 4)},
        {('s1', 1): np.arange(16, dtype=np.uint8).reshape(4, 4)},
        {(1, 's1'): np.arange(16, dtype=np.uint8).reshape(4, 4)},
    ])
    def test_update_subsample_invalid_subsample_key_fails(self, other, subsample_writer_written_aset):
        aset = subsample_writer_written_aset
        aset['sample'] = {0: np.zeros((4, 4), dtype=np.uint8)}
        with pytest.raises(ValueError, match='is not suitable'):
            aset['sample'].update(other)
        assert len(aset._samples) == 1
        assert len(aset._samples['sample']._subsamples) == 1
        assert 0 in aset._samples['sample']._subsamples

    @pytest.mark.parametrize('variable_shape,backend', [
        *[[False, be] for be in fixed_shape_backend_params],
    ])
    @pytest.mark.parametrize('other', [
        {f'subsample1': np.arange(9, dtype=np.uint8).reshape(3, 3)},
        {f'subsample1': np.arange(8, dtype=np.uint8).reshape(2, 2, 2)},
        {f'subsample1': np.arange(4, dtype=np.float32).reshape(2, 2)},
        {f'subsample1': np.arange(4, dtype=np.uint8).reshape((2, 2), order='F')},
        {f'subsample1': np.arange(16, dtype=np.uint8).reshape(4, 4).tolist()},
    ])
    def test_update_subsample_invalid_array_fails_fixed_shape(self, backend, variable_shape, other, repo):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('foo',
                                     shape=(4, 4), dtype=np.uint8, variable_shape=variable_shape,
                                     backend=backend, contains_subsamples=True)
        aset['sample'] = {0: np.zeros((4, 4), dtype=np.uint8)}
        with pytest.raises(ValueError):
            aset['sample'].update(other)
        assert len(aset._samples) == 1
        assert len(aset._samples['sample']._subsamples) == 1
        assert 0 in aset._samples['sample']._subsamples
        co.close()


# --------------------------- Test Remove Data -------------------------------------


@pytest.fixture(scope='class')
def subsample_data_map():
    arr = np.arange(5 * 7).astype(np.uint16).reshape((5, 7))
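    # deliberately mixes str and int keys at both the sample and subsample level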
    res = {
        'foo': {
            0: arr,
            1: arr + 1,
            2: arr + 2
        },
        2: {
            'bar': arr + 3,
            'baz': arr + 4
        }
    }
    return res


@pytest.fixture(params=fixed_shape_backend_params, scope='class')
def backend_param(request):
    return request.param


@pytest.fixture(params=[False, True], scope='class')
def write_enabled(request):
    return request.param


@pytest.fixture(scope='class')
def initialized_arrayset(write_enabled, backend_param, classrepo, subsample_data_map):
    co = classrepo.checkout(write=True)
    aset = co.add_ndarray_column(f'foo{backend_param}{int(write_enabled)}',
                                 shape=(5, 7), dtype=np.uint16, backend=backend_param,
                                 contains_subsamples=True)
    aset.update(subsample_data_map)
    co.commit(f'done {backend_param}{write_enabled}')
    co.close()
    if write_enabled:
        nco = classrepo.checkout(write=True)
        yield nco.columns[f'foo{backend_param}{int(write_enabled)}']
        nco.close()
    else:
        nco = classrepo.checkout()
        yield nco.columns[f'foo{backend_param}{int(write_enabled)}']
        nco.close()


@pytest.fixture()
def initialized_arrayset_write_only(backend_param, repo, subsample_data_map):
    co = repo.checkout(write=True)
    aset = co.add_ndarray_column('foo', shape=(5, 7), dtype=np.uint16,
                                 backend=backend_param, contains_subsamples=True)
    aset.update(subsample_data_map)
    yield co.columns['foo']
    co.close()


class TestRemoveData:

    # --------------------- delete -----------------------------

    def test_delitem_single_sample_from_arrayset(self, initialized_arrayset_write_only):
        aset = initialized_arrayset_write_only
        del aset['foo']
        assert 'foo' not in aset._samples
        assert 'foo' not in aset

    def test_delitem_single_subsample_from_sample(self, initialized_arrayset_write_only):
        aset = initialized_arrayset_write_only
        del aset['foo'][0]
        assert 0 not in aset._samples['foo']._subsamples
        assert 0 not in aset['foo']

    def test_delitem_sample_nonexisting_keys_fails(self, initialized_arrayset_write_only):
        aset = initialized_arrayset_write_only
        assert 'doesnotexist' not in aset._samples
        assert 'doesnotexist' not in aset
        with pytest.raises(KeyError):
            del aset['doesnotexist']

    def test_delitem_single_subsample_nonexisting_key_fails(self, initialized_arrayset_write_only):
        aset = initialized_arrayset_write_only
        assert 'foo' in aset._samples
        assert 'foo' in aset
        assert 'doesnotexist' not in aset._samples['foo']._subsamples
        assert 'doesnotexist' not in aset['foo']
        with pytest.raises(KeyError):
            del aset['foo']['doesnotexist']

    def test_delitem_multiple_samples_fails_keyerror(self, initialized_arrayset_write_only):
        aset = initialized_arrayset_write_only
        with pytest.raises(KeyError, match="('foo', 2)"):
            del aset['foo', 2]
        assert 'foo' in aset
        assert 2 in aset

    # ------------------------ pop ----------------------------

    def test_pop_single_sample_from_arrayset(self, initialized_arrayset_write_only, subsample_data_map):
        aset = initialized_arrayset_write_only
        res = aset.pop('foo')
        assert 'foo' not in aset
        assert isinstance(res, dict)
        assert len(res) == len(subsample_data_map['foo'])
        for expected_k, expected_v in subsample_data_map['foo'].items():
            assert_equal(res[expected_k], expected_v)

    def test_pop_multiple_samples_from_arrayset_fails(self, initialized_arrayset_write_only):
        aset = initialized_arrayset_write_only
        with pytest.raises(TypeError, match="takes 2 positional arguments but 3 were"):
            aset.pop('foo', 2)
        assert 'foo' in aset
        assert 2 in aset

    def test_pop_single_subsample_from_sample(self, initialized_arrayset_write_only, subsample_data_map):
        aset = initialized_arrayset_write_only
        res = aset['foo'].pop(0)
        assert 0 not in aset['foo']
        assert isinstance(res, np.ndarray)
        assert_equal(res, subsample_data_map['foo'][0])

    def test_pop_multiple_subsample_from_sample_fails(self, initialized_arrayset_write_only):
        aset = initialized_arrayset_write_only
        with pytest.raises(TypeError, match="takes 2 positional arguments but 3 were given"):
            aset['foo'].pop(*[0, 1])
        assert 0 in aset['foo']
        assert 1 in aset['foo']


# ------------------------------ Container Introspection -----------------------------------


class TestContainerIntrospection:

    def test_get_sample_returns_object(self, initialized_arrayset, subsample_data_map):
        from hangar.columns.layout_nested import FlatSubsampleReader, NestedSampleReader

        aset = initialized_arrayset
        assert isinstance(aset, NestedSampleReader)
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert isinstance(sample, FlatSubsampleReader)

    # -------------------------- test __dunder__ methods ----------------------------------

    def test_get_sample_test_subsample_len_method(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert len(sample) == len(subsample_data)

    def test_get_sample_test_subsample_contains_method(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            for subsample_name in subsample_data.keys():
                assert subsample_name in sample

    def test_sample_len_reported_correctly(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        assert len(aset) == len(subsample_data_map)
        assert aset.num_subsamples == sum([len(subsample) for subsample in subsample_data_map.values()])

    # ----------------------------- test property ---------------------------

    def test_get_sample_test_subsample_sample_property(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert sample.sample == sample_name

    def test_get_sample_test_subsample_arrayset_property(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert sample.column.startswith('foo')

    def test_get_sample_test_data_property(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            res = sample.data
            assert isinstance(res, dict)
            assert len(res) == len(subsample_data)
            for k, v in res.items():
                assert_equal(v, subsample_data[k])

    def test_get_sample_test_subsample_contains_remote_references_property(
            self, initialized_arrayset, subsample_data_map
    ):
        aset = initialized_arrayset
        # test works before add remote references
        assert aset.contains_remote_references is False
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert sample.contains_remote_references is False

        # add subsamples which are not local to each subsample
        # perform the mock
        from hangar.backends import backend_decoder
        template = backend_decoder(b'50:daeaaeeaebv')
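        # note: the b'50:...' spec is assumed to decode to a remote-backend record
        # (backend code '50'), so the entries inserted below have no data stored
        # locally; this is what flips ``contains_remote_references`` in the test
        # without needing a real remote server.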
        aset['foo']._subsamples[50] = template
        aset[2]._subsamples[50] = template

        assert aset.contains_remote_references is True
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert sample.contains_remote_references is True

        del aset._samples['foo']._subsamples[50]
        del aset._samples[2]._subsamples[50]

    def test_get_sample_test_subsample_remote_reference_keys_property(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        # test works before add remote references
        assert aset.remote_reference_keys == ()
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert sample.remote_reference_keys == ()

        # add subsamples which are not local to each subsample
        # perform the mock
        from hangar.backends import backend_decoder
        template = backend_decoder(b'50:daeaaeeaebv')
        aset['foo']._subsamples[50] = template
        aset[2]._subsamples[50] = template

        assert aset.remote_reference_keys in ((2, 'foo'), ('foo', 2))
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert sample.remote_reference_keys == (50,)

        del aset._samples['foo']._subsamples[50]
        del aset._samples[2]._subsamples[50]

    def test_getattr_does_not_raise_permission_error_if_alive(self, initialized_arrayset):
        aset = initialized_arrayset

        assert hasattr(aset, 'doesnotexist') is False  # does not raise error
        assert hasattr(aset, '_mode') is True
        with pytest.raises(AttributeError):
            assert getattr(aset, 'doesnotexist')
        assert getattr(aset, '_mode') == ('a' if aset.iswriteable else 'r')

        sample = aset['foo']
        assert hasattr(sample, 'doesnotexist') is False  # does not raise error
        assert hasattr(sample, '_mode') is True
        with pytest.raises(AttributeError):
            assert getattr(sample, 'doesnotexist')
        assert getattr(sample, '_mode') == ('a' if aset.iswriteable else 'r')

        # mock up destruct call in sample and aset.
        original = getattr(aset, '_mode')
        delattr(aset, '_mode')
        delattr(sample, '_mode')
        with pytest.raises(PermissionError):
            hasattr(aset, 'doesnotexist')
        with pytest.raises(PermissionError):
            hasattr(aset, '_mode')

        with pytest.raises(PermissionError):
            hasattr(sample, 'doesnotexist')
        with pytest.raises(PermissionError):
            hasattr(sample, '_mode')
        setattr(aset, '_mode', original)
        setattr(sample, '_mode', original)


# ------------------------------ Getting Data --------------------------------------------


class TestGetDataMethods:

    def test_get_sample_missing_key(self, initialized_arrayset):
        aset = initialized_arrayset
        returned = aset.get('doesnotexist')
        assert returned is None
        default_returned = aset.get(9999, default=True)
        assert default_returned is True

    def test_getitem_sample_missing_key(self, initialized_arrayset):
        aset = initialized_arrayset
        with pytest.raises(KeyError):
            aset['doesnotexist']

    def test_get_sample_get_subsample(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            for subsample_name, subsample_value in subsample_data.items():
                res = sample.get(subsample_name)
                assert_equal(res, subsample_value)

    def test_getitem_sample_getitem_subsample(self, initialized_arrayset, subsample_data_map):
        from hangar.columns.layout_nested import FlatSubsampleReader

        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset[sample_name]
            assert isinstance(sample, FlatSubsampleReader)
            for subsample_name, subsample_value in subsample_data.items():
                res = sample[subsample_name]
                assert_equal(res, subsample_value)

    def test_getitem_subsample_from_column(self, initialized_arrayset, subsample_data_map):
        from hangar.columns.layout_nested import FlatSubsampleReader

        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset[sample_name]
            assert isinstance(sample, FlatSubsampleReader)
            res = aset[sample_name, ...]
            assert res.keys() == subsample_data.keys()
            for subsample_name, subsample_value in subsample_data.items():
                res = aset[sample_name, subsample_name]
                assert_equal(res, subsample_value)

    def test_recursive_subsample_getitem_from_column(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            for subsample_name, subsample_value in subsample_data.items():
                assert isinstance(aset[sample_name, subsample_name, 0, 0], np.uint16)
                assert aset[sample_name, subsample_name, 0, 0] == subsample_value[0][0]

    def test_get_subsample_from_column(self, initialized_arrayset, subsample_data_map):
        from hangar.columns.layout_nested import FlatSubsampleReader

        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert isinstance(sample, FlatSubsampleReader)
            res = aset.get((sample_name, ...))
            assert res.keys() == subsample_data.keys()
            for subsample_name, subsample_value in subsample_data.items():
                res = aset.get((sample_name, subsample_name))
                assert_equal(res, subsample_value)

    def test_get_sample_get_subsample_missing_key(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name in subsample_data_map.keys():

            with pytest.raises(KeyError):
                aset[sample_name, 'doesnotexist']
            returned = aset.get((sample_name, "doesnotexist"))
            assert returned is None
            default_returned = aset.get((sample_name, 9999), default=True)
            assert default_returned is True

            sample = aset.get(sample_name)
            returned = sample.get('doesnotexist')
            assert returned is None
            default_returned = sample.get(9999, default=True)
            assert default_returned is True

    def test_getitem_sample_getitem_subsample_missing_key(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name in subsample_data_map.keys():
            sample = aset[sample_name]
            with pytest.raises(KeyError):
                sample['doesnotexist']

    def test_get_sample_get_multiple_subsamples_fails(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            with pytest.raises(TypeError):
                sample.get(*list(subsample_data.keys())[:2], default=None)

    def test_get_sample_getitem_single_subsample(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            for subsample_name, subsample_value in subsample_data.items():
                res = sample[subsample_name]
                assert_equal(res, subsample_value)

    def test_get_sample_getitem_single_subsample_missing_key(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name in subsample_data_map.keys():
            sample = aset.get(sample_name)
            returned = sample.get('doesnotexist')
            assert returned is None
            default_returned = sample.get(9999, default=True)
            assert default_returned is True

    def test_get_sample_getitem_multiple_subsamples_fails(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            with pytest.raises(TypeError):
                sample[list(subsample_data.keys())[:2]]

    def test_get_sample_getitem_subsamples_with_ellipsis(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            res = sample[...]
            assert isinstance(res, dict)
            assert len(res) == len(subsample_data)
            for k, v in res.items():
                assert_equal(v, subsample_data[k])

    def test_get_sample_getitem_subsamples_with_keys_and_ellipsis_fails(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            existing_subsample_key = next(iter(subsample_data.keys()))
            with pytest.raises(TypeError):
                sample[..., existing_subsample_key]
            with pytest.raises(TypeError):
                sample[..., [existing_subsample_key]]

    def test_get_sample_getitem_subsamples_with_unbound_slice(self, initialized_arrayset, subsample_data_map):
        """unbound slice is ``slice(None) == [:]``"""
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            res = sample[:]
            assert isinstance(res, dict)
            assert len(res) == len(subsample_data)
            for k, v in res.items():
                assert_equal(v, subsample_data[k])

    def test_get_sample_getitem_subsamples_with_bounded_slice(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            res = sample[0:2]
            assert isinstance(res, dict)
            assert len(res) == 2
            for k, v in res.items():
                assert_equal(v, subsample_data[k])

    def test_subsample_getitem_with_bounded_slice_from_column(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            res = aset[sample_name, 0:2]
            assert isinstance(res, dict)
            assert len(res) == 2
            for k, v in res.items():
                assert_equal(v, subsample_data[k])

    def test_get_sample_getitem_subsamples_with_out_of_bounds_slice_does_not_fail(
            self, initialized_arrayset, subsample_data_map):
        """Odd python behavior we emulate: out of bounds sequence slicing is allowed.

        Instead of throwing an exception, the slice is treated as if it should just
        go up to the total number of elements in the container. For example:
            [1, 2, 3][0:5] == [1, 2, 3]
        """
        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            res = sample[0:5]
            assert isinstance(res, dict)
            assert len(res) == len(subsample_data)
            for k, v in res.items():
                assert_equal(v, subsample_data[k])

    def test_aset_contextmanager(self, initialized_arrayset, subsample_data_map):
        assert initialized_arrayset._is_conman is False
        with initialized_arrayset as aset:
            assert aset._is_conman is True
            for sample_name, subsample_data in subsample_data_map.items():
                sample = aset.get(sample_name)
                assert sample._is_conman is True
                for subsample_name, expected_val in subsample_data.items():
                    assert_equal(sample.get(subsample_name), expected_val)
                assert sample._is_conman is True
        assert initialized_arrayset._is_conman is False
        assert aset._is_conman is False
        assert sample._is_conman is False

    def test_sample_contextmanager(self, initialized_arrayset, subsample_data_map):
        for sample_name, subsample_data in subsample_data_map.items():
            sample = initialized_arrayset.get(sample_name)
            assert initialized_arrayset._is_conman is False
            assert sample._is_conman is False
            with sample as sample_cm:
                assert sample_cm._is_conman is True
                assert initialized_arrayset._is_conman is True
                for subsample_name, expected_val in subsample_data.items():
                    assert_equal(sample_cm.get(subsample_name), expected_val)
            assert sample._is_conman is False
            assert initialized_arrayset._is_conman is False
        assert initialized_arrayset._is_conman is False
        assert sample._is_conman is False

    def test_sample_subsample_contextmanager(self, initialized_arrayset, subsample_data_map):
        assert initialized_arrayset._is_conman is False
        with initialized_arrayset as aset:
            assert aset._is_conman is True
            assert aset._enter_count == 1
            for sample_name, subsample_data in subsample_data_map.items():
                sample = aset.get(sample_name)
                assert sample._is_conman is True
                assert sample._enter_count == 1
                with sample as sample_cm:
                    assert aset._is_conman is True
                    assert sample_cm._is_conman is True
                    assert aset._enter_count == 2
                    assert sample_cm._enter_count == 2
                    for subsample_name, expected_val in subsample_data.items():
                        assert_equal(sample_cm.get(subsample_name), expected_val)
                assert aset._is_conman is True
                assert sample_cm._is_conman is True
                assert aset._enter_count == 1
                assert sample_cm._enter_count == 1
        assert initialized_arrayset._is_conman is False
        assert aset._is_conman is False
        assert sample._is_conman is False
        assert aset._enter_count == 0
        assert sample_cm._enter_count == 0

    def test_sample_reentrant_contextmanager_fails(self, initialized_arrayset, subsample_data_map):
        assert initialized_arrayset._is_conman is False

        with initialized_arrayset as aset:
            assert aset._is_conman is True
            assert aset._enter_count == 1
            for sample_name, subsample_data in subsample_data_map.items():
                sample = aset.get(sample_name)
                assert sample._is_conman is True
                assert sample._enter_count == 1
                with sample as sample_cm:
                    assert aset._is_conman is True
                    assert sample_cm._is_conman is True
                    assert aset._enter_count == 2
                    assert sample_cm._enter_count == 2
                    for subsample_name, expected_val in subsample_data.items():
                        assert_equal(sample_cm.get(subsample_name), expected_val)
                # reentrant use of the same sample context manager demonstrated here
                with sample as sample_cm2:
                    assert aset._is_conman is True
                    assert sample_cm._is_conman is True
                    assert sample_cm2._is_conman is True
                    assert aset._enter_count == 2
                    assert sample_cm._enter_count == 2
                    assert sample_cm2._enter_count == 2
                    for subsample_name, expected_val in subsample_data.items():
                        assert_equal(sample_cm2.get(subsample_name), expected_val)
                assert aset._is_conman is True
                assert sample_cm._is_conman is True
                assert aset._enter_count == 1
                assert sample_cm._enter_count == 1
        assert initialized_arrayset._is_conman is False
        assert aset._is_conman is False
        assert sample._is_conman is False
        assert aset._enter_count == 0
        assert sample_cm._enter_count == 0

    # -------------------------- dict-style iteration methods ---------------------------

    def test_calling_iter_on_arrayset(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        arrayset_it = iter(aset)  # returns iterator over sample keys
        for sample_name in arrayset_it:
            assert sample_name in aset
            assert sample_name in subsample_data_map

    def test_calling_iter_on_sample_in_arrayset(self, initialized_arrayset, subsample_data_map):
        aset = initialized_arrayset
        arrayset_it = iter(aset)  # returns iterator over sample keys
        for sample_name in arrayset_it:
            assert sample_name in aset
            assert sample_name in subsample_data_map

            sample_it = iter(aset[sample_name])  # returns iterator over subsample keys
            for subsample_name in sample_it:
                assert subsample_name in aset[sample_name]
                assert subsample_name in subsample_data_map[sample_name]

    def test_get_sample_keys_method(self, initialized_arrayset):
        from collections.abc import Iterator
        aset = initialized_arrayset

        assert isinstance(aset.keys(), Iterator)
        res = list(aset.keys())
        assert len(res) == 2
        assert 2 in res and 'foo' in res

    def test_get_sample_keys_method_local_only(self, initialized_arrayset):
        from collections.abc import Iterator
        aset = initialized_arrayset

        # add subsamples which are not local to each subsample
        # perform the mock
        from hangar.backends import backend_decoder
        template = backend_decoder(b'50:daeaaeeaebv')
        aset['foo']._subsamples[50] = template

        assert isinstance(aset.keys(local=True), Iterator)
        res = list(aset.keys(local=True))
        assert len(res) == 1
        assert 2 in res

        del aset._samples['foo']._subsamples[50]

    def test_get_sample_subsample_keys_method(self, initialized_arrayset, subsample_data_map):
        from collections.abc import Iterator

        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert isinstance(sample.keys(), Iterator)
            res = list(sample.keys())
            for k in res:
                assert k in subsample_data

    def test_get_sample_subsample_keys_method_local_only(self, initialized_arrayset, subsample_data_map):
        from collections.abc import Iterator
        aset = initialized_arrayset

        # add subsamples which are not local to each subsample
        # perform the mock
        from hangar.backends import backend_decoder
        template = backend_decoder(b'50:daeaaeeaebv')
        aset['foo']._subsamples[50] = template
        aset[2]._subsamples[50] = template

        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)

            # test local only properties
            assert isinstance(sample.keys(local=True), Iterator)
            res = list(sample.keys(local=True))
            assert len(res) == len(subsample_data)
            for k in res:
                assert k in subsample_data
                assert k != 50

            # compare to local+remote properties
            assert isinstance(sample.keys(local=False), Iterator)
            res = list(sample.keys(local=False))
            assert len(res) == len(subsample_data) + 1
            assert 50 in res
            for k in res:
                assert k in list(subsample_data.keys()) + [50]

        del aset._samples['foo']._subsamples[50]
        del aset._samples[2]._subsamples[50]

    def test_get_sample_values_method(self, initialized_arrayset):
        from hangar.columns.layout_nested import FlatSubsampleReader
        from collections.abc import Iterator
        aset = initialized_arrayset

        assert isinstance(aset.values(), Iterator)
        res = list(aset.values())
        assert len(res) == 2
        for sample in res:
            assert sample.sample in ('foo', 2)
            assert isinstance(sample, FlatSubsampleReader)

    def test_get_sample_values_method_local_only(self, initialized_arrayset):
        from hangar.columns.layout_nested import FlatSubsampleReader
        from collections.abc import Iterator
        aset = initialized_arrayset
        # add subsamples which are not local to each subsample
        # perform the mock
        from hangar.backends import backend_decoder
        template = backend_decoder(b'50:daeaaeeaebv')
        aset['foo']._subsamples[50] = template

        assert isinstance(aset.values(local=True), Iterator)
        res = list(aset.values(local=True))
        assert len(res) == 1
        sample = res[0]
        assert sample.sample == 2
        assert isinstance(sample, FlatSubsampleReader)

        del aset._samples['foo']._subsamples[50]

    def test_get_sample_subsample_values_method(self, initialized_arrayset, subsample_data_map):
        from collections.abc import Iterator

        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert isinstance(sample.values(), Iterator)
            res = list(sample.values())
            for v in res:
                assert any([np.allclose(v, arr) for arr in subsample_data.values()])

    def test_get_sample_subsample_values_method_local_only(self, initialized_arrayset, subsample_data_map):
        from collections.abc import Iterator
        aset = initialized_arrayset

        # add subsamples which are not local to each subsample
        # perform the mock
        from hangar.backends import backend_decoder
        from hangar.columns.common import open_file_handles
        template = backend_decoder(b'50:daeaaeeaebv')
        aset['foo']._subsamples[50] = template
        aset[2]._subsamples[50] = template
        mocked_fhand = open_file_handles(
            ['50'], path=initialized_arrayset._path, mode='a', schema=aset._schema)
        aset._be_fs['50'] = mocked_fhand['50']
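        # registering a handle for backend '50' lets ``values(local=False)`` attempt a
        # real read of the mocked remote entry; since nothing was ever written for it,
        # that read is expected to fail with FileNotFoundError (asserted below).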

        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)

            # test local only properties
            assert isinstance(sample.values(local=True), Iterator)
            res = list(sample.values(local=True))
            assert len(res) == len(subsample_data)
            for v in res:
                assert any([np.allclose(v, arr) for arr in subsample_data.values()])

            # test local+remote properties
            with pytest.raises(FileNotFoundError):
                list(sample.values(local=False))

        del aset._be_fs['50']
        del aset._samples['foo']._subsamples[50]
        del aset._samples[2]._subsamples[50]

    def test_get_sample_items_method(self, initialized_arrayset):
        from hangar.columns.layout_nested import FlatSubsampleReader
        from collections.abc import Iterator
        aset = initialized_arrayset

        assert isinstance(aset.items(), Iterator)
        res = list(aset.items())
        assert len(res) == 2
        for sample_name, sample in res:
            assert sample_name in (2, 'foo')
            assert isinstance(sample, FlatSubsampleReader)
            assert sample_name == sample.sample

    def test_get_sample_items_method_local_only(self, initialized_arrayset):
        from hangar.columns.layout_nested import FlatSubsampleReader
        from collections.abc import Iterator
        aset = initialized_arrayset
        # add subsamples which are not local to each subsample
        # perform the mock
        from hangar.backends import backend_decoder
        template = backend_decoder(b'50:daeaaeeaebv')
        aset['foo']._subsamples[50] = template

        assert isinstance(aset.items(local=True), Iterator)
        res = list(aset.items(local=True))
        assert len(res) == 1
        sample_name, sample = res[0]
        assert sample_name == 2
        assert isinstance(sample, FlatSubsampleReader)
        assert sample.sample == sample_name

        del aset._samples['foo']._subsamples[50]

    def test_get_sample_subsample_items_method(self, initialized_arrayset, subsample_data_map):
        from collections.abc import Iterator

        aset = initialized_arrayset
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            assert isinstance(sample.items(), Iterator)
            res = list(sample.items())
            for k, v in res:
                assert_equal(v, subsample_data[k])

    def test_get_sample_subsample_items_method_local_only(self, initialized_arrayset, subsample_data_map):
        from collections.abc import Iterator
        aset = initialized_arrayset

        # add subsamples which are not local to each subsample to perform the mock
        from hangar.backends import backend_decoder
        from hangar.columns.common import open_file_handles
        template = backend_decoder(b'50:daeaaeeaebv')
        aset['foo']._subsamples[50] = template
        aset[2]._subsamples[50] = template
        mocked_fhand = open_file_handles(
            ['50'], path=initialized_arrayset._path, mode='a', schema=aset._schema)
        aset._be_fs['50'] = mocked_fhand['50']

        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)

            # test local only properties
            assert isinstance(sample.items(local=True), Iterator)
            res = list(sample.items(local=True))
            assert len(res) == len(subsample_data)
            for k, v in res:
                assert_equal(v, subsample_data[k])
                assert k != 50

            # test local+remote properties
            with pytest.raises(FileNotFoundError):
                list(sample.items(local=False))

        del aset._be_fs['50']
        del aset._samples['foo']._subsamples[50]
        del aset._samples[2]._subsamples[50]

    @pytest.mark.parametrize("aset1_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset2_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset3_backend", fixed_shape_backend_params)
    def test_arrayset_remote_references_property_with_none(
            self, aset1_backend, aset2_backend, aset3_backend, repo, randomsizedarray
    ):
        co = repo.checkout(write=True)
        aset1 = co.add_ndarray_column('aset1', prototype=randomsizedarray,
                                      backend=aset1_backend, contains_subsamples=True)
        aset2 = co.add_ndarray_column('aset2', shape=(2, 2), dtype=np.int64,
                                      backend=aset2_backend, contains_subsamples=True)
        aset3 = co.add_ndarray_column('aset3', shape=(3, 4), dtype=np.float32,
                                      backend=aset3_backend, contains_subsamples=True)
        with aset1 as d1, aset2 as d2, aset3 as d3:
            d1[1] = {11: randomsizedarray}
            d2[1] = {21: np.ones((2, 2), dtype=np.int64)}
            d3[1] = {31: np.ones((3, 4), dtype=np.float32)}

        assert co.columns.contains_remote_references == {'aset1': False, 'aset2': False, 'aset3': False}
        assert co.columns.remote_sample_keys == {'aset1': (), 'aset2': (), 'aset3': ()}
        co.close()

    @pytest.mark.parametrize("aset1_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset2_backend", fixed_shape_backend_params)
    @pytest.mark.parametrize("aset3_backend", fixed_shape_backend_params)
    def test_arrayset_remote_references_property_with_remotes(
            self, aset1_backend, aset2_backend, aset3_backend, repo, randomsizedarray
    ):
        co = repo.checkout(write=True)
        aset1 = co.add_ndarray_column('aset1', prototype=randomsizedarray,
                                      backend=aset1_backend, contains_subsamples=True)
        aset2 = co.add_ndarray_column('aset2', shape=(2, 2), dtype=np.int64,
                                      backend=aset2_backend, contains_subsamples=True)
        aset3 = co.add_ndarray_column('aset3', shape=(3, 4), dtype=np.float32,
                                      backend=aset3_backend, contains_subsamples=True)
        with aset1 as d1, aset2 as d2, aset3 as d3:
            d1[1] = {11: randomsizedarray}
            d2[1] = {21: np.ones((2, 2), dtype=np.int64)}
            d3[1] = {31: np.ones((3, 4), dtype=np.float32)}

        assert co.columns.contains_remote_references == {'aset1': False, 'aset2': False, 'aset3': False}
        assert co.columns.remote_sample_keys == {'aset1': (), 'aset2': (), 'aset3': ()}
        co.commit('hello')
        co.close()
        co = repo.checkout()
        # perform the mock
        from hangar.backends import backend_decoder
        template = backend_decoder(b'50:daeaaeeaebv')
        co._columns._columns['aset1']._samples[1]._subsamples[12] = template
        co._columns._columns['aset2']._samples[1]._subsamples[22] = template

        assert co.columns.contains_remote_references == {'aset1': True, 'aset2': True, 'aset3': False}
        assert co.columns.remote_sample_keys == {'aset1': (1,), 'aset2': (1,), 'aset3': ()}
        co.close()


class TestWriteThenReadCheckout:

    @pytest.mark.parametrize('backend', fixed_shape_backend_params)
    def test_add_data_commit_checkout_read_only_contains_same(self, backend, repo, subsample_data_map):
        co = repo.checkout(write=True)
        aset = co.add_ndarray_column('foo', shape=(5, 7), dtype=np.uint16,
                                     backend=backend, contains_subsamples=True)
        added = aset.update(subsample_data_map)
        for sample_name, subsample_data in subsample_data_map.items():
            sample = aset.get(sample_name)
            for subsample_name, subsample_val in subsample_data.items():
                assert_equal(sample[subsample_name], subsample_val)
        co.commit('first')
        co.close()

        rco = repo.checkout()
        naset = rco.columns['foo']
        for sample_name, subsample_data in subsample_data_map.items():
            sample = naset.get(sample_name)
            for subsample_name, subsample_val in subsample_data.items():
                assert_equal(sample[subsample_name], subsample_val)
        rco.close()


================================================
FILE: tests/test_column_pickle.py
================================================
import pytest
import numpy as np
from conftest import fixed_shape_backend_params


def assert_equal(arr, arr2):
    assert np.array_equal(arr, arr2)
    assert arr.dtype == arr2.dtype



@pytest.fixture(scope='class')
def subsample_data_map():
    arr = np.arange(5*7).astype(np.uint16).reshape((5, 7))
    res = {
        'foo': {
            0: arr,
            1: arr + 1,
            2: arr + 2
        },
        2: {
            'bar': arr + 3,
            'baz': arr + 4
        }
    }
    return res


@pytest.fixture(scope='class')
def sample_data_map():
    arr = np.arange(5*7).astype(np.uint16).reshape((5, 7))
    res = {
        0: arr,
        1: arr + 1,
        2: arr + 2,
        'bar': arr + 3,
        'baz': arr + 4,
    }
    return res


@pytest.fixture(params=fixed_shape_backend_params, scope='class')
def backend_param(request):
    return request.param


@pytest.fixture(params=[False, True], scope='class')
def write_enabled(request):
    return request.param


@pytest.fixture(params=[False, True], scope='class')
def contains_subsamples(request):
    return request.param


@pytest.fixture(scope='class')
def initialized_column(
    write_enabled, backend_param, contains_subsamples, classrepo, subsample_data_map, sample_data_map
):
    co = classrepo.checkout(write=True)
    aset = co.add_ndarray_column(
        f'foo{backend_param}{int(write_enabled)}{int(contains_subsamples)}',
        shape=(5, 7), dtype=np.uint16,
        backend=backend_param, contains_subsamples=contains_subsamples)
    if contains_subsamples:
        aset.update(subsample_data_map)
    else:
        aset.update(sample_data_map)
    co.commit(f'done {backend_param}{write_enabled}{contains_subsamples}')
    co.close()
    if write_enabled:
        nco = classrepo.checkout(write=True)
        yield nco.columns[f'foo{backend_param}{int(write_enabled)}{int(contains_subsamples)}']
        nco.close()
    else:
        nco = classrepo.checkout()
        yield nco.columns[f'foo{backend_param}{int(write_enabled)}{int(contains_subsamples)}']
        nco.close()


@pytest.fixture(scope='class')
def initialized_column_read_only(backend_param, contains_subsamples, classrepo, subsample_data_map, sample_data_map):
    co = classrepo.checkout(write=True)
    aset = co.add_ndarray_column(
        f'foo{backend_param}{int(contains_subsamples)}',
        shape=(5, 7), dtype=np.uint16,
        backend=backend_param, contains_subsamples=contains_subsamples)
    if contains_subsamples:
        aset.update(subsample_data_map)
    else:
        aset.update(sample_data_map)

    digest = co.commit(f'done {backend_param}{contains_subsamples}')
    co.close()
    nco = classrepo.checkout(write=False, commit=digest)
    yield nco.columns[f'foo{backend_param}{int(contains_subsamples)}']
    nco.close()


class TestPickleableColumns:

    def test_is_pickleable(self, initialized_column, sample_data_map, subsample_data_map):
        import pickle
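        # expectation sketch: write-enabled columns are assumed to refuse pickling
        # (they wrap live write state), while read-only columns serialize cleanly,
        # e.g. so they can be handed off to worker processes.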

        aset = initialized_column
        if aset.iswriteable:
            with pytest.raises(PermissionError, match='Method "__getstate__" cannot'):
                pickle.dumps(aset, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            pkl = pickle.dumps(aset, protocol=pickle.HIGHEST_PROTOCOL)
            assert isinstance(pkl, bytes)


class TestLoadableColumns:

    def test_is_pickle_is_loadable(self, initialized_column_read_only, sample_data_map, subsample_data_map):
        import pickle

        aset = initialized_column_read_only
        pkl = pickle.dumps(aset, protocol=pickle.HIGHEST_PROTOCOL)
        assert isinstance(pkl, bytes)
        equiv = pickle.loads(pkl)

        if aset.contains_subsamples:
            assert len(aset) == len(subsample_data_map)
            assert len(equiv) == len(subsample_data_map)

            for sample_key, subsample_data in subsample_data_map.items():
                assert sample_key in aset
                assert sample_key in equiv
                aset_sample = aset[sample_key]
                equiv_sample = equiv[sample_key]
                assert len(aset_sample) == len(subsample_data)
                assert len(equiv_sample) == len(subsample_data)

                for subsample_key, expected in subsample_data.items():
                    assert subsample_key in aset_sample
                    assert subsample_key in equiv_sample
                    assert_equal(aset_sample[subsample_key], expected)
                    assert_equal(equiv_sample[subsample_key], expected)
        else:
            assert len(aset) == len(sample_data_map)
            assert len(equiv) == len(sample_data_map)
            for sample_key, expected in sample_data_map.items():
                assert sample_key in aset
                assert sample_key in equiv
                assert_equal(aset[sample_key], expected)
                assert_equal(equiv[sample_key], expected)
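        # the unpickled column is not attached to any checkout, so it is destructed
        # by hand below (presumably to release backend handles) instead of relying
        # on a checkout ``close()`` to clean it up.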
        equiv._destruct()
        del equiv


================================================
FILE: tests/test_commit_ref_verification.py
================================================
import pytest


def test_verify_corruption_in_commit_ref_alerts(two_commit_filled_samples_repo):
    from hangar.records.parsing import commit_ref_db_key_from_raw_key
    from hangar.records.parsing import commit_ref_raw_val_from_db_val
    from hangar.records.parsing import commit_ref_db_val_from_raw_val

    repo = two_commit_filled_samples_repo
    history = repo.log(return_contents=True)
    head_commit = history['head']

    refKey = commit_ref_db_key_from_raw_key(head_commit)
    with repo._env.refenv.begin(write=True) as txn:
        refVal = txn.get(refKey)
        ref_unpacked = commit_ref_raw_val_from_db_val(refVal)

        modified_ref = list(ref_unpacked.db_kvs)
        modified_ref[0] = list(modified_ref[0])
        modified_ref[0][1] = b'corrupt!'
        modified_ref[0] = tuple(modified_ref[0])
        modified_ref = tuple(modified_ref)
        modifiedVal = commit_ref_db_val_from_raw_val(modified_ref)

        txn.put(refKey, modifiedVal.raw, overwrite=True)
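    # the ref record on disk now disagrees with its commit digest, so every checkout
    # attempt below is expected to fail integrity verification with IOError.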

    with pytest.raises(IOError):
        _ = repo.checkout(write=True)
    with pytest.raises(IOError):
        _ = repo.checkout(write=False)
    with pytest.raises(IOError):
        _ = repo.checkout(write=False, commit=head_commit)


def test_verify_corruption_in_commit_parent_val_alerts(two_commit_filled_samples_repo):
    from hangar.records.parsing import commit_parent_db_key_from_raw_key
    from hangar.records.parsing import commit_parent_raw_val_from_db_val
    from hangar.records.parsing import commit_parent_db_val_from_raw_val

    repo = two_commit_filled_samples_repo
    history = repo.log(return_contents=True)
    head_commit = history['head']

    parentKey = commit_parent_db_key_from_raw_key(head_commit)
    with repo._env.refenv.begin(write=True) as txn:
        parentVal = txn.get(parentKey)

        parent_raw = commit_parent_raw_val_from_db_val(parentVal)
        parent = parent_raw.ancestor_spec
        modifiedVal = commit_parent_db_val_from_raw_val(
            master_ancestor='corrupt',
            dev_ancestor=parent.dev_ancestor,
            is_merge_commit=parent.is_merge_commit)

        txn.put(parentKey, modifiedVal.raw, overwrite=True)

    with pytest.raises(IOError):
        _ = repo.checkout(write=True)
    with pytest.raises(IOError):
        _ = repo.checkout(write=False)
    with pytest.raises(IOError):
        _ = repo.checkout(write=False, commit=head_commit)


def test_verify_corruption_in_spec_val_alerts(two_commit_filled_samples_repo):
    from hangar.records.parsing import commit_spec_db_key_from_raw_key
    from hangar.records.parsing import commit_spec_db_val_from_raw_val
    from hangar.records.parsing import commit_spec_raw_val_from_db_val

    repo = two_commit_filled_samples_repo
    history = repo.log(return_contents=True)
    head_commit = history['head']

    specKey = commit_spec_db_key_from_raw_key(head_commit)
    with repo._env.refenv.begin(write=True) as txn:
        specVal = txn.get(specKey)

        spec_raw = commit_spec_raw_val_from_db_val(specVal)
        modified_spec = spec_raw.user_spec
        modified_spec = modified_spec._replace(commit_time=10.42)
        modifiedVal = commit_spec_db_val_from_raw_val(*modified_spec)

        txn.put(specKey, modifiedVal.raw, overwrite=True)

    with pytest.raises(IOError):
        _ = repo.checkout(write=True)
    with pytest.raises(IOError):
        _ = repo.checkout(write=False)
    with pytest.raises(IOError):
        _ = repo.checkout(write=False, commit=head_commit)


================================================
FILE: tests/test_context_management.py
================================================
import pytest
import numpy as np

from conftest import fixed_shape_backend_params, variable_shape_backend_params

all_backend_params = list(set(fixed_shape_backend_params).union(set(variable_shape_backend_params)))


@pytest.mark.parametrize('backend1', all_backend_params)
@pytest.mark.parametrize('backend2', all_backend_params)
def test_nested_context_manager_does_not_close_all_open(repo, backend1, backend2):
    co = repo.checkout(write=True)
    fooaset = co.add_ndarray_column('foo', prototype=np.arange(10), backend=backend1)
    baraset = co.add_ndarray_column('bar', prototype=np.arange(10), backend=backend2, contains_subsamples=True)
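    # entering the checkout context should flag every column as inside a context
    # manager; exiting the inner ``fooaset`` block must not clear that flag while
    # the outer ``co`` block is still active (asserted below).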

    with co:
        assert co.columns._any_is_conman() is True
        assert fooaset._is_conman is True
        assert baraset._is_conman is True
        with fooaset as foo:
            assert co.columns._any_is_conman() is True
            assert foo._is_conman is True
            assert fooaset._is_conman is True
            assert baraset._is_conman is True
        assert co.columns._any_is_conman() is True
        assert fooaset._is_conman is True
        assert baraset._is_conman is True
    assert co.columns._any_is_conman() is False
    co.close()


================================================
FILE: tests/test_diff.py
================================================
import pytest
import numpy as np


class TestReaderWriterDiff(object):

    @pytest.mark.parametrize('writer', [False, True])
    def test_diff_by_commit_and_branch(self, repo_2_br_no_conf, writer):
        repo = repo_2_br_no_conf
        testco = repo.checkout(branch='testbranch')
        masterco = repo.checkout(write=writer, branch='master')
        commit_diffs = masterco.diff.commit(testco.commit_hash)
        branch_diffs = masterco.diff.branch('testbranch')
        assert commit_diffs == branch_diffs
        testco.close()
        masterco.close()

    @pytest.mark.parametrize('writer', [False, True])
    def test_diff_with_wrong_commit_hash(self, repo_2_br_no_conf, writer):
        repo = repo_2_br_no_conf
        testco = repo.checkout(branch='testbranch')
        masterco = repo.checkout(write=writer, branch='master')
        wrong_commit_hash = testco.commit_hash + 'WrongHash'
        with pytest.raises(ValueError):
            masterco.diff.commit(wrong_commit_hash)
        testco.close()
        masterco.close()

    @pytest.mark.parametrize('writer', [False, True])
    def test_diff_with_wrong_branch_name(self, repo_1_br_no_conf, writer):
        repo = repo_1_br_no_conf
        masterco = repo.checkout(write=writer, branch='master')
        with pytest.raises(ValueError):
            masterco.diff.branch('wrong_branch_name')
        masterco.close()

    @pytest.mark.parametrize('writer', [False, True])
    def test_comparing_diffs_of_dev_and_master(self, repo_1_br_no_conf, writer):
        repo = repo_1_br_no_conf
        dummyData = np.arange(50)

        # mutating and removing data from testbranch
        testco = repo.checkout(write=True, branch='testbranch')
        testco.columns['dummy']['1'] = dummyData
        del testco.columns['dummy']['2']
        testco.commit("mutation and removal")
        testco.close()

        co1 = repo.checkout(write=writer, branch='master')
        diffdata1 = co1.diff.branch('testbranch')
        diffs1 = diffdata1.diff
        co1.close()

        co2 = repo.checkout(write=writer, branch='testbranch')
        diffdata2 = co2.diff.branch('master')
        diffs2 = diffdata2.diff
        co2.close()

        assert diffs1.added.samples == diffs2.added.samples
        assert diffs1.deleted.samples == diffs2.deleted.samples
        assert diffs1.mutated.samples == diffs2.mutated.samples

    @pytest.mark.parametrize('writer', [False, True])
    def test_diff_data_samples(self, repo_1_br_no_conf, writer):
        repo = repo_1_br_no_conf
        dummyData = np.arange(50)

        # mutating and removing data from testbranch
        testco = repo.checkout(write=True, branch='testbranch')
        testco.columns['dummy']['1'] = dummyData
        del testco.columns['dummy']['2']
        testco.commit("mutation and removal")
        testco.close()

        co = repo.checkout(write=writer, branch='master')
        diffdata = co.diff.branch('testbranch')
        conflicts = diffdata.conflict
        assert conflicts.conflict is False

        diffs = diffdata.diff

        # check sample-level diff counts and confirm schema records are unchanged
        assert len(diffs.added.samples) == 20
        assert len(diffs.mutated.samples) == 1
        assert len(diffs.deleted.samples) == 1

        assert len(diffs.added.schema) == 0
        assert len(diffs.deleted.schema) == 0
        assert len(diffs.mutated.schema) == 0

        for datarecord in diffs.added.samples:
            assert 9 < int(datarecord.sample) < 20
        for mutated in diffs.mutated.samples:
            assert mutated.sample == '1'
        co.close()

    @pytest.mark.parametrize('writer', [False, True])
    def test_sample_addition_conflict(self, repo_1_br_no_conf, writer):
        # t1
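        # conflict buckets as exercised by these tests: t1 = same key added on both
        # branches with different data, t21/t22 = key removed on one branch but
        # mutated on the other, t3 = same key mutated differently on both branches.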
        repo = repo_1_br_no_conf
        dummyData = np.arange(50)

        # adding data in master
        co = repo.checkout(write=True, branch='master')
        dummyData[:] = 123
        co.columns['dummy']['55'] = dummyData
        co.commit('Adding data in master')
        co.close()

        # adding data in testbranch
        co = repo.checkout(write=True, branch='testbranch')
        dummyData[:] = 234
        co.columns['dummy']['55'] = dummyData
        co.commit('adding data in testbranch')
        co.close()

        co = repo.checkout(write=writer, branch='master')
        conflicts = co.diff.branch('testbranch').conflict
        assert conflicts.conflict is True
        assert len(conflicts.t1.samples) == 1
        for k in conflicts.t1.samples:
            assert k.sample == '55'
        co.close()

    @pytest.mark.parametrize('writer', [False, True])
    def test_sample_removal_conflict(self, repo_1_br_no_conf, writer):
        # t21 and t22
        dummyData = np.arange(50)
        dummyData[:] = 123
        repo = repo_1_br_no_conf
        co = repo.checkout(write=True, branch='master')
        del co.columns['dummy']['6']
        co.columns['dummy']['7'] = dummyData
        co.commit('removal & mutation in master')
        co.close()

        co = repo.checkout(write=True, branch='testbranch')
        co.columns['dummy']['6'] = dummyData
        del co.columns['dummy']['7']
        co.commit('removal & mutation in dev')
        co.close()

        co = repo.checkout(write=writer, branch='master')
        conflicts = co.diff.branch('testbranch').conflict
        assert len(conflicts.t21.samples) == 1
        assert len(conflicts.t22.samples) == 1
        for k in conflicts.t21.samples:
            assert k.sample == '6'
        for k in conflicts.t22.samples:
            assert k.sample == '7'
        co.close()

    @pytest.mark.parametrize('writer', [False, True])
    def test_sample_mutation_conflict(self, repo_1_br_no_conf, writer):
        # t3
        dummyData = np.arange(50)
        dummyData[:] = 123
        repo = repo_1_br_no_conf
        co = repo.checkout(write=True, branch='master')
        co.columns['dummy']['7'] = dummyData
        co.commit('mutation in master')
        co.close()

        co = repo.checkout(write=True, branch='testbranch')
        dummyData[:] = 234
        co.columns['dummy']['7'] = dummyData
        co.commit('mutation in dev')
        co.close()

        co = repo.checkout(write=writer, branch='master')
        conflicts = co.diff.branch('testbranch').conflict
        assert len(conflicts.t3.samples) == 1
        for k in conflicts.t3.samples:
            assert k.sample == '7'
        co.close()

    @pytest.mark.parametrize('writer', [False, True])
    def test_aset_addition_conflict(self, aset_samples_initialized_repo, writer):
        # t1
        repo = aset_samples_initialized_repo

        repo.create_branch('testbranch')
        co = repo.checkout(write=True, branch='master')
        co.add_ndarray_column(name='testing_aset', shape=(5, 7), dtype=np.float64)
        co.commit('aset init in master')
        co.close()

        co = repo.checkout(write=True, branch='testbranch')
        co.add_ndarray_column(name='testing_aset', shape=(7, 7), dtype=np.float64)
        co.commit('aset init in dev')
        co.close()

        co = repo.checkout(write=writer, branch='master')
        conflicts = co.diff.branch('testbranch').conflict
        assert len(conflicts.t1.schema) == 1
        for k in conflicts.t1.schema:
            assert k.column == 'testing_aset'
        co.close()

    @pytest.mark.parametrize('writer', [False, True])
    def test_aset_removal_conflict(self, aset_samples_initialized_repo, writer):
        # t21 and t22
        repo = aset_samples_initialized_repo
        co = repo.checkout(write=True, branch='master')
        co.add_ndarray_column(name='testing_aset1', shape=(5, 7), dtype=np.float64)
        co.add_ndarray_column(name='testing_aset2', shape=(5, 7), dtype=np.float64)
        co.commit('added asets')
        co.close()
        repo.create_branch('testbranch')

        co = repo.checkout(write=True, branch='master')
        del co.columns['testing_aset1']
        del co.columns['testing_aset2']
        co.add_ndarray_column(name='testing_aset2', shape=(5, 7), dtype=np.float32)
        co.commit('mutation and removal from master')
        co.close()

        co = repo.checkout(write=True, branch='testbranch')
        del co.columns['testing_aset1']
        del co.columns['testing_aset2']
        co.add_ndarray_column(name='testing_aset1', shape=(5, 7), dtype=np.float32)
        co.commit('mutation and removal from dev')
        co.close()

        co = repo.checkout(write=writer, branch='master')
        conflicts = co.diff.branch('testbranch')[1]
        assert len(conflicts.t21.schema) == 1
        assert len(conflicts.t22.schema) == 1
        assert list(conflicts.t21.schema.keys())[0].column == 'testing_aset1'
        assert list(conflicts.t22.schema.keys())[0].column == 'testing_aset2'
        co.close()

    @pytest.mark.parametrize('writer', [False, True])
    def test_aset_mutation_conflict(self, aset_samples_initialized_repo, writer):
        # t3
        repo = aset_samples_initialized_repo
        co = repo.checkout(write=True, branch='master')
        co.add_ndarray_column(name='testing_aset', shape=(5, 7), dtype=np.float64)
        co.commit('added aset')
        co.close()
        repo.create_branch('testbranch')

        co = repo.checkout(write=True, branch='master')
        del co.columns['testing_aset']
        co.add_ndarray_column(name='testing_aset', shape=(7, 7), dtype=np.float64)
        co.commit('mutation from master')
        co.close()

        co = repo.checkout(write=True, branch='testbranch')
        del co.columns['testing_aset']
        co.add_ndarray_column(name='testing_aset', shape=(5, 7), dtype=np.float32)
        co.commit('mutation from dev')
        co.close()

        co = repo.checkout(write=writer, branch='master')
        conflicts = co.diff.branch('testbranch')[1]
        assert len(conflicts.t3.schema) == 1
        assert list(conflicts.t3.schema.keys())[0].column == 'testing_aset'
        co.close()


    @pytest.mark.parametrize('writer', [False, True])
    def test_commits_inside_cm(self, aset_samples_initialized_repo, array5by7, writer):
        repo = aset_samples_initialized_repo
        repo.create_branch('testbranch')
        co = repo.checkout(write=True, branch='testbranch')
        aset = co.columns['writtenaset']
        aset2 = co.add_ndarray_column('aset2', prototype=array5by7)
        aset2[1] = array5by7
        with aset:
            aset[100] = array5by7
            co.commit('inside cm')
            aset[101] = array5by7
            co.commit('another commit inside cm')
        co.close()
        co = repo.checkout(write=writer, branch='testbranch')
        assert np.allclose(co.columns['writtenaset'][101], array5by7)
        diff = co.diff.branch('master').diff
        assert 'aset2' in [x.column for x in diff.added.schema.keys()]
        calledWithAset = False
        for record in diff.added.samples:
            if record.column == 'writtenaset':
                calledWithAset = True
                assert record.sample in [100, 101]
        assert calledWithAset is True
        co.close()


class TestWriterDiff(object):

    def test_status_and_staged_column(self, aset_samples_initialized_repo):
        repo = aset_samples_initialized_repo
        co = repo.checkout(write=True)
        co.add_str_column('DOESNOTEXIST')
        co['DOESNOTEXIST'][1] = 'foo'
        assert co.diff.status() == 'DIRTY'
        co.commit('init metadata')
        assert co.diff.status() == 'CLEAN'
        co.close()

    def test_status_and_staged_samples(self, aset_samples_initialized_repo):
        dummyData = np.zeros((5, 7))
        repo = aset_samples_initialized_repo
        co = repo.checkout()
        with pytest.raises(AttributeError):
            co.diff.status()  # Read checkout doesn't have status()

        co = repo.checkout(write=True)
        co.columns['writtenaset']['45'] = dummyData
        assert co.diff.status() == 'DIRTY'
        diff = co.diff.staged()
        calledWithAset = False
        for record in diff.diff.added.samples:
            if record.column == 'writtenaset':
                calledWithAset = True
                assert record.sample == '45'
        assert calledWithAset is True
        co.commit('adding')
        assert co.diff.status() == 'CLEAN'
        co.close()

    def test_status_and_staged_aset(self, aset_samples_initialized_repo):
        repo = aset_samples_initialized_repo
        co = repo.checkout(write=True)
        co.add_ndarray_column(name='sampleaset', shape=(3, 5), dtype=np.float32)
        assert co.diff.status() == 'DIRTY'
        diff = co.diff.staged()
        assert 'sampleaset' in [x.column for x in diff.diff.added.schema]
        co.commit('init aset')
        assert co.diff.status() == 'CLEAN'
        co.close()


def test_repo_diff_method_branch_names(aset_samples_initialized_repo):
    # t3
    repo = aset_samples_initialized_repo
    co = repo.checkout(write=True, branch='master')
    co.add_ndarray_column(name='testing_aset', shape=(5, 7), dtype=np.float64)
    co.commit('added aset')
    co.close()
    repo.create_branch('testbranch')

    co = repo.checkout(write=True, branch='master')
    del co.columns['testing_aset']
    co.add_ndarray_column(name='testing_aset', shape=(7, 7), dtype=np.float64)
    masterHEAD = co.commit('mutation from master')
    co.close()

    co = repo.checkout(write=True, branch='testbranch')
    del co.columns['testing_aset']
    co.add_ndarray_column(name='testing_aset', shape=(5, 7), dtype=np.float32)
    devHEAD = co.commit('mutation from dev')
    co.close()

    co = repo.checkout(write=False, branch='master')
    co_diff = co.diff.branch('testbranch')
    co.close()

    repo_diff = repo.diff('master', 'testbranch')
    assert co_diff == repo_diff


def test_repo_diff_method_commit_digests(aset_samples_initialized_repo):
    # t3
    repo = aset_samples_initialized_repo
    co = repo.checkout(write=True, branch='master')
    co.add_ndarray_column(name='testing_aset', shape=(5, 7), dtype=np.float64)
    co.commit('added aset')
    co.close()
    repo.create_branch('testbranch')

    co = repo.checkout(write=True, branch='master')
    del co.columns['testing_aset']
    co.add_ndarray_column(name='testing_aset', shape=(7, 7), dtype=np.float64)
    masterHEAD = co.commit('mutation from master')
    co.close()

    co = repo.checkout(write=True, branch='testbranch')
    del co.columns['testing_aset']
    co.add_ndarray_column(name='testing_aset', shape=(5, 7), dtype=np.float32)
    devHEAD = co.commit('mutation from dev')
    co.close()

    co = repo.checkout(write=False, branch='master')
    co_diff = co.diff.commit(devHEAD)
    co.close()

    repo_diff = repo.diff(masterHEAD, devHEAD)
    assert co_diff == repo_diff


def test_repo_diff_method_one_branch_one_commit_digest(aset_samples_initialized_repo):
    # t3
    repo = aset_samples_initialized_repo
    co = repo.checkout(write=True, branch='master')
    co.add_ndarray_column(name='testing_aset', shape=(5, 7), dtype=np.float64)
    co.commit('added aset')
    co.close()
    repo.create_branch('testbranch')

    co = repo.checkout(write=True, branch='master')
    del co.columns['testing_aset']
    co.add_ndarray_column(name='testing_aset', shape=(7, 7), dtype=np.float64)
    masterHEAD = co.commit('mutation from master')
    co.close()

    co = repo.checkout(write=True, branch='testbranch')
    del co.columns['testing_aset']
    co.add_ndarray_column(name='testing_aset', shape=(5, 7), dtype=np.float32)
    devHEAD = co.commit('mutation from dev')
    co.close()

    co = repo.checkout(write=False, branch='master')
    co_diff = co.diff.commit(devHEAD)
    co.close()

    repo_diff1 = repo.diff('master', devHEAD)
    assert co_diff == repo_diff1

    repo_diff2 = repo.diff(masterHEAD, 'testbranch')
    assert co_diff == repo_diff2


================================================
FILE: tests/test_diff_staged_summary.py
================================================
import pytest
import numpy as np


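# Each test below stages some changes in a writer checkout and compares the
# human-readable report produced by `hangar.records.summarize.status` against
# the expected text verbatim. The `repo_20_filled_samples2` fixture (assumed to
# be defined in the shared conftest.py) provides a repository whose "dummy"
# ndarray column (shape (50,), dtype int64) is already populated.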
def test_add_samples_to_existing_column(repo_20_filled_samples2):
    from hangar.records.summarize import status
    repo = repo_20_filled_samples2
    expected = '============ \n'\
               '| Branch: master \n'\
               ' \n'\
               '============ \n'\
               '| ADDED \n'\
               '|---------- \n'\
               '| Schema: 0 \n'\
               '|---------- \n'\
               '| Samples: 20 \n'\
               '|  - "dummy": 20 \n'\
               ' \n'\
               '============ \n'\
               '| DELETED \n'\
               '|---------- \n'\
               '| Schema: 0 \n'\
               '|---------- \n'\
               '| Samples: 0 \n'\
               ' \n'\
               '============ \n'\
               '| MUTATED \n'\
               '|---------- \n'\
               '| Schema: 0 \n'\
               '|---------- \n'\
               '| Samples: 0 \n'\
               ' \n'
    dummyData = np.arange(50).astype(np.int64)
    co2 = repo.checkout(write=True)
    for idx in range(10, 20):
        dummyData[:] = idx
        co2.columns['dummy'][str(idx)] = dummyData
        co2.columns['dummy'][idx] = dummyData
    df = co2.diff.staged()
    co2.close()
    assert status(repo._env.hashenv, 'master', df.diff).getvalue() == expected


def test_mutate_sample_values(repo_20_filled_samples2):
    from hangar.records.summarize import status
    repo = repo_20_filled_samples2
    expected = '============ \n'\
               '| Branch: master \n'\
               ' \n'\
               '============ \n'\
               '| ADDED \n'\
               '|---------- \n'\
               '| Schema: 0 \n'\
               '|---------- \n'\
               '| Samples: 0 \n'\
               ' \n'\
               '============ \n'\
               '| DELETED \n'\
               '|---------- \n'\
               '| Schema: 0 \n'\
               '|---------- \n'\
               '| Samples: 0 \n'\
               ' \n'\
               '============ \n'\
               '| MUTATED \n'\
               '|---------- \n'\
               '| Schema: 0 \n'\
               '|---------- \n'\
               '| Samples: 5 \n'\
               '|  - "dummy": 5 \n'\
               ' \n'

    dummyData = np.arange(50).astype(np.int64)
    co2 = repo.checkout(write=True)
    for idx in range(5, 10):
        dummyData[:] = idx + 10
        co2.columns['dummy'][idx] = dummyData
    df = co2.diff.staged()
    co2.close()
    assert status(repo._env.hashenv, 'master', df.diff).getvalue() == expected


def test_delete_samples(repo_20_filled_samples2):
    from hangar.records.summarize import status
    repo = repo_20_filled_samples2
    expected = '============ \n'\
               '| Branch: master \n'\
               ' \n'\
               '============ \n'\
               '| ADDED \n'\
               '|---------- \n'\
               '| Schema: 0 \n'\
               '|---------- \n'\
               '| Samples: 0 \n'\
               ' \n'\
               '============ \n'\
               '| DELETED \n'\
               '|---------- \n'\
               '| Schema: 0 \n'\
               '|---------- \n'\
               '| Samples: 5 \n'\
               '|  - "dummy": 5 \n'\
               ' \n'\
               '============ \n'\
               '| MUTATED \n'\
               '|---------- \n'\
               '| Schema: 0 \n'\
               '|---------- \n'\
               '| Samples: 0 \n'\
               ' \n'

    co2 = repo.checkout(write=True)
    for idx in range(5, 10):
        del co2.columns['dummy'][idx]
    df = co2.diff.staged()
    co2.close()
    assert status(repo._env.hashenv, 'master', df.diff).getvalue() == expected


def test_add_new_column_schema_and_samples(repo_20_filled_samples2):
    from hangar.records.summarize import status
    repo = repo_20_filled_samples2
    expected = (
        '============ \n'
        '| Branch: master \n'
        ' \n'
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '|  - "new_aset": \n'
        '|       digest="1=555a833b66ab" \n'
        '|       column_layout: flat \n'
        '|       column_type: ndarray \n'
        '|       schema_hasher_tcode: 1 \n'
        '|       data_hasher_tcode: 0 \n'
        '|       schema_type: fixed_shape \n'
        '|       shape: (10, 10) \n'
        '|       dtype: float32 \n'
        '|       backend: 01 \n'
        '|       backend_options: {\'complib\': \'blosc:lz4hc\', \'complevel\': 5, \'shuffle\': \'byte\'} \n'
        '|---------- \n'
        '| Samples: 5 \n'
        '|  - "new_aset": 5 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        ' \n'
    )
    co2 = repo.checkout(write=True)
    co2.add_ndarray_column('new_aset', shape=(10, 10), dtype=np.float32)
    for idx in range(5):
        dummyData = np.random.randn(10, 10).astype(np.float32)
        co2.columns['new_aset'][idx] = dummyData
    df = co2.diff.staged()
    co2.close()
    result = status(repo._env.hashenv, 'master', df.diff).getvalue()
    assert result == expected


def test_add_new_column_schema_and_sample_and_delete_old_column(repo_20_filled_samples2):
    from hangar.records.summarize import status
    repo = repo_20_filled_samples2
    expected = (
        '============ \n'
        '| Branch: master \n'
        ' \n'
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '|  - "new_aset": \n'
        '|       digest="1=555a833b66ab" \n'
        '|       column_layout: flat \n'
        '|       column_type: ndarray \n'
        '|       schema_hasher_tcode: 1 \n'
        '|       data_hasher_tcode: 0 \n'
        '|       schema_type: fixed_shape \n'
        '|       shape: (10, 10) \n'
        '|       dtype: float32 \n'
        '|       backend: 01 \n'
        '|       backend_options: {\'complib\': \'blosc:lz4hc\', \'complevel\': 5, \'shuffle\': \'byte\'} \n'
        '|---------- \n'
        '| Samples: 5 \n'
        '|  - "new_aset": 5 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '|  - "dummy": \n'
        '|       digest="1=18599cd5ea25" \n'
        '|       column_layout: flat \n'
        '|       column_type: ndarray \n'
        '|       schema_hasher_tcode: 1 \n'
        '|       data_hasher_tcode: 0 \n'
        '|       schema_type: fixed_shape \n'
        '|       shape: (50,) \n'
        '|       dtype: int64 \n'
        '|       backend: 10 \n'
        '|       backend_options: {} \n'
        '|---------- \n'
        '| Samples: 10 \n'
        '|  - "dummy": 10 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        ' \n'
    )
    co2 = repo.checkout(write=True)
    new = co2.add_ndarray_column('new_aset', shape=(10, 10), dtype=np.float32)
    for idx in range(5):
        dummyData = np.random.randn(10, 10).astype(np.float32)
        co2.columns['new_aset'][idx] = dummyData
    del co2.columns['dummy']
    df = co2.diff.staged()
    co2.close()
    result = status(repo._env.hashenv, 'master', df.diff).getvalue()
    assert result == expected


def test_add_new_schema_and_samples_and_change_old_backend(repo_20_filled_samples2):
    from hangar.records.summarize import status
    repo = repo_20_filled_samples2
    expected = (
        '============ \n'
        '| Branch: master \n'
        ' \n'
        '============ \n'
        '| ADDED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '|  - "new_aset": \n'
        '|       digest="1=555a833b66ab" \n'
        '|       column_layout: flat \n'
        '|       column_type: ndarray \n'
        '|       schema_hasher_tcode: 1 \n'
        '|       data_hasher_tcode: 0 \n'
        '|       schema_type: fixed_shape \n'
        '|       shape: (10, 10) \n'
        '|       dtype: float32 \n'
        '|       backend: 01 \n'
        '|       backend_options: {\'complib\': \'blosc:lz4hc\', \'complevel\': 5, \'shuffle\': \'byte\'} \n'
        '|---------- \n'
        '| Samples: 5 \n'
        '|  - "new_aset": 5 \n'
        ' \n'
        '============ \n'
        '| DELETED \n'
        '|---------- \n'
        '| Schema: 0 \n'
        '|---------- \n'
        '| Samples: 0 \n'
        ' \n'
        '============ \n'
        '| MUTATED \n'
        '|---------- \n'
        '| Schema: 1 \n'
        '|  - "dummy": \n'
        '|       digest="1=5d6cc8241705" \n'
        '|       column_layout: flat \n'
        '|       column_type: ndarray \n'
        '|       schema_hasher_tcode: 1 \n'
        '|       data_hasher_tcode: 0 \n'
        '|       schema_type: fixed_shape \n'
        '|       shape: (50,) \n'
        '|       dtype: int64 \n'
        '|       backend: 00 \n'
        '|       backend_options: {\'complib\': \'blosc:lz4hc\', \'complevel\': 5, \'shuffle\': \'byte\'} \n'
        '|---------- \n'
        '| Samples: 5 \n'
        '|  - "dummy": 5 \n'
        ' \n'
    )
    co2 = repo.checkout(write=True)
    co2.columns['dummy'].change_backend('00')
    co2.add_ndarray_column('new_aset', shape=(10, 10), dtype=np.float32)
    for idx in range(5):
        dummyData = np.random.randn(10, 10).astype(np.float32)
        co2.columns['new_aset'][idx] = dummyData
        co2.columns['dummy'][idx] = np.arange(50).astype(np.int64) + idx
    df = co2.diff.staged()
    co2.close()
    result = status(repo._env.hashenv, 'master', df.diff).getvalue()
    assert result == expected


================================================
FILE: tests/test_initiate.py
================================================
import os
import pytest
from hangar import Repository


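# Repository construction and initialization tests: pointing `Repository` at a
# directory without an existing Hangar store warns unless `exists=False` is
# passed explicitly, and nearly every repository method raises RuntimeError
# until `repo.init()` has been called.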
def test_imports():
    import hangar
    from hangar import Repository


def test_starting_up_repo_warns_should_exist_no_args(managed_tmpdir):
    with pytest.warns(UserWarning):
        repo = Repository(path=managed_tmpdir)
    repo.init(user_name='tester', user_email='foo@test.bar', remove_old=True)
    assert repo.list_branches() == ['master']
    assert os.path.isdir(repo._repo_path)
    assert str(repo._repo_path) == os.path.join(managed_tmpdir, '.hangar')
    co = repo.checkout(write=True)
    assert co.diff.status() == 'CLEAN'
    co.close()
    repo._env._close_environments()


def test_starting_up_repo_warns_should_exist_manual_args(managed_tmpdir):
    with pytest.warns(UserWarning):
        repo = Repository(path=managed_tmpdir, exists=True)
    repo.init(user_name='tester', user_email='foo@test.bar', remove_old=True)
    assert repo.list_branches() == ['master']
    assert os.path.isdir(repo._repo_path)
    assert str(repo._repo_path) == os.path.join(managed_tmpdir, '.hangar')
    co = repo.checkout(write=True)
    assert co.diff.status() == 'CLEAN'
    co.close()
    repo._env._close_environments()


def test_starting_up_repo_does_not_warn_not_exist_manual_args(managed_tmpdir):
    with pytest.warns(None) as warn_recs:
        repo = Repository(path=managed_tmpdir, exists=False)
    assert len(warn_recs) == 0

    repo.init(user_name='tester', user_email='foo@test.bar', remove_old=True)
    assert repo.list_branches() == ['master']
    assert os.path.isdir(repo._repo_path)
    assert str(repo._repo_path) == os.path.join(managed_tmpdir, '.hangar')
    co = repo.checkout(write=True)
    assert co.diff.status() == 'CLEAN'
    co.close()
    repo._env._close_environments()


def test_initial_read_checkout(managed_tmpdir):
    repo = Repository(path=managed_tmpdir, exists=False)
    repo.init(user_name='tester', user_email='foo@test.bar', remove_old=True)
    with pytest.raises(ValueError):
        repo.checkout()
    repo._env._close_environments()


def test_initial_arrayset(managed_tmpdir, randomsizedarray):
    repo = Repository(path=managed_tmpdir, exists=False)
    repo.init(user_name='tester', user_email='foo@test.bar', remove_old=True)

    w_checkout = repo.checkout(write=True)
    assert len(w_checkout.columns) == 0
    with pytest.raises(KeyError):
        w_checkout.columns['aset']
    aset = w_checkout.add_ndarray_column('aset', prototype=randomsizedarray)
    assert aset.column == 'aset'
    w_checkout.close()
    repo._env._close_environments()


def test_empty_commit(managed_tmpdir, caplog):
    repo = Repository(path=managed_tmpdir, exists=False)
    repo.init(user_name='tester', user_email='foo@test.bar', remove_old=True)
    w_checkout = repo.checkout(write=True)
    with pytest.raises(RuntimeError):
        w_checkout.commit('this is a merge message')
    w_checkout.close()
    repo._env._close_environments()


def test_cannot_operate_without_repo_init(managed_tmpdir):
    repo = Repository(path=managed_tmpdir, exists=False)

    with pytest.raises(RuntimeError):
        repo.writer_lock_held()
    with pytest.raises(RuntimeError):
        repo.checkout()
    with pytest.raises(RuntimeError):
        repo.writer_lock_held()
    with pytest.raises(RuntimeError):
        repo.log()
    with pytest.raises(RuntimeError):
        repo.summary()
    with pytest.raises(RuntimeError):
        repo.merge('fail', 'master', 'nonexistant')
    with pytest.raises(RuntimeError):
        repo.create_branch('test')
    with pytest.raises(RuntimeError):
        repo.list_branches()
    with pytest.raises(RuntimeError):
        repo.force_release_writer_lock()

    with pytest.raises(RuntimeError):
        repo.remote.add('origin', 'foo')
    with pytest.raises(RuntimeError):
        repo.remote.remove('origin')
    with pytest.raises(RuntimeError):
        repo.remote.fetch('origin', 'master')
    with pytest.raises(RuntimeError):
        repo.remote.fetch_data('origin', branch='master')
    with pytest.raises(RuntimeError):
        repo.remote.list_all()
    with pytest.raises(RuntimeError):
        repo.remote.ping('origin')
    with pytest.raises(RuntimeError):
        repo.remote.push('origin', 'master')
    with pytest.raises(RuntimeError):
        repo.remove_branch('master')

    with pytest.raises(RuntimeError):
        repo.path
    with pytest.raises(RuntimeError):
        repo.version
    with pytest.raises(RuntimeError):
        repo.writer_lock_held
    with pytest.raises(RuntimeError):
        repo.size_human
    with pytest.raises(RuntimeError):
        repo.size_nbytes

    assert repo._env.repo_is_initialized is False


def test_check_repo_size(repo_20_filled_samples):
    from hangar.utils import parse_bytes, folder_size

    expected_nbytes = folder_size(repo_20_filled_samples._repo_path, recurse=True)
    nbytes = repo_20_filled_samples.size_nbytes
    assert expected_nbytes == nbytes

    format_nbytes = repo_20_filled_samples.size_human
    # account for rounding when converting int to str.
    assert nbytes * 0.95 <= parse_bytes(format_nbytes) <= nbytes * 1.05


def test_force_release_writer_lock(managed_tmpdir, monkeypatch):

    repo = Repository(path=managed_tmpdir, exists=False)
    repo.init(user_name='tester', user_email='foo@test.bar', remove_old=True)
    co = repo.checkout(write=True)
    orig_lock = str(co._writer_lock)

    def mock_true(*args, **kwargs):
        return True

    # try to release the writer lock with a process which has different uid
    co._writer_lock = 'lololol'
    with pytest.raises(RuntimeError):
        monkeypatch.setattr(co, '_verify_alive', mock_true)
        monkeypatch.setattr(co._columns, '_destruct', mock_true)
        co.close()
    # replace, but rest of object is closed
    monkeypatch.setattr(co, '_writer_lock', orig_lock)
    monkeypatch.delattr(co._columns, '_destruct')
    co.close()
    repo._env._close_environments()


def test_force_release_writer_lock_works(managed_tmpdir):
    repo = Repository(path=managed_tmpdir, exists=False)
    repo.init(user_name='tester', user_email='foo@test.bar', remove_old=True)
    co = repo.checkout(write=True)

    # try to release the writer lock with a process which has different uid
    with pytest.warns(ResourceWarning):
        repo.force_release_writer_lock()

    co._writer_lock == 'LOCK_AVAILABLE'
    co.close()
    # replace, but rest of object is closed
    repo._env._close_environments()


def test_repo_summary_does_not_error_before_any_commit_made(capfd, managed_tmpdir):
    repo = Repository(path=managed_tmpdir, exists=False)
    repo.init(user_name='tester', user_email='foo@test.bar', remove_old=True)

    assert repo.summary() is None
    out, _ = capfd.readouterr()
    assert 'No commits have been made in the repository' in out
    repo._env._close_environments()


def test_get_ecosystem_details(managed_tmpdir):
    repo = Repository(path=managed_tmpdir, exists=False)
    repo.init(user_name='tester', user_email='foo@test.bar', remove_old=True)
    eco = repo._ecosystem_details()
    assert isinstance(eco, dict)
    assert 'host' in eco
    assert 'packages' in eco
    for package_name, version in eco['packages']:
        assert version is not None
    repo._env._close_environments()


def test_inject_repo_version(monkeypatch):
    import hangar
    monkeypatch.setattr("hangar.__version__", '0.2.0')
    assert hangar.__version__ == '0.2.0'


def test_check_repository_version(aset_samples_initialized_repo):
    from hangar import __version__
    from pkg_resources import parse_version

    repo = aset_samples_initialized_repo
    assert repo.version == parse_version(__version__).public


def test_check_repository_software_version_startup(managed_tmpdir):
    from hangar import Repository, __version__
    from pkg_resources import parse_version

    repo = Repository(managed_tmpdir, exists=False)
    repo.init('test user', 'test@foo.bar', remove_old=True)
    repo._env._close_environments()

    nrepo = Repository(managed_tmpdir, exists=True)
    assert nrepo.initialized is True
    assert nrepo.version == parse_version(__version__).public
    nrepo._env._close_environments()


@pytest.mark.parametrize('repo_v,hangar_v', [
    ['0.2.0', '0.3.0'],
    ['0.2.0', '0.3.1rc1'],
    ['0.2.0', '0.3.1.dev0'],
    ['0.2.0', '0.3.1'],
    ['0.3.0', '0.4.1.dev0'],
    ['0.3.0', '0.4.1rc1'],
    ['0.3.0', '0.4.0'],
    ['0.3.0', '0.4.1'],
    ['0.4.0', '0.5.0.dev0'],
    ['0.4.0', '0.5.0rc1'],
    ['0.4.0', '0.5.0'],
    ['0.4.0', '0.5.1'],
    ['0.5.0.dev0', '0.4.0'],
    ['0.5.0.dev0', '0.4.1'],
    ['0.5.0', '0.4.1'],
])
def test_check_repository_software_version_fails_hangar_version(monkeypatch, managed_tmpdir, repo_v, hangar_v):
    import hangar
    monkeypatch.setattr("hangar.__version__", hangar_v)
    monkeypatch.setattr("hangar.context.__version__", hangar_v)
    from hangar import Repository
    from hangar.records.vcompat import set_repository_software_version

    repo = Repository(managed_tmpdir, exists=False)
    repo.init('test user', 'test@foo.bar', remove_old=True)
    # force writing of new software version. should trigger error on next read.
    set_repository_software_version(repo._env.branchenv, repo_v, overwrite=True)
    try:
        assert repo.version == repo_v
    finally:
        repo._env._close_environments()

    assert hangar.__version__ == hangar_v

    with pytest.raises(RuntimeError):
        Repository(managed_tmpdir, exists=True)


@pytest.mark.parametrize('futureVersion', ['1.0.0', '0.14.1', '0.15.0', '1.4.1'])
def test_check_repository_software_version_works_on_newer_hangar_version(managed_tmpdir, monkeypatch, futureVersion):
    from hangar import Repository

    repo = Repository(managed_tmpdir, exists=False)
    repo.init('test user', 'test@foo.bar', remove_old=True)
    old_version = repo.version
    # force writing of new software version. should trigger error on next read.
    repo._env._close_environments()

    import hangar
    monkeypatch.setattr(hangar, '__version__', futureVersion)
    nrepo = Repository(managed_tmpdir, exists=True)
    assert hangar.__version__ == futureVersion
    assert nrepo.version == old_version
    nrepo._env._close_environments()


================================================
FILE: tests/test_merging.py
================================================
import pytest
import numpy as np


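# Merge behaviour tests. `repo.merge(message, master_branch, dev_branch)` and
# the writer-checkout `co.merge(message, dev_branch)` perform a fast-forward
# merge when the target branch has not diverged and a three-way merge
# otherwise; merges abort with RuntimeError while changes are staged, and
# conflicting sample values raise ValueError.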
def test_merge_fails_with_invalid_branch_name(repo_1_br_no_conf):
    with pytest.raises(ValueError):
        cmt_hash = repo_1_br_no_conf.merge('merge commit', 'master', 'failbranchname')
    # no message passed in
    with pytest.raises(TypeError):
        cmt_hash = repo_1_br_no_conf.merge('master', 'testbranch')


def test_is_ff_merge(repo_1_br_no_conf):
    testbranch_head = repo_1_br_no_conf.log(branch='testbranch', return_contents=True)['head']
    cmt_hash = repo_1_br_no_conf.merge('merge commit', 'master', 'testbranch')
    assert cmt_hash == testbranch_head


def test_writer_checkout_ff_merge(repo_1_br_no_conf):
    testbranch_head = repo_1_br_no_conf.log(branch='testbranch', return_contents=True)['head']
    co = repo_1_br_no_conf.checkout(write=True, branch='master')
    master_head = co.commit_hash
    mergeHash = co.merge('dummy message', 'testbranch')
    assert mergeHash == testbranch_head
    assert mergeHash != master_head
    assert co.branch_name == 'master'
    co.close()

    master_order = repo_1_br_no_conf.log(branch='master', return_contents=True)['order']
    testbranch_order = repo_1_br_no_conf.log(branch='testbranch', return_contents=True)['order']
    assert master_order == testbranch_order


def test_merge_fails_if_changes_staged(repo_1_br_no_conf):
    co = repo_1_br_no_conf.checkout(write=True, branch='master')
    co.add_str_column('DOESNOTEXIST')
    co['DOESNOTEXIST'][2] = 'lol'
    co.close()
    with pytest.raises(RuntimeError, match='Changes are currently pending'):
        repo_1_br_no_conf.merge('merge commit', 'master', 'testbranch')


def test_writer_checkout_merge_fails_if_changes_staged(repo_1_br_no_conf):
    co = repo_1_br_no_conf.checkout(write=True, branch='master')
    co.add_str_column('DOESNOTEXIST')
    co['DOESNOTEXIST'][2] = 'lol'
    with pytest.raises(RuntimeError, match='Changes are currently pending'):
        co.merge('merge commit', 'testbranch')
    co.close()


def test_ff_merge_no_conf_correct_contents_for_name_or_hash_checkout(repo_1_br_no_conf):
    cmt_hash = repo_1_br_no_conf.merge('merge commit', 'master', 'testbranch')
    coByName = repo_1_br_no_conf.checkout(branch='master')
    coByHash = repo_1_br_no_conf.checkout(commit=cmt_hash)

    assert len(coByHash.columns) == len(coByName.columns)
    for asetn in coByHash.columns.keys():
        aset_byHash = coByHash.columns[asetn]
        aset_byName = coByName.columns[asetn]
        assert len(aset_byHash) == len(aset_byName)
        for k, v in aset_byHash.items():
            assert np.allclose(v, aset_byName[k])

    coByHash.close()
    coByName.close()


def test_ff_merge_no_conf_updates_head_commit_of_branches(repo_1_br_no_conf):
    repo = repo_1_br_no_conf
    co = repo.checkout(write=True, branch='master')
    co.close()
    repo.create_branch('NotUpdatedBranch')
    old_branch_head = repo.log(branch='NotUpdatedBranch', return_contents=True)['head']

    cmt_hash = repo.merge('merge commit', 'master', 'testbranch')
    master_head = repo.log(branch='master', return_contents=True)['head']
    testbranch_head = repo.log(branch='testbranch', return_contents=True)['head']
    assert master_head == testbranch_head
    assert cmt_hash == master_head

    check_old_branch = repo.log(branch='NotUpdatedBranch', return_contents=True)['head']
    assert check_old_branch == old_branch_head
    assert check_old_branch != master_head


def test_is_3_way_merge(repo_2_br_no_conf):
    testbranch_head = repo_2_br_no_conf.log(branch='testbranch', return_contents=True)['head']
    masterbranch_head = repo_2_br_no_conf.log(branch='master', return_contents=True)['head']
    cmt_hash = repo_2_br_no_conf.merge('merge commit', 'master', 'testbranch')
    assert cmt_hash != testbranch_head
    assert cmt_hash != masterbranch_head


def test_writer_checkout_is_3_way_merge(repo_2_br_no_conf):
    testbranch_head = repo_2_br_no_conf.log(branch='testbranch', return_contents=True)['head']
    masterbranch_head = repo_2_br_no_conf.log(branch='master', return_contents=True)['head']
    co = repo_2_br_no_conf.checkout(write=True, branch='master')
    cmt_hash = co.merge('merge commit', 'testbranch')
    co.close()
    assert cmt_hash != testbranch_head
    assert cmt_hash != masterbranch_head


def test_3_way_merge_no_conflict_correct_contents(repo_2_br_no_conf):
    cmt_hash = repo_2_br_no_conf.merge('merge commit', 'master', 'testbranch')
    co = repo_2_br_no_conf.checkout(branch='master')
    # columns
    assert len(co.columns) == 1
    assert 'dummy' in co.columns
    # column samples
    aset = co.columns['dummy']
    assert len(aset) == 50

    # column sample values
    checkarr = np.zeros_like(np.arange(50))
    for k, v in aset.items():
        checkarr[:] = int(k)
        assert np.allclose(v, checkarr)

    # column sample keys
    aset_keys = list(aset.keys())
    for genKey in range(30):
        assert str(genKey) in aset_keys
        aset_keys.remove(str(genKey))
    assert len(aset_keys) == 20
    co.close()


def test_writer_checkout_3_way_merge_no_conflict_correct_contents(repo_2_br_no_conf):
    co = repo_2_br_no_conf.checkout(write=True, branch='master')
    cmt_hash = co.merge('merge commit', 'testbranch')

    # columns
    assert len(co.columns) == 1
    assert 'dummy' in co.columns
    # column samples
    aset = co.columns['dummy']
    assert len(aset) == 50

    # column sample values
    checkarr = np.zeros_like(np.arange(50))
    for k, v in aset.items():
        checkarr[:] = int(k)
        assert np.allclose(v, checkarr)

    # column sample keys
    aset_keys = list(aset.keys())
    for genKey in range(30):
        assert str(genKey) in aset_keys
        aset_keys.remove(str(genKey))
    assert len(aset_keys) == 20
    co.close()


def test_3_way_merge_no_conflict_and_mutation_correct_contents(repo_2_br_no_conf):
    co = repo_2_br_no_conf.checkout(write=True, branch='master')
    co.columns['dummy']['1'] = co.columns['dummy']['0']
    co.commit('mutated master')
    co.close()

    co = repo_2_br_no_conf.checkout(write=True, branch='testbranch')
    co.columns['dummy']['2'] = co.columns['dummy']['0']
    co.commit('mutated testbranch')
    co.close()

    repo_2_br_no_conf.merge('merge commit', 'master', 'testbranch')
    co = repo_2_br_no_conf.checkout(branch='master')

    # columns
    assert len(co.columns) == 1
    assert 'dummy' in co.columns
    # column samples
    aset = co.columns['dummy']
    assert len(aset) == 50

    # column sample values
    checkarr = np.zeros_like(np.arange(50))
    for k, v in aset.items():
        if k == '2':
            checkarr[:] = 0
        elif k == '1':
            checkarr[:] = 0
        else:
            checkarr[:] = int(k)
        assert np.allclose(v, checkarr)

    # column sample keys
    aset_keys = list(aset.keys())
    for genKey in range(30):
        assert str(genKey) in aset_keys
        aset_keys.remove(str(genKey))
    assert len(aset_keys) == 20
    co.close()


def test_3_way_merge_updates_head_commit_of_branches(repo_2_br_no_conf):
    orig_testbranch_head = repo_2_br_no_conf.log(branch='testbranch', return_contents=True)['head']
    orig_masterbranch_head = repo_2_br_no_conf.log(branch='master', return_contents=True)['head']

    cmt_hash = repo_2_br_no_conf.merge('merge commit', 'master', 'testbranch')

    new_testbranch_head = repo_2_br_no_conf.log(branch='testbranch', return_contents=True)['head']
    new_masterbranch_head = repo_2_br_no_conf.log(branch='master', return_contents=True)['head']

    assert orig_testbranch_head == new_testbranch_head
    assert orig_masterbranch_head != new_masterbranch_head
    assert new_masterbranch_head == cmt_hash


def test_writer_checkout_3_way_merge_updates_head_commit_of_branches(repo_2_br_no_conf):
    orig_testbranch_head = repo_2_br_no_conf.log(branch='testbranch', return_contents=True)['head']
    orig_masterbranch_head = repo_2_br_no_conf.log(branch='master', return_contents=True)['head']

    co = repo_2_br_no_conf.checkout(write=True, branch='master')
    cmt_hash = co.merge('merge commit', 'testbranch')
    assert cmt_hash == co.commit_hash
    co.close()

    new_testbranch_head = repo_2_br_no_conf.log(branch='testbranch', return_contents=True)['head']
    new_masterbranch_head = repo_2_br_no_conf.log(branch='master', return_contents=True)['head']
    assert orig_testbranch_head == new_testbranch_head
    assert orig_masterbranch_head != new_masterbranch_head
    assert new_masterbranch_head == cmt_hash


class TestArraysetSampleConflicts(object):

    def test_conflict_additions_same_str_name_different_value(self, repo_2_br_no_conf):
        newdata = np.arange(50)
        newdata = newdata * 2

        repo = repo_2_br_no_conf
        co = repo.checkout(write=True, branch='master')
        co.columns['dummy']['15'] = newdata
        co.commit('commit on master with conflicting data')
        co.close()

        with pytest.raises(ValueError):
            repo.merge('merge commit', 'master', 'testbranch')

    def test_conflict_additions_same_int_name_different_value(self, repo_2_br_no_conf):
        newdata = np.arange(50)
        newdata = newdata * 2

        repo = repo_2_br_no_conf
        co = repo.checkout(write=True, branch='master')
        co.columns['dummy'][15] = newdata
        co.commit('commit on master with conflicting data')
        co.close()

        with pytest.raises(ValueError):
            repo.merge('merge commit', 'master', 'testbranch')

    def test_conflict_additions_same_str_and_int_name_different_value(self, repo_2_br_no_conf):
        newdata = np.arange(50)
        newdata = newdata * 2

        repo = repo_2_br_no_conf
        co = repo.checkout(write=True, branch='master')
        co.columns['dummy'][15] = newdata
        co.columns['dummy']['15'] = newdata
        co.commit('commit on master with conflicting data')
        co.close()

        with pytest.raises(ValueError):
            repo.merge('merge commit', 'master', 'testbranch')

    def test_no_conflict_additions_same_name_and_value(self, repo_2_br_no_conf):
        newdata = np.arange(50)
        newdata[:] = 15

        repo = repo_2_br_no_conf
        co = repo.checkout(write=True, branch='master')
        co.columns['dummy']['15'] = newdata
        co.columns['dummy'][15] = newdata
        co.commit('commit on master with same value data')
        co.close()

        cmt_hash = repo.merge('merge commit', 'master', 'testbranch')
        co = repo.checkout(commit=cmt_hash)
        aset = co.columns['dummy']
        assert np.allclose(aset['15'], newdata)
        assert np.allclose(aset[15], newdata)
        co.close()

    def test_conflict_mutations_same_name_different_value(self, repo_2_br_no_conf):
        repo = repo_2_br_no_conf
        co = repo.checkout(write=True, branch='master')
        newdata = np.arange(50)
        co.columns['dummy']['0'] = newdata
        co.commit('commit on master with conflicting data')
        co.close()

        co = repo.checkout(write=True, branch='testbranch')
        newdata = newdata * 2
        co.columns['dummy']['0'] = newdata
        co.commit('commit on testbranch with conflicting data')
        co.close()

        with pytest.raises(ValueError):
            repo.merge('merge commit', 'master', 'testbranch')

    def test_conflict_mutation_and_removal(self, repo_2_br_no_conf):
        repo = repo_2_br_no_conf
        co = repo.checkout(write=True, branch='master')
        newdata = np.arange(50)
        co.columns['dummy']['0'] = newdata
        co.commit('commit on master with conflicting data')
        co.close()

        co = repo.checkout(write=True, branch='testbranch')
        del co.columns['dummy']['0']
        co.commit('commit on testbranch with removal')
        co.close()

        with pytest.raises(ValueError):
            repo.merge('merge commit', 'master', 'testbranch')

    def test_no_conflict_both_removal(self, repo_2_br_no_conf):
        repo = repo_2_br_no_conf
        co = repo.checkout(write=True, branch='master')
        del co.columns['dummy']['0']
        del co.columns['dummy'][21]
        co.commit('commit on master with removal')
        co.close()

        co = repo.checkout(write=True, branch='testbranch')
        del co.columns['dummy']['0']
        del co.columns['dummy'][10]
        co.commit('commit on testbranch with removal')
        co.close()

        cmt_hash = repo.merge('merge commit', 'master', 'testbranch')
        co = repo.checkout(commit=cmt_hash)
        aset = co.columns['dummy']
        assert '0' not in aset
        assert len(aset) == 47


================================================
FILE: tests/test_optimized_utils.py
================================================
import pytest

from hangar.optimized_utils import SizedDict


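# `SizedDict` behaves like a dict with a bounded number of entries: once more
# than `maxsize` items have been inserted, the oldest keys are evicted first.
# A minimal usage sketch of the behaviour covered by the tests below:
#
#     d = SizedDict(maxsize=2)
#     d['a'] = 1
#     d['b'] = 2
#     d['c'] = 3   # 'a' is evicted; only 'b' and 'c' remain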
def test_sizeddict_maxsize_property():
    d = SizedDict(maxsize=5)
    assert d.maxsize == 5
    d2 = SizedDict(maxsize=10)
    assert d2.maxsize == 10


def test_sizeddict_setitem_no_overflow_retains_keys_and_len():
    d = SizedDict(maxsize=5)
    for i in range(5):
        d[i] = i

    assert len(d) == 5
    for i in range(5):
        assert i in d
        assert d[i] == i


def test_sizeddict_setitem_overflow_truncates_keys_and_len():
    d = SizedDict(maxsize=5)
    for i in range(10):
        d[i] = i

    assert len(d) == 5
    for i in range(0, 5):
        assert i not in d
        with pytest.raises(KeyError):
            _ = d[i]
    for i in range(5, 10):
        assert i in d
        assert d[i] == i


def test_sizeddict_update_no_overflow_retains_keys_and_len():
    d = SizedDict(maxsize=5)
    inp = {i: i for i in range(5)}
    d.update(inp)

    assert len(d) == 5
    for i in range(5):
        assert i in d
        assert d[i] == i


def test_sizeddict_update_overflow_truncates_keys_and_len():
    d = SizedDict(maxsize=5)
    inp = {i: i for i in range(10)}
    d.update(inp)

    assert len(d) == 5
    for i in range(0, 5):
        assert i not in d
        with pytest.raises(KeyError):
            _ = d[i]
    for i in range(5, 10):
        assert i in d
        assert d[i] == i


def test_sizeddict_get_returns_default_on_missing_key():
    d = SizedDict()
    res = d.get('doesnotexist')
    assert res is None
    res = d.get('doesnotexist', default='foo')
    assert res == 'foo'


def test_sizeddict_delitem():
    d = SizedDict(maxsize=5)
    inp = {i: i for i in range(5)}
    d.update(inp)

    del d[3]
    assert len(d) == 4
    assert 3 not in d

    d['new'] = 'foo'
    assert len(d) == 5
    assert 'new' in d


def test_sizeddict_pop():
    d = SizedDict(maxsize=5)
    inp = {i: i for i in range(5)}
    d.update(inp)

    res = d.pop(0)
    assert res == 0
    assert len(d) == 4
    res = d.pop('doesnotexist', default='foo')
    assert res == 'foo'
    assert len(d) == 4


def test_sizeddict_popitem():
    d = SizedDict(maxsize=5)
    inp = {i: i for i in range(5)}
    d.update(inp)

    res = d.popitem()
    assert res == (4, 4)
    assert len(d) == 4
    res = d.popitem()
    assert res == (3, 3)
    assert len(d) == 3

    d['foo'] = 'bar'
    assert len(d) == 4
    res = d.popitem()
    assert res == ('foo', 'bar')
    assert len(d) == 3


def test_sizeddict_keys():
    d = SizedDict(maxsize=5)
    inp = {str(i): i for i in range(5)}
    d.update(inp)

    assert list(d.keys()) == list(inp.keys())
    for res_k, expected_k in zip(d.keys(), inp.keys()):
        assert res_k == expected_k


def test_sizeddict_values():
    d = SizedDict(maxsize=5)
    inp = {str(i): i for i in range(5)}
    d.update(inp)

    assert list(d.values()) == list(inp.values())
    for res_v, expected_v in zip(d.values(), inp.values()):
        assert res_v == expected_v


def test_sizeddict_items():
    d = SizedDict(maxsize=5)
    inp = {str(i): i for i in range(5)}
    d.update(inp)

    assert list(d.items()) == list(inp.items())
    for res_kv, expected_kv in zip(d.items(), inp.items()):
        assert res_kv == expected_kv


def test_sizeddict_setdefault():
    d = SizedDict(maxsize=5)
    inp = {i: i for i in range(5)}
    d.update(inp)

    res = d.setdefault('doesnotexist')
    assert res is None
    assert len(d) == 5
    assert 'doesnotexist' in d
    assert d['doesnotexist'] is None

    res = d.setdefault('doesnotexist2', default=True)
    assert res is True
    assert len(d) == 5
    assert 'doesnotexist2' in d
    assert d['doesnotexist2'] is True

    res = d.setdefault(2, default=True)
    assert res == 2
    assert len(d) == 5
    assert 2 in d
    assert d[2] == 2


def test_sizeddict_clear():
    d = SizedDict(maxsize=5)
    inp = {i: i for i in range(5)}
    d.update(inp)

    assert len(d) == 5
    d.clear()
    assert len(d) == 0
    assert len(d._stack) == 0
    assert len(d._data) == 0
    assert d._stack_size == 0
    assert d._maxsize == 5


def test_sizeddict_repr():
    d = SizedDict(maxsize=5)
    inp = {i: i for i in range(5)}
    d.update(inp)

    expected = repr(inp)
    res = repr(d)
    assert res == expected


def test_sizeddict_is_pickleable():
    import pickle

    d = SizedDict(maxsize=5)
    inp = {i: i for i in range(5)}
    d.update(inp)

    pick = pickle.dumps(d, protocol=pickle.HIGHEST_PROTOCOL)
    res = pickle.loads(pick)

    assert res._maxsize == d._maxsize
    assert res._stack == d._stack
    assert res._stack_size == d._stack_size
    assert res._data == d._data


================================================
FILE: tests/test_remote_serialize.py
================================================
import pytest

import numpy as np


param_shapes = [(1,), (1000,), (1, 1), (623, 3, 5), (2, 4, 5, 6, 1, 3)]
param_dtypes = [np.uint8, np.float32, np.float64, np.int32]
param_digest = ['0=digesta', '0=digestaaaaaa', '2=digestaaaaaaaaaaaaaaaaaaaaaaaaaa']
param_schema = ['schemaa', 'schemaaaaaaaaa', 'schemaaaaaaaaaaaaaaaa']


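# Round-trip serialization tests for the remote transport helpers in
# `hangar.remote.chunks`: ndarray / str / bytes payloads, (digest, schema)
# identity tuples, full data records, and packed record lists are serialized,
# deserialized, and compared field-by-field against the original inputs.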
def assert_array_equal(arr, arr2):
    assert np.array_equal(arr, arr2)
    assert arr.dtype == arr2.dtype


@pytest.fixture(scope='module', params=param_shapes)
def arr_shape(request):
    return request.param

@pytest.fixture(scope='module', params=param_dtypes)
def arr_dtype(request):
    return request.param

@pytest.fixture(scope='module', params=param_digest)
def ident_digest(request):
    return request.param

@pytest.fixture(scope='module', params=param_schema)
def ident_schema(request):
    return request.param


@pytest.fixture(scope='module')
def array_testcase(arr_shape, arr_dtype):
    arr = 200 * np.random.random_sample(arr_shape) - 100
    return arr.astype(arr_dtype)


@pytest.fixture(scope='module', params=[
    'hello', ' world', 'how are you today! ',
    '325', f'{"".join([str(i) for i in range(100)])}',
    'o\n\x01'
])
def str_testcase(request):
    return request.param


@pytest.fixture(scope='module', params=[
    b'hello', b' world', b'how are you today! ',
    b'325', b'\x01\x00\x12\x14'
])
def bytes_testcase(request):
    return request.param


@pytest.fixture(scope='module')
def ident_testcase(ident_digest, ident_schema):
    return (ident_digest, ident_schema)


def test_serialize_deserialize_array(array_testcase):
    from hangar.remote.chunks import _serialize_arr
    from hangar.remote.chunks import _deserialize_arr

    raw = _serialize_arr(array_testcase)
    res = _deserialize_arr(raw)
    assert_array_equal(array_testcase, res)


def test_serialize_deserialize_str(str_testcase):
    from hangar.remote.chunks import _serialize_str, _deserialize_str
    raw = _serialize_str(str_testcase)
    res = _deserialize_str(raw)
    assert res == str_testcase


def test_serialize_deserialize_bytes(bytes_testcase):
    from hangar.remote.chunks import _serialize_bytes, _deserialize_bytes
    raw = _serialize_bytes(bytes_testcase)
    res = _deserialize_bytes(raw)
    assert bytes_testcase == res


@pytest.mark.parametrize('expected_dtype_code,data', [
    (0, np.array([0, 1, 2, 3, 4])),
    (2, 'i am string'),
    (3, b'i am bytes')
])
def test_serialize_deserialize_data(expected_dtype_code, data):
    from hangar.remote.chunks import serialize_data, deserialize_data

    dtcode, raw = serialize_data(data)
    res = deserialize_data(dtype_code=dtcode, raw_data=raw)
    assert dtcode == expected_dtype_code
    if isinstance(res, np.ndarray):
        assert_array_equal(data, res)
    elif isinstance(res, (str, bytes)):
        assert data == res
    else:
        raise TypeError(data)


def test_serialize_deserialize_ident(ident_testcase):
    from hangar.remote.chunks import serialize_ident
    from hangar.remote.chunks import deserialize_ident
    from hangar.remote.chunks import DataIdent

    digest, schema = ident_testcase
    raw = serialize_ident(digest, schema)
    res = deserialize_ident(raw)
    assert isinstance(res, DataIdent)
    assert res.digest == digest
    assert res.schema == schema


def test_serialize_deserialize_record(array_testcase, ident_testcase):
    from hangar.remote.chunks import serialize_record
    from hangar.remote.chunks import deserialize_record
    from hangar.remote.chunks import DataRecord

    digest, schema = ident_testcase
    raw = serialize_record(array_testcase, digest, schema)
    res = deserialize_record(raw)
    assert isinstance(res, DataRecord)
    assert_array_equal(res.data, array_testcase)
    assert res.digest == digest
    assert res.schema == schema


@pytest.mark.parametrize('nrecords', [1, 25])
def test_serialize_deserialize_record_pack(ident_testcase, nrecords):
    from hangar.remote.chunks import serialize_record
    from hangar.remote.chunks import serialize_record_pack
    from hangar.remote.chunks import deserialize_record
    from hangar.remote.chunks import deserialize_record_pack
    from hangar.remote.chunks import DataRecord

    idx = 0
    ArrList, RecList = [], []
    digest, schema = ident_testcase
    for shape in param_shapes:
        for dtype in param_dtypes:
            arr = 200 * np.random.random_sample(shape) + 100
            arr = arr.astype(dtype)
            digest = f'digest{str(idx) + str(digest)}'
            schema = f'schema{str(idx) + str(schema)}'
            idx += 1

            ArrList.append((arr, digest, schema))
            RecList.append(serialize_record(arr, digest, schema))

    rawpack = serialize_record_pack(RecList)
    reslist = deserialize_record_pack(rawpack)

    assert reslist == RecList

    for rawres, origRec in zip(reslist, ArrList):
        resRec = deserialize_record(rawres)
        assert isinstance(resRec, DataRecord)
        assert_array_equal(resRec.data, origRec[0])
        assert resRec.digest == origRec[1]
        assert resRec.schema == origRec[2]


def test_serialize_deserialize_ident_digest_field_only(ident_testcase):
    from hangar.remote.chunks import serialize_ident
    from hangar.remote.chunks import deserialize_ident
    from hangar.remote.chunks import DataIdent

    digest, schema = ident_testcase
    raw = serialize_ident(digest, '')
    res = deserialize_ident(raw)
    assert isinstance(res, DataIdent)
    assert res.digest == digest
    assert res.schema == ''


def test_serialize_deserialize_ident_schema_field_only(ident_testcase):
    from hangar.remote.chunks import serialize_ident
    from hangar.remote.chunks import deserialize_ident
    from hangar.remote.chunks import DataIdent

    digest, schema = ident_testcase
    raw = serialize_ident('', schema)
    res = deserialize_ident(raw)
    assert isinstance(res, DataIdent)
    assert res.digest == ''
    assert res.schema == schema


@pytest.mark.parametrize('nrecords', [1, 25])
def test_serialize_deserialize_ident_only_record_pack(ident_testcase, nrecords):
    from hangar.remote.chunks import serialize_ident
    from hangar.remote.chunks import deserialize_ident
    from hangar.remote.chunks import serialize_record_pack
    from hangar.remote.chunks import deserialize_record_pack
    from hangar.remote.chunks import DataIdent

    idx = 0
    IdentList, RawList = [], []
    digest, schema = ident_testcase
    for idx in range(nrecords):
        digest = f'digest{str(idx) + str(digest)}'
        schema = f'schema{str(idx) + str(schema)}'

        IdentList.append((digest, schema))
        RawList.append(serialize_ident(digest, schema))

    packed_raw = serialize_record_pack(RawList)
    unpacked_raw = deserialize_record_pack(packed_raw)

    assert unpacked_raw == RawList

    for raw, origIdent in zip(unpacked_raw, IdentList):
        resIdent = deserialize_ident(raw)
        assert isinstance(resIdent, DataIdent)
        assert resIdent.digest == origIdent[0]
        assert resIdent.schema == origIdent[1]


@pytest.mark.parametrize('nrecords', [1, 25])
def test_serialize_deserialize_ident_only_digest_only_record_pack(ident_testcase, nrecords):
    from hangar.remote.chunks import serialize_ident
    from hangar.remote.chunks import deserialize_ident
    from hangar.remote.chunks import serialize_record_pack
    from hangar.remote.chunks import deserialize_record_pack
    from hangar.remote.chunks import DataIdent

    idx = 0
    IdentList, RawList = [], []
    digest, schema = ident_testcase
    for idx in range(nrecords):
        digest = f'digest{str(idx)+str(digest)}'
        schema = f''

        IdentList.append((digest, schema))
        RawList.append(serialize_ident(digest, schema))

    packed_raw = serialize_record_pack(RawList)
    unpacked_raw = deserialize_record_pack(packed_raw)

    assert unpacked_raw == RawList

    for raw, origIdent in zip(unpacked_raw, IdentList):
        resIdent = deserialize_ident(raw)
        assert isinstance(resIdent, DataIdent)
        assert resIdent.digest == origIdent[0]
        assert resIdent.schema == origIdent[1]


@pytest.mark.parametrize('nrecords', [1, 25])
def test_serialize_deserialize_ident_only_schema_only_record_pack(ident_testcase, nrecords):
    from hangar.remote.chunks import serialize_ident
    from hangar.remote.chunks import deserialize_ident
    from hangar.remote.chunks import serialize_record_pack
    from hangar.remote.chunks import deserialize_record_pack
    from hangar.remote.chunks import DataIdent

    idx = 0
    IdentList, RawList = [], []
    digest, schema = ident_testcase
    for idx in range(nrecords):
        digest = f''
        schema = f'schema{str(idx)+str(schema)}'

        IdentList.append((digest, schema))
        RawList.append(serialize_ident(digest, schema))

    packed_raw = serialize_record_pack(RawList)
    unpacked_raw = deserialize_record_pack(packed_raw)

    assert unpacked_raw == RawList

    for raw, origIdent in zip(unpacked_raw, IdentList):
        resIdent = deserialize_ident(raw)
        assert isinstance(resIdent, DataIdent)
        assert resIdent.digest == origIdent[0]
        assert resIdent.schema == origIdent[1]


================================================
FILE: tests/test_remotes.py
================================================
import pytest

import numpy as np
import time
from os.path import join as pjoin
from os import mkdir
from random import randint
import platform


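# Remote bookkeeping and client/server round-trip tests. Adding, removing, and
# listing remotes is exercised against a bare repository, while the
# push/clone/fetch tests run against the `server_instance` fixture (assumed to
# start a local Hangar gRPC server) and verify that cloned commits reference
# remote data lazily until it is explicitly fetched.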
@pytest.mark.parametrize('name', [
    'invalid\n', '\ninvalid', 'inv\name', 'inva/lid', 12, ' try', 'and this ',
    'VeryLongNameIsInvalidOver64CharactersNotAllowedVeryLongNameIsInva'])
def test_cannot_add_invalid_remote_names(repo, name):
    with pytest.raises(ValueError):
        repo.remote.add(name, 'localhost:50051')


def test_list_all_remotes_works(repo):

    remote_spec1 = repo.remote.add('origin', 'test')
    currentRemotes = repo.remote.list_all()

    assert len(currentRemotes) == 1
    currentSpec = currentRemotes[0]
    assert len(currentSpec) == 2
    assert currentSpec.name == 'origin'
    assert currentSpec.address == 'test'

    remote_spec2 = repo.remote.add('origin2', 'test2')
    currentRemotes = repo.remote.list_all()

    assert len(currentRemotes) == 2
    currentSpec = currentRemotes[0]
    assert currentSpec == remote_spec1
    assert len(currentSpec) == 2
    assert currentSpec.name == 'origin'
    assert currentSpec.address == 'test'
    currentSpec = currentRemotes[1]
    assert currentSpec == remote_spec2
    assert currentSpec.name == 'origin2'
    assert currentSpec.address == 'test2'


def test_cannot_add_remote_twice_with_same_name(repo):
    remote_spec = repo.remote.add('origin', 'test')
    assert remote_spec.name == 'origin'
    assert remote_spec.address == 'test'
    with pytest.raises(ValueError):
        repo.remote.add('origin', 'new')


def test_remove_remote_which_does_not_exist_fails(repo):
    with pytest.raises(ValueError):
        repo.remote.remove('origin')


def test_can_update_remote_after_removal(repo):
    remote_spec = repo.remote.add('origin', 'test')
    assert remote_spec.name == 'origin'
    assert remote_spec.address == 'test'
    channel_address_removed = repo.remote.remove('origin')
    assert channel_address_removed.name == 'origin'
    assert channel_address_removed.address == 'test'
    new_name = repo.remote.add('origin', 'test2')
    assert new_name.name == 'origin'
    assert new_name.address == 'test2'


def test_server_is_started_multiple_times_via_ping_pong(server_instance,
                                                        aset_samples_initialized_repo):
    # start multiple times and test that pings go through multiple times
    aset_samples_initialized_repo.remote.add('origin', server_instance)
    roundTripTime = aset_samples_initialized_repo.remote.ping('origin')
    assert isinstance(roundTripTime, float)


@pytest.mark.parametrize('nCommits,nSamples', [[1, 10], [5, 10]])
def test_push_and_clone_master_linear_history_multiple_commits(
        server_instance, repo, managed_tmpdir, array5by7, nCommits, nSamples):
    from hangar import Repository
    from hangar.records.summarize import list_history

    cmtList = []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    for cIdx in range(nCommits):
        if cIdx != 0:
            co = repo.checkout(write=True)
        sampList = []
        with co.columns['writtenaset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(nSamples):
                arr = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr
                sampList.append(arr)
        cmt = co.commit(f'commit number: {cIdx}')
        cmtList.append((cmt, sampList))
        co.close()
    masterHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name='master')

    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'

    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', 'tester@foo.com', server_instance, remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']
    for cmt, sampList in cmtList:
        with pytest.warns(UserWarning):
            nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'writtenaset' in nco.columns
        assert len(nco.columns['writtenaset']) == len(sampList)

        assert nco.columns['writtenaset'].contains_remote_references is True
        remoteKeys = nco.columns['writtenaset'].remote_reference_keys
        assert tuple([str(idx) for idx in range(len(sampList))]) == remoteKeys
        for idx, _ in enumerate(sampList):
            sIdx = str(idx)
            assert sIdx in nco.columns['writtenaset']
            with pytest.raises(FileNotFoundError):
                shouldNotExist = nco.columns['writtenaset'][sIdx]
        nco.close()
    cloneMasterHist = list_history(newRepo._env.refenv, newRepo._env.branchenv, branch_name='master')
    assert cloneMasterHist == masterHist
    newRepo._env._close_environments()


@pytest.mark.parametrize('nMasterCommits,nMasterSamples', [[1, 4], [5, 10]])
@pytest.mark.parametrize('nDevCommits,nDevSamples', [[1, 3], [3, 5]])
def test_server_push_second_branch_with_new_commit(server_instance, repo,
                                                   array5by7, nMasterCommits,
                                                   nMasterSamples, nDevCommits,
                                                   nDevSamples):

    masterCmtList, devCmtList = [], []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    for cIdx in range(nMasterCommits):
        if cIdx != 0:
            co = repo.checkout(write=True)
        masterSampList = []
        with co.columns['writtenaset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(nMasterSamples):
                arr = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr
                masterSampList.append(arr)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmtList.append((cmt, masterSampList))
        co.close()

    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'

    branch = repo.create_branch('testbranch')
    for cIdx in range(nDevCommits):
        co = repo.checkout(write=True, branch=branch.name)
        devSampList = []
        with co.columns['writtenaset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(nDevSamples):
                arr = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr
                devSampList.append(arr)
        cmt = co.commit(f'dev commit number: {cIdx}')
        devCmtList.append((cmt, devSampList))
        co.close()

    push2 = repo.remote.push('origin', branch.name)
    assert push2 == branch.name


@pytest.mark.parametrize('nMasterCommits,nMasterSamples', [[1, 4], [5, 10]])
@pytest.mark.parametrize('nDevCommits,nDevSamples', [[1, 5], [3, 5]])
def test_server_push_second_branch_with_new_commit_then_clone_partial_fetch(
        server_instance, repo, managed_tmpdir, array5by7, nMasterCommits,
        nMasterSamples, nDevCommits, nDevSamples):
    from hangar import Repository
    from hangar.records.summarize import list_history

    # Push master branch test
    masterCmtList = []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    for cIdx in range(nMasterCommits):
        if cIdx != 0:
            co = repo.checkout(write=True)
        masterSampList = []
        with co.columns['writtenaset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(nMasterSamples):
                arr = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr
                masterSampList.append(arr)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmtList.append((cmt, masterSampList))
        co.close()

    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'
    masterHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name='master')

    # Push dev branch test
    devCmtList = []
    branch = repo.create_branch('testbranch')
    for cIdx in range(nDevCommits):
        co = repo.checkout(write=True, branch=branch.name)
        devSampList = []
        with co.columns['writtenaset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(nDevSamples):
                arr = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr
                devSampList.append(arr)
        cmt = co.commit(f'dev commit number: {cIdx}')
        devCmtList.append((cmt, devSampList))
        co.close()

    push2 = repo.remote.push('origin', branch.name)
    assert push2 == branch.name
    branchHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name=branch.name)

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', 'tester@foo.com', server_instance, remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']
    for cmt, sampList in masterCmtList:
        with pytest.warns(UserWarning):
            nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'writtenaset' in nco.columns
        assert len(nco.columns['writtenaset']) == nMasterSamples

        assert nco.columns['writtenaset'].contains_remote_references is True
        remoteKeys = nco.columns['writtenaset'].remote_reference_keys
        assert tuple([str(idx) for idx in range(len(sampList))]) == remoteKeys
        for idx, _ in enumerate(sampList):
            sIdx = str(idx)
            assert sIdx in nco.columns['writtenaset']
            with pytest.raises(FileNotFoundError):
                shouldNotExist = nco.columns['writtenaset'][sIdx]
        nco.close()
    cloneMasterHist = list_history(newRepo._env.refenv, newRepo._env.branchenv, branch_name='master')
    assert cloneMasterHist == masterHist

    # Fetch test
    fetch = newRepo.remote.fetch('origin', branch=branch.name)
    assert fetch == f'origin/{branch.name}'
    assert newRepo.list_branches() == ['master', 'origin/master', f'origin/{branch.name}']
    for cmt, sampList in devCmtList:

        with pytest.warns(UserWarning):
            nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'writtenaset' in nco.columns
        assert len(nco.columns['writtenaset']) == nDevSamples

        assert nco.columns['writtenaset'].contains_remote_references is True
        remoteKeys = nco.columns['writtenaset'].remote_reference_keys
        assert tuple([str(idx) for idx in range(len(sampList))]) == remoteKeys
        for idx, _ in enumerate(sampList):
            sIdx = str(idx)
            assert sIdx in nco.columns['writtenaset']
            with pytest.raises(FileNotFoundError):
                shouldNotExist = nco.columns['writtenaset'][sIdx]
        nco.close()

    cloneBranchHist = list_history(newRepo._env.refenv, newRepo._env.branchenv, branch_name=f'origin/{branch.name}')
    assert cloneBranchHist == branchHist
    newRepo._env._close_environments()


@pytest.fixture(scope='class')
def array5by7_class():
    return np.random.random((5, 7))


@pytest.fixture(scope='class')
def two_branch_multi_commit_repo_class(server_instance_class, classrepo, array5by7_class):
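    """Class-scoped repo with two pushed branches and four column types.

    Builds two commits on ``master`` and one on ``testbranch``, each writing
    ndarray, str, and bytes columns, pushes both branches to the class-scoped
    server, and yields ``(branch, branchHist, devCmts, masterHist, server)``
    where ``devCmts`` maps every commit digest to its written sample lists.
    """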
    from hangar.records.summarize import list_history

    nMasterCommits = 2
    nMasterSamples = 10
    nDevCommits = 1
    nDevSamples = 16

    # Push master branch test
    masterCmts = {}
    co = classrepo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    co.add_ndarray_column(name='_two', shape=(20,), dtype=np.float32)
    co.add_str_column('str_col')
    co.add_bytes_column('bytes_col')
    for cIdx in range(nMasterCommits):
        if cIdx != 0:
            co = classrepo.checkout(write=True)
        masterSampList1 = []
        masterSampList2 = []
        masterSampList3 = []
        masterSampList4 = []
        with co.columns['writtenaset'] as d,\
                co.columns['_two'] as dd,\
                co.columns['str_col'] as scol, \
                co.columns['bytes_col'] as bcol:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
                del dd[prevKey]
                del scol[prevKey]
                del bcol[prevKey]

            for sIdx in range(nMasterSamples):
                arr1 = np.random.randn(*array5by7_class.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr1
                masterSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                masterSampList2.append(arr2)
                sval = f'strval master {cIdx} {sIdx}'
                scol[str(sIdx)] = sval
                masterSampList3.append(sval)
                bval = f'bytesval master {cIdx} {sIdx}'.encode()
                bcol[str(sIdx)] = bval
                masterSampList4.append(bval)

        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmts[cmt] = (masterSampList1, masterSampList2, masterSampList3, masterSampList4)
        co.close()

    classrepo.remote.add('origin', server_instance_class)
    push1 = classrepo.remote.push('origin', 'master')
    assert push1 == 'master'
    masterHist = list_history(classrepo._env.refenv, classrepo._env.branchenv, branch_name='master')

    # Push dev branch test
    devCmts = masterCmts.copy()
    branch = classrepo.create_branch('testbranch')
    for cIdx in range(nDevCommits):
        co = classrepo.checkout(write=True, branch=branch.name)
        devSampList1 = []
        devSampList2 = []
        devSampList3 = []
        devSampList4 = []
        with co.columns['writtenaset'] as d,\
                co.columns['_two'] as dd,\
                co.columns['str_col'] as scol, \
                co.columns['bytes_col'] as bcol:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
                del dd[prevKey]
                del scol[prevKey]
                del bcol[prevKey]

            for sIdx in range(nDevSamples):
                arr1 = np.random.randn(*array5by7_class.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr1
                devSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                devSampList2.append(arr2)
                sval = f'strval dev {cIdx} {sIdx}'
                scol[str(sIdx)] = sval
                devSampList3.append(sval)
                bval = f'bytesval dev {cIdx} {sIdx}'.encode()
                bcol[str(sIdx)] = bval
                devSampList4.append(bval)

        cmt = co.commit(f'dev commit number: {cIdx}')
        devCmts[cmt] = (devSampList1, devSampList2, devSampList3, devSampList4)
        co.close()

    push2 = classrepo.remote.push('origin', branch.name)
    assert push2 == branch.name
    branchHist = list_history(classrepo._env.refenv, classrepo._env.branchenv, branch_name=branch.name)

    yield branch, branchHist, devCmts, masterHist, server_instance_class
    pass


class TestLargeRemoteServer:

    @pytest.mark.filterwarnings('ignore::UserWarning')
    @pytest.mark.parametrize('fetchAsetns', [
        None, ('writtenaset',), ('_two',), ('str_col',), ('bytes_col',),
    ])
    @pytest.mark.parametrize('fetchBranch', [None, 'testbranch'])
    @pytest.mark.parametrize('fetchCommit', [None, 'ma'])
    @pytest.mark.parametrize('fetchAll_history', [False, True])
    def test_server_push_two_branch_then_clone_fetch_data_options(
            self, two_branch_multi_commit_repo_class, managed_tmpdir_class, array5by7_class,
            fetchBranch, fetchCommit, fetchAsetns, fetchAll_history, tmp_path_factory):
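        """Clone the class-scoped repo and exercise ``remote.fetch_data`` options.

        Parametrized over column subsets, branch vs. commit specifiers, and
        full vs. head-only history; verifies the returned commit list and the
        fetched sample contents for each column type.
        """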
        from hangar import Repository
        from operator import eq

        branch, branchHist, devCmts, masterHist, server_instance = two_branch_multi_commit_repo_class

        # Clone test (master branch)
        _new_tmpdir = tmp_path_factory.mktemp('newclone', numbered=True)
        new_tmpdir = str(_new_tmpdir)
        newRepo = Repository(path=new_tmpdir, exists=False)
        newRepo.clone('Test User', 'tester@foo.com', server_instance, remove_old=True)
        newRepo.remote.fetch('origin', branch=branch.name)
        newRepo.create_branch('testbranch', base_commit=branchHist['head'])
        assert newRepo.list_branches() == ['master', 'origin/master', f'origin/{branch.name}', branch.name]

        # ------------------ format arguments depending on options -----------------

        kwargs = {
            'column_names': fetchAsetns,
            'retrieve_all_history': fetchAll_history,
        }
        if fetchBranch is not None:
            func = branchHist if fetchBranch == 'testbranch' else masterHist
            kwargs['branch'] = fetchBranch
            kwargs['commit'] = None
        else:
            func = branchHist if fetchBranch == 'br' else masterHist
            kwargs['branch'] = None
            kwargs['commit'] = func['head']

        if fetchAll_history is True:
            commits_to_check = func['order']
        else:
            commits_to_check = [func['head']]

        # ----------------------- retrieve data with desired options --------------

        # get data
        fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
        assert commits_to_check == fetch_commits

        # ------------- check that you got everything you expected ----------------

        for fCmt in fetch_commits:
            co = newRepo.checkout(commit=fCmt)
            assert co.commit_hash == fCmt

            # when we are checking one aset only
            if isinstance(fetchAsetns, tuple):
                d = co.columns[fetchAsetns[0]]
                # ensure we didn't fetch the other data simultaneously
                ds1SampList, ds2SampList, ds3SampList, ds4SampList = devCmts[fCmt]
                if fetchAsetns[0] == 'writtenaset':
                    compare = ds1SampList
                    cmp_func = np.allclose
                elif fetchAsetns[0] == '_two':
                    compare = ds2SampList
                    cmp_func = np.allclose
                elif fetchAsetns[0] == 'str_col':
                    compare = ds3SampList
                    cmp_func = eq
                else:
                    compare = ds4SampList
                    cmp_func = eq

                for idx, samp in enumerate(compare):
                    assert cmp_func(samp, d[str(idx)])

            # compare both asets at the same time
            else:
                d = co.columns['writtenaset']
                dd = co.columns['_two']
                str_col = co.columns['str_col']
                bytes_col = co.columns['bytes_col']
                ds1List, ds2List, ds3List, ds4List = devCmts[fCmt]
                for idx, ds1ds2ds3ds4 in enumerate(zip(ds1List, ds2List, ds3List, ds4List)):
                    ds1, ds2, ds3, ds4 = ds1ds2ds3ds4
                    assert np.allclose(ds1, d[str(idx)])
                    assert np.allclose(ds2, dd[str(idx)])
                    assert ds3 == str_col[str(idx)]
                    assert ds4 == bytes_col[str(idx)]
            co.close()
        newRepo._env._close_environments()


@pytest.fixture(scope='class')
def two_multi_format_repo_class(server_instance_class, classrepo):
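    """Class-scoped repo with flat and subsample-nested ndarray, str, and bytes columns.

    Writes five samples per column (four subsamples per nested sample), commits
    once, pushes ``master``, and yields the commit digest and the class server
    instance.
    """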

    co = classrepo.checkout(write=True)
    array_flat = co.add_ndarray_column(name='array_flat', shape=(5, 7), dtype=np.float32)
    array_nested = co.add_ndarray_column(name='array_nested', shape=(20,), dtype=np.float32, contains_subsamples=True)
    str_flat = co.add_str_column('str_flat')
    str_nested = co.add_str_column('str_nested', contains_subsamples=True)
    bytes_flat = co.add_bytes_column('bytes_flat')
    bytes_nested = co.add_bytes_column('bytes_nested', contains_subsamples=True)

    for i in range(5):
        arr = np.random.randn(5, 7).astype(np.float32)
        array_flat[i] = arr
    for i in range(5):
        data = {f'{idx}': np.random.randn(20).astype(np.float32) for idx in range(4)}
        array_nested[i] = data

    for i in range(5):
        str_flat[i] = f'string_{i}' * (i + 1)
    for i in range(5):
        data = {f'{idx}': f'string_{idx}' * (idx + 1) for idx in range(4)}
        str_nested[i] = data

    for i in range(5):
        bytes_flat[i] = f'bytes_{i}'.encode() * (i + 1)
    for i in range(5):
        data = {f'{idx}': f'bytes_{idx}'.encode() * (idx + 1) for idx in range(4)}
        bytes_nested[i] = data

    cmt = co.commit('first commit')
    co.close()
    classrepo.remote.add('origin', server_instance_class)
    classrepo.remote.push('origin', 'master')

    yield cmt, server_instance_class
    pass


class TestRemoteServerFetchDataSample:

    @pytest.mark.filterwarnings('ignore::UserWarning')
    @pytest.mark.parametrize('fetchOp', ['branch', 'commit'])
    @pytest.mark.parametrize('column_name,keys', [
        ('array_flat', 0),
        ('array_flat', [0, 1]),
        ('array_nested', 0),
        ('array_nested', [0, 1]),
        ('array_nested', [(0, '0')]),
        ('array_nested', [(0, '0'), (1, '1')]),
        ('array_nested', [0, (1, '1')]),
        ('array_nested', [(0,)]),
        ('array_nested', [(0, ...)]),
        ('array_nested', [(0, ...), 1, (2, '2')]),
        ('str_flat', 0),
        ('str_flat', [0, 1]),
        ('str_nested', 0),
        ('str_nested', [0, 1]),
        ('str_nested', [(0, '0')]),
        ('str_nested', [(0, '0'), (1, '1')]),
        ('str_nested', [0, (1, '1')]),
        ('str_nested', [(0,)]),
        ('str_nested', [(0, ...)]),
        ('str_nested', [(0, ...), 1, (2, '2')]),
        ('bytes_flat', 0),
        ('bytes_flat', [0, 1]),
        ('bytes_nested', 0),
        ('bytes_nested', [0, 1]),
        ('bytes_nested', [(0, '0')]),
        ('bytes_nested', [(0, '0'), (1, '1')]),
        ('bytes_nested', [0, (1, '1')]),
        ('bytes_nested', [(0,)]),
        ('bytes_nested', [(0, ...)]),
        ('bytes_nested', [(0, ...), 1, (2, '2')]),
    ])
    def test_server_fetch_data_sample(
            self, two_multi_format_repo_class, managed_tmpdir_class,
            fetchOp, column_name, keys, tmp_path_factory
    ):
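        """Clone the repo and fetch individual samples via ``remote.fetch_data_sample``.

        Parametrized over flat and nested columns and key formats (single key,
        key list, ``(sample, subsample)`` tuples, and ``Ellipsis`` selectors),
        then confirms the fetched data is readable from a local checkout.
        """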
        from hangar import Repository

        cmt, server_instance = two_multi_format_repo_class

        # Clone test (master branch)
        _new_tmpdir = tmp_path_factory.mktemp('newclone', numbered=True)
        new_tmpdir = str(_new_tmpdir)
        newRepo = Repository(path=new_tmpdir, exists=False)
        newRepo.clone('Test User', 'tester@foo.com', server_instance, remove_old=True)

        # ------------------ format arguments depending on options -----------------

        kwargs = {
            'column': column_name,
            'samples': keys
        }
        if fetchOp == 'branch':
            kwargs['branch'] = 'master'
        elif fetchOp == 'commit':
            kwargs['commit'] = cmt
        else:
            raise ValueError(f'fetchOp unknown: {fetchOp}')

        fetch_commit = newRepo.remote.fetch_data_sample(remote='origin', **kwargs)
        assert fetch_commit == cmt

        co = newRepo.checkout()
        try:
            col = co[column_name]
            if isinstance(keys, (list, tuple)):
                if column_name.endswith('flat'):
                    for key in keys:
                        assert col[key] is not None
                else:
                    for sample in keys:
                        if isinstance(sample, (list, tuple)):
                            if len(sample) == 2:
                                assert col[sample[0]][sample[1]] is not None
                            elif len(sample) == 1:
                                assert col[sample[0]][...] is not None
                        else:
                            assert col[sample][...] is not None
        finally:
            co.close()
            newRepo._env._close_environments()

    def test_server_fetch_data_sample_commit_not_existing(
            self, two_multi_format_repo_class, managed_tmpdir_class, tmp_path_factory
    ):
        from hangar import Repository

        cmt, server_instance = two_multi_format_repo_class

        # Clone test (master branch)
        _new_tmpdir = tmp_path_factory.mktemp('newclone', numbered=True)
        new_tmpdir = str(_new_tmpdir)
        newRepo = Repository(path=new_tmpdir, exists=False)
        newRepo.clone('Test User', 'tester@foo.com', server_instance, remove_old=True)

        with pytest.raises(ValueError, match='specified commit'):
            newRepo.remote.fetch_data_sample(
                remote='origin',
                commit='DOESNOTEXISTCOMMIT',
                column='array_flat',
                samples=[0, 1])

        newRepo._env._close_environments()

    def test_server_fetch_data_sample_branch_not_existing(
            self, two_multi_format_repo_class, managed_tmpdir_class, tmp_path_factory
    ):
        from hangar import Repository

        cmt, server_instance = two_multi_format_repo_class

        # Clone test (master branch)
        _new_tmpdir = tmp_path_factory.mktemp('newclone', numbered=True)
        new_tmpdir = str(_new_tmpdir)
        newRepo = Repository(path=new_tmpdir, exists=False)
        newRepo.clone('Test User', 'tester@foo.com', server_instance, remove_old=True)

        with pytest.raises(ValueError, match='branch with name'):
            newRepo.remote.fetch_data_sample(
                remote='origin',
                branch='DOESNOTEXISTBRANCH',
                column='array_flat',
                samples=[0, 1])

        newRepo._env._close_environments()

    def test_server_fetch_data_sample_branch_and_commit_args_passed_fails(
            self, two_multi_format_repo_class, managed_tmpdir_class, tmp_path_factory
    ):
        from hangar import Repository

        cmt, server_instance = two_multi_format_repo_class

        # Clone test (master branch)
        _new_tmpdir = tmp_path_factory.mktemp('newclone', numbered=True)
        new_tmpdir = str(_new_tmpdir)
        newRepo = Repository(path=new_tmpdir, exists=False)
        newRepo.clone('Test User', 'tester@foo.com', server_instance, remove_old=True)

        with pytest.raises(ValueError, match='``branch`` and ``commit``'):
            newRepo.remote.fetch_data_sample(
                remote='origin',
                branch='master',  # actual value which might otherwise work
                commit=cmt,  # actual value which might otherwise work
                column='array_flat',
                samples=[0, 1])

        newRepo._env._close_environments()

    def test_server_fetch_data_sample_not_existing_fails(
            self, two_multi_format_repo_class, managed_tmpdir_class, tmp_path_factory
    ):
        from hangar import Repository

        cmt, server_instance = two_multi_format_repo_class

        # Clone test (master branch)
        _new_tmpdir = tmp_path_factory.mktemp('newclone', numbered=True)
        new_tmpdir = str(_new_tmpdir)
        newRepo = Repository(path=new_tmpdir, exists=False)
        newRepo.clone('Test User', 'tester@foo.com', server_instance, remove_old=True)

        with pytest.raises(KeyError):
            newRepo.remote.fetch_data_sample(
                remote='origin',
                branch='master',
                column='array_flat',
                samples=['DOESNOTEXIST'])

        with pytest.raises(KeyError):
            newRepo.remote.fetch_data_sample(
                remote='origin',
                branch='master',
                column='array_nested',
                samples=[(1, 'DOESNOTEXIST')])

        with pytest.raises(KeyError):
            newRepo.remote.fetch_data_sample(
                remote='origin',
                branch='master',
                column='array_nested',
                samples=[('DOESNOTEXIST', 0)])

        newRepo._env._close_environments()

    def test_server_fetch_data_sample_not_valid_type(
            self, two_multi_format_repo_class, managed_tmpdir_class, tmp_path_factory
    ):
        from hangar import Repository

        cmt, server_instance = two_multi_format_repo_class

        # Clone test (master branch)
        _new_tmpdir = tmp_path_factory.mktemp('newclone', numbered=True)
        new_tmpdir = str(_new_tmpdir)
        newRepo = Repository(path=new_tmpdir, exists=False)
        newRepo.clone('Test User', 'tester@foo.com', server_instance, remove_old=True)

        with pytest.raises(TypeError):
            newRepo.remote.fetch_data_sample(
                remote='origin',
                branch='master',
                column='array_flat',
                samples=[b'BYTES_TYPE_NOT_VALID'])

        with pytest.raises(ValueError, match='nested column specifier sequence'):
            newRepo.remote.fetch_data_sample(
                remote='origin',
                branch='master',
                column='array_nested',
                samples=[(0, 1, 'ARRAY_NOT_VALID')])

        newRepo._env._close_environments()


def test_push_unchanged_repo_makes_no_modifications(written_two_cmt_server_repo):
    _, repo = written_two_cmt_server_repo
    with pytest.warns(UserWarning):
        branchName = repo.remote.push('origin', 'master')
    assert branchName == 'master'


def test_fetch_unchanged_repo_makes_no_modifications(written_two_cmt_server_repo):
    _, repo = written_two_cmt_server_repo
    with pytest.warns(UserWarning):
        branchName = repo.remote.fetch('origin', 'master')
    assert branchName == 'master'


def test_fetch_newer_disk_repo_makes_no_modifications(written_two_cmt_server_repo):
    _, repo = written_two_cmt_server_repo
    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co['test_meta'][0] = 'lol'
    co.commit('newer commit')
    co.close()
    with pytest.warns(UserWarning):
        branchName = repo.remote.fetch('origin', 'master')
    assert branchName == 'master'


def test_fetch_branch_which_does_not_exist_client_server_raises_rpc_error(written_two_cmt_server_repo):
    import grpc
    _, repo = written_two_cmt_server_repo
    with pytest.raises(grpc.RpcError) as rpc_error:
        repo.remote.fetch('origin', 'not-a-branch')
    assert rpc_error.value._state.code == grpc.StatusCode.NOT_FOUND


def test_fetch_branch_on_client_which_does_not_exist_on_server_raises_rpc_error(written_two_cmt_server_repo):
    import grpc
    _, repo = written_two_cmt_server_repo
    repo.create_branch('new-branch')
    with pytest.raises(grpc.RpcError) as exc_info:
        repo.remote.fetch('origin', 'new-branch')
    assert exc_info.value._state.code == grpc.StatusCode.NOT_FOUND


def test_push_clone_three_way_merge(server_instance, repo_2_br_no_conf, managed_tmpdir):
    from hangar import Repository

    repo_2_br_no_conf.remote.add('origin', server_instance)
    push1 = repo_2_br_no_conf.remote.push('origin', 'master')
    assert push1 == 'master'
    push2 = repo_2_br_no_conf.remote.push('origin', 'testbranch')
    assert push2 == 'testbranch'

    test_head = repo_2_br_no_conf.log(branch='testbranch', return_contents=True)['head']
    master_head = repo_2_br_no_conf.log(branch='master', return_contents=True)['head']

    merge_cmt = repo_2_br_no_conf.merge('merge commit', 'master', 'testbranch')
    merge_head = repo_2_br_no_conf.log(branch='master', return_contents=True)['head']
    merge_order = repo_2_br_no_conf.log(branch='master', return_contents=True)['order']
    merge_push = repo_2_br_no_conf.remote.push('origin', 'master')
    assert merge_push == 'master'
    assert merge_head != master_head
    assert merge_head != test_head

    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', 'tester@foo.com', server_instance, remove_old=True)

    clone_head = newRepo.log(branch='master', return_contents=True)['head']
    clone_order = newRepo.log(branch='master', return_contents=True)['order']
    assert clone_head == merge_head == merge_cmt
    assert merge_order == clone_order
    newRepo._env._close_environments()


# -----------------------------------------------------------------------------


def test_push_restricted_with_right_username_password(server_instance_push_restricted, repo, managed_tmpdir):
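    """Push to an auth-restricted server with valid credentials, then clone back and verify the data."""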
    from hangar import Repository

    # Push master branch test
    masterCmtList = []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='aset', shape=(50, 20), dtype=np.float32)
    for cIdx in range(1):
        if cIdx != 0:
            co = repo.checkout(write=True)
        masterSampList = []
        with co.columns['aset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(70):
                arr = np.random.randn(50, 20).astype(np.float32)
                d[str(sIdx)] = arr
                masterSampList.append(arr)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmtList.append((cmt, masterSampList))
        co.close()

    repo.remote.add('origin', server_instance_push_restricted)
    push1 = repo.remote.push('origin',
                             'master',
                             username='right_username',
                             password='right_password')
    assert push1 == 'master'

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', 'tester@foo.com', server_instance_push_restricted, remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']
    for cmt, sampList in masterCmtList:
        newRepo.remote.fetch_data('origin', commit=cmt)
        nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'aset' in nco.columns
        assert len(nco.columns['aset']) == 70
        for sIdx, samp in enumerate(sampList):
            assert np.allclose(nco.columns['aset'][str(sIdx)], samp)
        nco.close()
    newRepo._env._close_environments()


def test_push_restricted_wrong_user_and_password(server_instance_push_restricted, repo, managed_tmpdir):

    # Push master branch test
    masterCmtList = []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='aset', shape=(50, 20), dtype=np.float32)
    for cIdx in range(1):
        if cIdx != 0:
            co = repo.checkout(write=True)
        masterSampList = []
        with co.columns['aset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(70):
                arr = np.random.randn(50, 20).astype(np.float32)
                d[str(sIdx)] = arr
                masterSampList.append(arr)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmtList.append((cmt, masterSampList))
        co.close()

    repo.remote.add('origin', server_instance_push_restricted)
    with pytest.raises(PermissionError):
        push1 = repo.remote.push('origin',
                                 'master',
                                 username='wrong_username',
                                 password='right_password')

    with pytest.raises(PermissionError):
        push2 = repo.remote.push('origin',
                                 'master',
                                 username='right_username',
                                 password='wrong_password')

    with pytest.raises(PermissionError):
        push3 = repo.remote.push('origin',
                                 'master',
                                 username='wrong_username',
                                 password='wrong_password')


================================================
FILE: tests/test_repo_integrity_verification.py
================================================
import pytest

import numpy as np


@pytest.fixture()
def diverse_repo(repo):
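    """Repository with ndarray, str, and bytes columns spread across ``master``,
    ``dev``, and ``newbranch``, including two merge commits; used as the target
    for the integrity-verification tests below.
    """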
    co = repo.checkout(write=True)
    co.add_ndarray_column('test', prototype=np.arange(10))
    co.add_str_column('test_meta')
    co.add_bytes_column('test_bytes')
    co.columns['test'][0] = np.arange(10)
    co.columns['test'][1] = np.arange(10) + 1
    co.columns['test'][2] = np.arange(10) + 2
    co.columns['test'][3] = np.arange(10) + 3
    co.columns['test'][4] = np.arange(10) + 4
    co['test_meta']['hi'] = 'foo'
    co['test_meta']['aea'] = 'eeae'
    co['test_bytes']['lol'] = b'foo bytes'
    co.commit('hello world')

    sample_trimg = np.arange(50).reshape(5, 10).astype(np.uint8)
    sample_trlabel = np.array([0], dtype=np.int64)
    sample_vimg = np.zeros(50).reshape(5, 10).astype(np.uint16)
    sample_vlabel = np.array([1], dtype=np.int32)

    co.close()
    repo.create_branch('dev')
    co = repo.checkout(write=True, branch='dev')
    dset_trlabels = co.add_ndarray_column(name='train_labels', prototype=sample_trlabel)
    dset_trimgs = co.add_ndarray_column('train_images', prototype=sample_trimg, backend='01')
    dset_trlabels[0] = sample_trlabel
    dset_trlabels[1] = sample_trlabel + 1
    dset_trlabels[2] = sample_trlabel + 2
    dset_trimgs[0] = sample_trimg
    dset_trimgs[1] = sample_trimg + 1
    dset_trimgs[2] = sample_trimg + 2
    co.commit('second on dev')
    co.close()

    co = repo.checkout(write=True, branch='master')
    dset_vimgs = co.add_ndarray_column('valid_images', prototype=sample_vimg)
    dset_vlabels = co.add_ndarray_column('valid_labels', prototype=sample_vlabel)
    dset_vlabels[0] = sample_vlabel
    dset_vlabels[1] = sample_vlabel + 1
    dset_vlabels[2] = sample_vlabel + 2
    dset_vimgs[0] = sample_vimg
    dset_vimgs[1] = sample_vimg + 1
    dset_vimgs[2] = sample_vimg + 2
    co['test_meta']['second'] = 'on master now'
    co['test_bytes']['second'] = b'on master now'
    co.commit('second on master')
    co.close()

    base = repo.merge('merge commit', 'master', 'dev')
    repo.create_branch('newbranch', base_commit=base)
    co = repo.checkout(write=True, branch='master')
    co['test_meta']['newmeta'] = 'wow'
    co['test_bytes']['newbytes'] = b'new bytesdata'
    co.commit('on master after merge')
    co.close()

    co = repo.checkout(write=True, branch='newbranch')
    ds_trimgs = co.columns['train_images']
    ds_trlabels = co.columns['train_labels']
    ds_trlabels[3] = sample_trlabel + 3
    ds_trlabels[4] = sample_trlabel + 4
    ds_trlabels[5] = sample_trlabel + 5
    ds_trimgs[3] = sample_trimg + 3
    ds_trimgs[4] = sample_trimg + 4
    ds_trimgs[5] = sample_trimg + 5
    co.commit('on newdev after merge')
    co.close()

    base = repo.merge('new merge commit', 'master', 'newbranch')
    return repo


def test_verify_correct(diverse_repo):
    assert diverse_repo.verify_repo_integrity() is True


class TestVerifyCommitRefDigests(object):
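    """Deleting any data or schema digest referenced by a commit must be caught.

    Each test removes one hash key at a time, asserts that
    ``_verify_commit_ref_digests_exist`` raises ``RuntimeError``, then restores
    the key before the next iteration.
    """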

    def test_remove_array_digest_is_caught(self, diverse_repo):
        from hangar.records import hashs
        from hangar.diagnostics.integrity import _verify_commit_ref_digests_exist

        hq = hashs.HashQuery(diverse_repo._env.hashenv)
        keys_to_remove = list(hq.gen_all_hash_keys_db())

        for key_removed in keys_to_remove:
            with diverse_repo._env.hashenv.begin(write=True) as txn:
                val_removed = txn.get(key_removed)
                txn.delete(key_removed)

            with pytest.raises(RuntimeError):
                _verify_commit_ref_digests_exist(hashenv=diverse_repo._env.hashenv,
                                                 refenv=diverse_repo._env.refenv)

            with diverse_repo._env.hashenv.begin(write=True) as txn:
                txn.put(key_removed, val_removed)

    def test_remove_schema_digest_is_caught(self, diverse_repo):
        from hangar.records import hashs
        from hangar.diagnostics.integrity import _verify_commit_ref_digests_exist

        hq = hashs.HashQuery(diverse_repo._env.hashenv)
        keys_to_remove = list(hq.gen_all_schema_keys_db())
        for key_removed in keys_to_remove:
            with diverse_repo._env.hashenv.begin(write=True) as txn:
                val_removed = txn.get(key_removed)
                txn.delete(key_removed)

            with pytest.raises(RuntimeError):
                _verify_commit_ref_digests_exist(hashenv=diverse_repo._env.hashenv,
                                                 refenv=diverse_repo._env.refenv)

            with diverse_repo._env.hashenv.begin(write=True) as txn:
                txn.put(key_removed, val_removed)


class TestVerifyCommitTree(object):
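    """Corruption of the commit parent-ref map must be detected.

    A missing parent ref, a parent ref pointing at a nonexistent commit, or a
    second "initial" commit must each raise ``RuntimeError``.
    """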

    def test_parent_ref_digest_of_cmt_does_not_exist(self, diverse_repo):
        from hangar.diagnostics.integrity import _verify_commit_tree_integrity
        from hangar.records.parsing import commit_parent_db_key_from_raw_key

        repo = diverse_repo
        history = repo.log(return_contents=True)
        all_commits = history['order']
        for cmt in all_commits:
            parentKey = commit_parent_db_key_from_raw_key(cmt)
            with repo._env.refenv.begin(write=True) as txn:
                parentVal = txn.get(parentKey)
                txn.delete(parentKey)

            with pytest.raises(RuntimeError, match='Data corruption detected for parent ref'):
                _verify_commit_tree_integrity(repo._env.refenv)

            with repo._env.refenv.begin(write=True) as txn:
                txn.put(parentKey, parentVal)

    def test_parent_ref_references_nonexisting_commits(self, diverse_repo):
        from hangar.diagnostics.integrity import _verify_commit_tree_integrity
        from hangar.records.parsing import commit_parent_db_key_from_raw_key
        from hangar.records.parsing import commit_parent_raw_val_from_db_val
        from hangar.records.parsing import commit_parent_db_val_from_raw_val

        repo = diverse_repo
        history = repo.log(return_contents=True)
        all_commits = history['order']
        for cmt in all_commits:
            parentKey = commit_parent_db_key_from_raw_key(cmt)
            with repo._env.refenv.begin(write=True) as txn:
                parentVal = txn.get(parentKey)
                parent_raw = commit_parent_raw_val_from_db_val(parentVal)
                parent = parent_raw.ancestor_spec

                if parent.dev_ancestor:
                    modifiedVal = commit_parent_db_val_from_raw_val(
                        master_ancestor=parent.master_ancestor,
                        dev_ancestor='corrupt',
                        is_merge_commit=parent.is_merge_commit)
                elif parent.master_ancestor:
                    modifiedVal = commit_parent_db_val_from_raw_val(
                        master_ancestor='corrupt',
                        dev_ancestor=parent.dev_ancestor,
                        is_merge_commit=parent.is_merge_commit)
                else:
                    continue

                txn.put(parentKey, modifiedVal.raw, overwrite=True)

            with pytest.raises(RuntimeError, match='Data corruption detected in commit tree'):
                _verify_commit_tree_integrity(repo._env.refenv)

            with repo._env.refenv.begin(write=True) as txn:
                txn.put(parentKey, parentVal)

    def test_parent_ref_has_two_initial_commits(self, diverse_repo):
        from hangar.diagnostics.integrity import _verify_commit_tree_integrity
        from hangar.records.parsing import commit_parent_db_key_from_raw_key

        repo = diverse_repo
        history = repo.log(return_contents=True)
        all_commits = history['order']
        initial_commit = all_commits[-1]
        for cmt in all_commits:
            if cmt == initial_commit:
                continue

            parentKey = commit_parent_db_key_from_raw_key(cmt)
            with repo._env.refenv.begin(write=True) as txn:
                parentVal = txn.get(parentKey)
                txn.put(parentKey, b'', overwrite=True)

            with pytest.raises(RuntimeError, match='Commit tree integrity compromised. Multiple "initial"'):
                _verify_commit_tree_integrity(repo._env.refenv)

            with repo._env.refenv.begin(write=True) as txn:
                txn.put(parentKey, parentVal, overwrite=True)


class TestBranchIntegrity(object):
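    """Corruption of the branch map must be detected.

    Covers a repo left with no branches, a branch head whose commit records are
    missing, and a staging head that refers to a deleted branch name.
    """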

    def test_atleast_one_branch_exists(self, diverse_repo):
        from hangar.records.heads import get_branch_names
        from hangar.records.parsing import repo_branch_head_db_key_from_raw_key
        from hangar.diagnostics.integrity import _verify_branch_integrity

        branch_names = get_branch_names(diverse_repo._env.branchenv)
        with diverse_repo._env.branchenv.begin(write=True) as txn:
            for bname in branch_names:
                branchKey = repo_branch_head_db_key_from_raw_key(bname)
                txn.delete(branchKey)

        with pytest.raises(
                RuntimeError,
                match='Branch map compromised. Repo must contain atleast one branch'
        ):
            _verify_branch_integrity(diverse_repo._env.branchenv, diverse_repo._env.refenv)

    def test_branch_name_head_commit_digests_exist(self, diverse_repo):
        from hangar.records.heads import get_branch_names, get_branch_head_commit
        from hangar.records.parsing import commit_ref_db_key_from_raw_key
        from hangar.records.parsing import commit_parent_db_key_from_raw_key
        from hangar.records.parsing import commit_spec_db_key_from_raw_key
        from hangar.diagnostics.integrity import _verify_branch_integrity

        branch_names = get_branch_names(diverse_repo._env.branchenv)
        for bname in branch_names:
            bhead = get_branch_head_commit(diverse_repo._env.branchenv, branch_name=bname)
            with diverse_repo._env.refenv.begin(write=True) as txn:
                cmtRefKey = commit_ref_db_key_from_raw_key(bhead)
                cmtSpecKey = commit_spec_db_key_from_raw_key(bhead)
                cmtParentKey = commit_parent_db_key_from_raw_key(bhead)

                cmtRefVal = txn.get(cmtRefKey)
                cmtSpecVal = txn.get(cmtSpecKey)
                cmtParentVal = txn.get(cmtParentKey)

                txn.delete(cmtRefKey)
                txn.delete(cmtSpecKey)
                txn.delete(cmtParentKey)

            with pytest.raises(RuntimeError, match='Branch commit map compromised. Branch name'):
                _verify_branch_integrity(diverse_repo._env.branchenv, diverse_repo._env.refenv)

            with diverse_repo._env.refenv.begin(write=True) as txn:
                txn.put(cmtRefKey, cmtRefVal)
                txn.put(cmtSpecKey, cmtSpecVal)
                txn.put(cmtParentKey, cmtParentVal)

    def test_staging_head_branch_name_exists(self, diverse_repo):
        from hangar.records.heads import get_staging_branch_head
        from hangar.records.parsing import repo_branch_head_db_key_from_raw_key
        from hangar.diagnostics.integrity import _verify_branch_integrity

        bname = get_staging_branch_head(diverse_repo._env.branchenv)
        with diverse_repo._env.branchenv.begin(write=True) as txn:
            branchKey = repo_branch_head_db_key_from_raw_key(bname)
            txn.delete(branchKey)

        with pytest.raises(
                RuntimeError,
                match='Brach commit map compromised. Staging head refers to branch name'
        ):
            _verify_branch_integrity(diverse_repo._env.branchenv, diverse_repo._env.refenv)


def test_data_digest_modification_is_caught(diverse_repo):
    from hangar.records import hashs
    from hangar.diagnostics.integrity import _verify_column_integrity

    hq = hashs.HashQuery(diverse_repo._env.hashenv)
    keys_to_replace = list(hq.gen_all_hash_keys_db())
    replacer_key = keys_to_replace.pop()
    for kreplaced in keys_to_replace:
        with diverse_repo._env.hashenv.begin(write=True) as txn:
            replacer_val = txn.get(replacer_key)
            vreplaced = txn.get(kreplaced)
            txn.put(kreplaced, replacer_val)

        with pytest.raises(RuntimeError):
            _verify_column_integrity(hashenv=diverse_repo._env.hashenv,
                                     repo_path=diverse_repo._env.repo_path)

        with diverse_repo._env.hashenv.begin(write=True) as txn:
            txn.put(kreplaced, vreplaced)


def test_data_digest_remote_location_warns(diverse_repo):
    from hangar.records import hashs
    from hangar.diagnostics.integrity import _verify_column_integrity

    hq = hashs.HashQuery(diverse_repo._env.hashenv)
    replace_key = list(hq.gen_all_hash_keys_db())[0]
    with diverse_repo._env.hashenv.begin(write=True) as txn:
        txn.put(replace_key, b'50:ekaearar')

    with pytest.warns(RuntimeWarning, match='Can not verify integrity of partially fetched array'):
        _verify_column_integrity(hashenv=diverse_repo._env.hashenv,
                                 repo_path=diverse_repo._env.repo_path)


def test_schema_digest_modification_is_caught(diverse_repo):
    from hangar.records import hashs
    from hangar.diagnostics.integrity import _verify_schema_integrity

    hq = hashs.HashQuery(diverse_repo._env.hashenv)
    keys_to_replace = list(hq.gen_all_schema_keys_db())
    replacer_key = keys_to_replace.pop()
    for kreplaced in keys_to_replace:
        with diverse_repo._env.hashenv.begin(write=True) as txn:
            replacer_val = txn.get(replacer_key)
            vreplaced = txn.get(kreplaced)
            txn.put(kreplaced, replacer_val)

        with pytest.raises(RuntimeError, match='Data corruption detected for schema. Expected digest'):
            _verify_schema_integrity(hashenv=diverse_repo._env.hashenv)

        with diverse_repo._env.hashenv.begin(write=True) as txn:
            txn.put(kreplaced, vreplaced)


================================================
FILE: tests/test_utils.py
================================================
import pytest


@pytest.mark.parametrize('arg,key,expected', [
    ['AAABBBCCC', None, ['A', 'B', 'C']],
    ['AAABbBCcC', str.lower, ['A', 'B', 'C']],
    ['ABACBACDA', None, ['A', 'B', 'C', 'D']],
    ['ABacBaCAd', str.upper, ['A', 'B', 'c', 'd']],
])
def test_unique_everseen(arg, key, expected):
    from hangar.utils import unique_everseen

    res = list(unique_everseen(arg, key=key))
    assert res == expected


@pytest.mark.parametrize('pth', [pytest.File, None, 123])
def test_valid_directory_path_errors_on_invalid_path_arg(pth):
    from hangar.utils import is_valid_directory_path
    with pytest.raises(TypeError, match='Path arg `p`'):
        is_valid_directory_path(pth)


def test_valid_directory_path_recognizes_not_a_directory(managed_tmpdir):
    from hangar.utils import is_valid_directory_path
    from pathlib import Path

    test_pth = Path(managed_tmpdir, 'test.txt').resolve()
    with test_pth.open('w+') as f:
        f.write('hello')
    with pytest.raises(NotADirectoryError):
        is_valid_directory_path(test_pth)


@pytest.mark.parametrize('arg,expected', [
    [1, '1.00 B'],
    [1234, '1.23 kB'],
    [12345678, '12.35 MB'],
    [1234567890, '1.23 GB'],
    [1234567890000, '1.23 TB'],
    [1234567890000000, '1.23 PB']
])
def test_format_bytes(arg, expected):
    from hangar.utils import format_bytes

    res = format_bytes(arg)
    assert res == expected


@pytest.mark.parametrize('arg,expected', [
    ['100', 100],
    ['100 MB', 100000000],
    ['100M', 100000000],
    ['5kB', 5000],
    ['5.4 kB', 5400],
    ['1kiB', 1024],
    ['1e6', 1000000],
    ['1e6 kB', 1000000000],
    ['MB', 1000000]
])
def test_parse_bytes(arg, expected):
    from hangar.utils import parse_bytes

    res = parse_bytes(arg)
    assert res == expected


@pytest.mark.parametrize('arg,expected', [
    [0, 2],
    [1, 2],
    [2, 2],
    [3, 3],
    [4, 5],
    [7, 7],
    [174, 179],
    [10065, 10067],
    [104721, 104723],
])
def test_find_next_prime(arg, expected):
    from hangar.optimized_utils import find_next_prime

    res = find_next_prime(arg)
    assert res == expected


================================================
FILE: tests/test_version.py
================================================
# -*- coding: utf-8 -*-
"""
Portions of this code have been taken and modified from the "packaging" project.

URL:      https://github.com/pypa/packaging
Files:    tests/test_version.py
          tests/test_structures.py
Commit:   6a09d4015b54f80762ff3ef1597a8b6740563c19
Accessed: 11 DEC 2019

packaging License
-------------------------------------------------------------------------------
License: Dual licensed under the terms of the Apache License, Version 2.0, and the BSD License.
URL:     https://github.com/pypa/packaging/blob/6a09d4015b/LICENSE
         https://github.com/pypa/packaging/blob/6a09d4015b/LICENSE.APACHE
         https://github.com/pypa/packaging/blob/6a09d4015b/LICENSE.BSD
"""
import itertools
import operator

import pretend
import pytest

from hangar._version import (
    Infinity, InvalidVersion, NegativeInfinity, Version, parse
)

# ------------------ Test Structures ---------------------


def test_infinity_repr():
    assert repr(Infinity) == "Infinity"


def test_negative_infinity_repr():
    assert repr(NegativeInfinity) == "-Infinity"


def test_infinity_hash():
    assert hash(Infinity) == hash(Infinity)


def test_negative_infinity_hash():
    assert hash(NegativeInfinity) == hash(NegativeInfinity)


@pytest.mark.parametrize("left", [1, "a", ("b", 4)])
def test_infinity_comparison(left):
    assert left < Infinity
    assert left <= Infinity
    assert not left == Infinity
    assert left != Infinity
    assert not left > Infinity
    assert not left >= Infinity


@pytest.mark.parametrize("left", [1, "a", ("b", 4)])
def test_negative_infinity_lesser(left):
    assert not left < NegativeInfinity
    assert not left <= NegativeInfinity
    assert not left == NegativeInfinity
    assert left != NegativeInfinity
    assert left > NegativeInfinity
    assert left >= NegativeInfinity


def test_infinity_equal():
    assert Infinity == Infinity


def test_negative_infinity_equal():
    assert NegativeInfinity == NegativeInfinity


def test_negate_infinity():
    assert isinstance(-Infinity, NegativeInfinity.__class__)


def test_negate_negative_infinity():
    assert isinstance(-NegativeInfinity, Infinity.__class__)


# ---------------------- Test Versions ---------------------------


@pytest.mark.parametrize(
    ("version", "klass"), [("1.0", Version)]
)
def test_parse(version, klass):
    assert isinstance(parse(version), klass)


def test_legacy_version_raises():
    with pytest.raises(InvalidVersion):
        parse('1-1-1')


# This list must be in the correct sorting order
VERSIONS = [
    # Implicit epoch of 0
    "1.0.dev456",
    "1.0a1",
    "1.0a12.dev456",
    "1.0a12",
    "1.0b1.dev456",
    "1.0b2.post345.dev456",
    "1.0b2.post345",
    "1.0b2-346",
    "1.0c1.dev456",
    "1.0rc2",
    "1.0c3",
    "1.0",
    "1.0.post456",
    "1.1.dev1",
    "1.2+abc",
    "1.2+abc123def",
    "1.2+123456",
    # Explicit epoch of 1
    "1!1.0.dev456",
    "1!1.0a2.dev456",
    "1!1.0c1",
    "1!1.0c3",
    "1!1.0",
    "1!1.0.post456.dev34",
    "1!1.2+123abc",
    "1!1.2+123abc456",
    "1!1.2+abc123",
    "1!1.2+123456",
    "1!1.2.r32+123456",
]


class TestVersion:
    @pytest.mark.parametrize("version", VERSIONS)
    def test_valid_versions(self, version):
        Version(version)

    @pytest.mark.parametrize(
        "version",
        [
            # Non sensical versions should be invalid
            "french toast",
            # Versions with invalid local versions
            "1.0+a+",
            "1.0++",
            "1.0+_foobar",
            "1.0+foo&asd",
            "1.0+1+1",
        ],
    )
    def test_invalid_versions(self, version):
        with pytest.raises(InvalidVersion):
            Version(version)

    @pytest.mark.parametrize(
        ("version", "normalized"),
        [
            # Various development release incarnations
            ("1.0dev", "1.0.dev0"),
            ("1.0.dev", "1.0.dev0"),
            ("1.0dev1", "1.0.dev1"),
            ("1.0dev", "1.0.dev0"),
            ("1.0-dev", "1.0.dev0"),
            ("1.0-dev1", "1.0.dev1"),
            ("1.0DEV", "1.0.dev0"),
            ("1.0.DEV", "1.0.dev0"),
            ("1.0DEV1", "1.0.dev1"),
            ("1.0DEV", "1.0.dev0"),
            ("1.0.DEV1", "1.0.dev1"),
            ("1.0-DEV", "1.0.dev0"),
            ("1.0-DEV1", "1.0.dev1"),
            # Various alpha incarnations
            ("1.0a", "1.0a0"),
            ("1.0.a", "1.0a0"),
            ("1.0.a1", "1.0a1"),
            ("1.0-a", "1.0a0"),
            ("1.0-a1", "1.0a1"),
            ("1.0alpha", "1.0a0"),
            ("1.0.alpha", "1.0a0"),
            ("1.0.alpha1", "1.0a1"),
            ("1.0-alpha", "1.0a0"),
            ("1.0-alpha1", "1.0a1"),
            ("1.0A", "1.0a0"),
            ("1.0.A", "1.0a0"),
            ("1.0.A1", "1.0a1"),
            ("1.0-A", "1.0a0"),
            ("1.0-A1", "1.0a1"),
            ("1.0ALPHA", "1.0a0"),
            ("1.0.ALPHA", "1.0a0"),
            ("1.0.ALPHA1", "1.0a1"),
            ("1.0-ALPHA", "1.0a0"),
            ("1.0-ALPHA1", "1.0a1"),
            # Various beta incarnations
            ("1.0b", "1.0b0"),
            ("1.0.b", "1.0b0"),
            ("1.0.b1", "1.0b1"),
            ("1.0-b", "1.0b0"),
            ("1.0-b1", "1.0b1"),
            ("1.0beta", "1.0b0"),
            ("1.0.beta", "1.0b0"),
            ("1.0.beta1", "1.0b1"),
            ("1.0-beta", "1.0b0"),
            ("1.0-beta1", "1.0b1"),
            ("1.0B", "1.0b0"),
            ("1.0.B", "1.0b0"),
            ("1.0.B1", "1.0b1"),
            ("1.0-B", "1.0b0"),
            ("1.0-B1", "1.0b1"),
            ("1.0BETA", "1.0b0"),
            ("1.0.BETA", "1.0b0"),
            ("1.0.BETA1", "1.0b1"),
            ("1.0-BETA", "1.0b0"),
            ("1.0-BETA1", "1.0b1"),
            # Various release candidate incarnations
            ("1.0c", "1.0rc0"),
            ("1.0.c", "1.0rc0"),
            ("1.0.c1", "1.0rc1"),
            ("1.0-c", "1.0rc0"),
            ("1.0-c1", "1.0rc1"),
            ("1.0rc", "1.0rc0"),
            ("1.0.rc", "1.0rc0"),
            ("1.0.rc1", "1.0rc1"),
            ("1.0-rc", "1.0rc0"),
            ("1.0-rc1", "1.0rc1"),
            ("1.0C", "1.0rc0"),
            ("1.0.C", "1.0rc0"),
            ("1.0.C1", "1.0rc1"),
            ("1.0-C", "1.0rc0"),
            ("1.0-C1", "1.0rc1"),
            ("1.0RC", "1.0rc0"),
            ("1.0.RC", "1.0rc0"),
            ("1.0.RC1", "1.0rc1"),
            ("1.0-RC", "1.0rc0"),
            ("1.0-RC1", "1.0rc1"),
            # Various post release incarnations
            ("1.0post", "1.0.post0"),
            ("1.0.post", "1.0.post0"),
            ("1.0post1", "1.0.post1"),
            ("1.0post", "1.0.post0"),
            ("1.0-post", "1.0.post0"),
            ("1.0-post1", "1.0.post1"),
            ("1.0POST", "1.0.post0"),
            ("1.0.POST", "1.0.post0"),
            ("1.0POST1", "1.0.post1"),
            ("1.0POST", "1.0.post0"),
            ("1.0r", "1.0.post0"),
            ("1.0rev", "1.0.post0"),
            ("1.0.POST1", "1.0.post1"),
            ("1.0.r1", "1.0.post1"),
            ("1.0.rev1", "1.0.post1"),
            ("1.0-POST", "1.0.post0"),
            ("1.0-POST1", "1.0.post1"),
            ("1.0-5", "1.0.post5"),
            ("1.0-r5", "1.0.post5"),
            ("1.0-rev5", "1.0.post5"),
            # Local version case insensitivity
            ("1.0+AbC", "1.0+abc"),
            # Integer Normalization
            ("1.01", "1.1"),
            ("1.0a05", "1.0a5"),
            ("1.0b07", "1.0b7"),
            ("1.0c056", "1.0rc56"),
            ("1.0rc09", "1.0rc9"),
            ("1.0.post000", "1.0.post0"),
            ("1.1.dev09000", "1.1.dev9000"),
            ("00!1.2", "1.2"),
            ("0100!0.0", "100!0.0"),
            # Various other normalizations
            ("v1.0", "1.0"),
            ("   v1.0\t\n", "1.0"),
        ],
    )
    def test_normalized_versions(self, version, normalized):
        assert str(Version(version)) == normalized

    @pytest.mark.parametrize(
        ("version", "expected"),
        [
            ("1.0.dev456", "1.0.dev456"),
            ("1.0a1", "1.0a1"),
            ("1.0a2.dev456", "1.0a2.dev456"),
            ("1.0a12.dev456", "1.0a12.dev456"),
            ("1.0a12", "1.0a12"),
            ("1.0b1.dev456", "1.0b1.dev456"),
            ("1.0b2", "1.0b2"),
            ("1.0b2.post345.dev456", "1.0b2.post345.dev456"),
            ("1.0b2.post345", "1.0b2.post345"),
            ("1.0rc1.dev456", "1.0rc1.dev456"),
            ("1.0rc1", "1.0rc1"),
            ("1.0", "1.0"),
            ("1.0.post456.dev34", "1.0.post456.dev34"),
            ("1.0.post456", "1.0.post456"),
            ("1.0.1", "1.0.1"),
            ("0!1.0.2", "1.0.2"),
            ("1.0.3+7", "1.0.3+7"),
            ("0!1.0.4+8.0", "1.0.4+8.0"),
            ("1.0.5+9.5", "1.0.5+9.5"),
            ("1.2+1234.abc", "1.2+1234.abc"),
            ("1.2+123456", "1.2+123456"),
            ("1.2+123abc", "1.2+123abc"),
            ("1.2+123abc456", "1.2+123abc456"),
            ("1.2+abc", "1.2+abc"),
            ("1.2+abc123", "1.2+abc123"),
            ("1.2+abc123def", "1.2+abc123def"),
            ("1.1.dev1", "1.1.dev1"),
            ("7!1.0.dev456", "7!1.0.dev456"),
            ("7!1.0a1", "7!1.0a1"),
            ("7!1.0a2.dev456", "7!1.0a2.dev456"),
            ("7!1.0a12.dev456", "7!1.0a12.dev456"),
            ("7!1.0a12", "7!1.0a12"),
            ("7!1.0b1.dev456", "7!1.0b1.dev456"),
            ("7!1.0b2", "7!1.0b2"),
            ("7!1.0b2.post345.dev456", "7!1.0b2.post345.dev456"),
            ("7!1.0b2.post345", "7!1.0b2.post345"),
            ("7!1.0rc1.dev456", "7!1.0rc1.dev456"),
            ("7!1.0rc1", "7!1.0rc1"),
            ("7!1.0", "7!1.0"),
            ("7!1.0.post456.dev34", "7!1.0.post456.dev34"),
            ("7!1.0.post456", "7!1.0.post456"),
            ("7!1.0.1", "7!1.0.1"),
            ("7!1.0.2", "7!1.0.2"),
            ("7!1.0.3+7", "7!1.0.3+7"),
            ("7!1.0.4+8.0", "7!1.0.4+8.0"),
            ("7!1.0.5+9.5", "7!1.0.5+9.5"),
            ("7!1.1.dev1", "7!1.1.dev1"),
        ],
    )
    def test_version_str_repr(self, version, expected):
        assert str(Version(version)) == expected
        assert repr(Version(version)) == "<Version({0})>".format(repr(expected))

    def test_version_rc_and_c_equals(self):
        assert Version("1.0rc1") == Version("1.0c1")

    @pytest.mark.parametrize("version", VERSIONS)
    def test_version_hash(self, version):
        assert hash(Version(version)) == hash(Version(version))

    @pytest.mark.parametrize(
        ("version", "public"),
        [
            ("1.0", "1.0"),
            ("1.0.dev0", "1.0.dev0"),
            ("1.0.dev6", "1.0.dev6"),
            ("1.0a1", "1.0a1"),
            ("1.0a1.post5", "1.0a1.post5"),
            ("1.0a1.post5.dev6", "1.0a1.post5.dev6"),
            ("1.0rc4", "1.0rc4"),
            ("1.0.post5", "1.0.post5"),
            ("1!1.0", "1!1.0"),
            ("1!1.0.dev6", "1!1.0.dev6"),
            ("1!1.0a1", "1!1.0a1"),
            ("1!1.0a1.post5", "1!1.0a1.post5"),
            ("1!1.0a1.post5.dev6", "1!1.0a1.post5.dev6"),
            ("1!1.0rc4", "1!1.0rc4"),
            ("1!1.0.post5", "1!1.0.post5"),
            ("1.0+deadbeef", "1.0"),
            ("1.0.dev6+deadbeef", "1.0.dev6"),
            ("1.0a1+deadbeef", "1.0a1"),
            ("1.0a1.post5+deadbeef", "1.0a1.post5"),
            ("1.0a1.post5.dev6+deadbeef", "1.0a1.post5.dev6"),
            ("1.0rc4+deadbeef", "1.0rc4"),
            ("1.0.post5+deadbeef", "1.0.post5"),
            ("1!1.0+deadbeef", "1!1.0"),
            ("1!1.0.dev6+deadbeef", "1!1.0.dev6"),
            ("1!1.0a1+deadbeef", "1!1.0a1"),
            ("1!1.0a1.post5+deadbeef", "1!1.0a1.post5"),
            ("1!1.0a1.post5.dev6+deadbeef", "1!1.0a1.post5.dev6"),
            ("1!1.0rc4+deadbeef", "1!1.0rc4"),
            ("1!1.0.post5+deadbeef", "1!1.0.post5"),
        ],
    )
    def test_version_public(self, version, public):
        assert Version(version).public == public

    @pytest.mark.parametrize(
        ("version", "base_version"),
        [
            ("1.0", "1.0"),
            ("1.0.dev0", "1.0"),
            ("1.0.dev6", "1.0"),
            ("1.0a1", "1.0"),
            ("1.0a1.post5", "1.0"),
            ("1.0a1.post5.dev6", "1.0"),
            ("1.0rc4", "1.0"),
            ("1.0.post5", "1.0"),
            ("1!1.0", "1!1.0"),
            ("1!1.0.dev6", "1!1.0"),
            ("1!1.0a1", "1!1.0"),
            ("1!1.0a1.post5", "1!1.0"),
            ("1!1.0a1.post5.dev6", "1!1.0"),
            ("1!1.0rc4", "1!1.0"),
            ("1!1.0.post5", "1!1.0"),
            ("1.0+deadbeef", "1.0"),
            ("1.0.dev6+deadbeef", "1.0"),
            ("1.0a1+deadbeef", "1.0"),
            ("1.0a1.post5+deadbeef", "1.0"),
            ("1.0a1.post5.dev6+deadbeef", "1.0"),
            ("1.0rc4+deadbeef", "1.0"),
            ("1.0.post5+deadbeef", "1.0"),
            ("1!1.0+deadbeef", "1!1.0"),
            ("1!1.0.dev6+deadbeef", "1!1.0"),
            ("1!1.0a1+deadbeef", "1!1.0"),
            ("1!1.0a1.post5+deadbeef", "1!1.0"),
            ("1!1.0a1.post5.dev6+deadbeef", "1!1.0"),
            ("1!1.0rc4+deadbeef", "1!1.0"),
            ("1!1.0.post5+deadbeef", "1!1.0"),
        ],
    )
    def test_version_base_version(self, version, base_version):
        assert Version(version).base_version == base_version

    @pytest.mark.parametrize(
        ("version", "epoch"),
        [
            ("1.0", 0),
            ("1.0.dev0", 0),
            ("1.0.dev6", 0),
            ("1.0a1", 0),
            ("1.0a1.post5", 0),
            ("1.0a1.post5.dev6", 0),
            ("1.0rc4", 0),
            ("1.0.post5", 0),
            ("1!1.0", 1),
            ("1!1.0.dev6", 1),
            ("1!1.0a1", 1),
            ("1!1.0a1.post5", 1),
            ("1!1.0a1.post5.dev6", 1),
            ("1!1.0rc4", 1),
            ("1!1.0.post5", 1),
            ("1.0+deadbeef", 0),
            ("1.0.dev6+deadbeef", 0),
            ("1.0a1+deadbeef", 0),
            ("1.0a1.post5+deadbeef", 0),
            ("1.0a1.post5.dev6+deadbeef", 0),
            ("1.0rc4+deadbeef", 0),
            ("1.0.post5+deadbeef", 0),
            ("1!1.0+deadbeef", 1),
            ("1!1.0.dev6+deadbeef", 1),
            ("1!1.0a1+deadbeef", 1),
            ("1!1.0a1.post5+deadbeef", 1),
            ("1!1.0a1.post5.dev6+deadbeef", 1),
            ("1!1.0rc4+deadbeef", 1),
            ("1!1.0.post5+deadbeef", 1),
        ],
    )
    def test_version_epoch(self, version, epoch):
        assert Version(version).epoch == epoch

    @pytest.mark.parametrize(
        ("version", "release"),
        [
            ("1.0", (1, 0)),
            ("1.0.dev0", (1, 0)),
            ("1.0.dev6", (1, 0)),
            ("1.0a1", (1, 0)),
            ("1.0a1.post5", (1, 0)),
            ("1.0a1.post5.dev6", (1, 0)),
            ("1.0rc4", (1, 0)),
            ("1.0.post5", (1, 0)),
            ("1!1.0", (1, 0)),
            ("1!1.0.dev6", (1, 0)),
            ("1!1.0a1", (1, 0)),
            ("1!1.0a1.post5", (1, 0)),
            ("1!1.0a1.post5.dev6", (1, 0)),
            ("1!1.0rc4", (1, 0)),
            ("1!1.0.post5", (1, 0)),
            ("1.0+deadbeef", (1, 0)),
            ("1.0.dev6+deadbeef", (1, 0)),
            ("1.0a1+deadbeef", (1, 0)),
            ("1.0a1.post5+deadbeef", (1, 0)),
            ("1.0a1.post5.dev6+deadbeef", (1, 0)),
            ("1.0rc4+deadbeef", (1, 0)),
            ("1.0.post5+deadbeef", (1, 0)),
            ("1!1.0+deadbeef", (1, 0)),
            ("1!1.0.dev6+deadbeef", (1, 0)),
            ("1!1.0a1+deadbeef", (1, 0)),
            ("1!1.0a1.post5+deadbeef", (1, 0)),
            ("1!1.0a1.post5.dev6+deadbeef", (1, 0)),
            ("1!1.0rc4+deadbeef", (1, 0)),
            ("1!1.0.post5+deadbeef", (1, 0)),
        ],
    )
    def test_version_release(self, version, release):
        assert Version(version).release == release

    @pytest.mark.parametrize(
        ("version", "local"),
        [
            ("1.0", None),
            ("1.0.dev0", None),
            ("1.0.dev6", None),
            ("1.0a1", None),
            ("1.0a1.post5", None),
            ("1.0a1.post5.dev6", None),
            ("1.0rc4", None),
            ("1.0.post5", None),
            ("1!1.0", None),
            ("1!1.0.dev6", None),
            ("1!1.0a1", None),
            ("1!1.0a1.post5", None),
            ("1!1.0a1.post5.dev6", None),
            ("1!1.0rc4", None),
            ("1!1.0.post5", None),
            ("1.0+deadbeef", "deadbeef"),
            ("1.0.dev6+deadbeef", "deadbeef"),
            ("1.0a1+deadbeef", "deadbeef"),
            ("1.0a1.post5+deadbeef", "deadbeef"),
            ("1.0a1.post5.dev6+deadbeef", "deadbeef"),
            ("1.0rc4+deadbeef", "deadbeef"),
            ("1.0.post5+deadbeef", "deadbeef"),
            ("1!1.0+deadbeef", "deadbeef"),
            ("1!1.0.dev6+deadbeef", "deadbeef"),
            ("1!1.0a1+deadbeef", "deadbeef"),
            ("1!1.0a1.post5+deadbeef", "deadbeef"),
            ("1!1.0a1.post5.dev6+deadbeef", "deadbeef"),
            ("1!1.0rc4+deadbeef", "deadbeef"),
            ("1!1.0.post5+deadbeef", "deadbeef"),
        ],
    )
    def test_version_local(self, version, local):
        assert Version(version).local == local

    @pytest.mark.parametrize(
        ("version", "pre"),
        [
            ("1.0", None),
            ("1.0.dev0", None),
            ("1.0.dev6", None),
            ("1.0a1", ("a", 1)),
            ("1.0a1.post5", ("a", 1)),
            ("1.0a1.post5.dev6", ("a", 1)),
            ("1.0rc4", ("rc", 4)),
            ("1.0.post5", None),
            ("1!1.0", None),
            ("1!1.0.dev6", None),
            ("1!1.0a1", ("a", 1)),
            ("1!1.0a1.post5", ("a", 1)),
            ("1!1.0a1.post5.dev6", ("a", 1)),
            ("1!1.0rc4", ("rc", 4)),
            ("1!1.0.post5", None),
            ("1.0+deadbeef", None),
            ("1.0.dev6+deadbeef", None),
            ("1.0a1+deadbeef", ("a", 1)),
            ("1.0a1.post5+deadbeef", ("a", 1)),
            ("1.0a1.post5.dev6+deadbeef", ("a", 1)),
            ("1.0rc4+deadbeef", ("rc", 4)),
            ("1.0.post5+deadbeef", None),
            ("1!1.0+deadbeef", None),
            ("1!1.0.dev6+deadbeef", None),
            ("1!1.0a1+deadbeef", ("a", 1)),
            ("1!1.0a1.post5+deadbeef", ("a", 1)),
            ("1!1.0a1.post5.dev6+deadbeef", ("a", 1)),
            ("1!1.0rc4+deadbeef", ("rc", 4)),
            ("1!1.0.post5+deadbeef", None),
        ],
    )
    def test_version_pre(self, version, pre):
        assert Version(version).pre == pre

    @pytest.mark.parametrize(
        ("version", "expected"),
        [
            ("1.0.dev0", True),
            ("1.0.dev1", True),
            ("1.0a1.dev1", True),
            ("1.0b1.dev1", True),
            ("1.0c1.dev1", True),
            ("1.0rc1.dev1", True),
            ("1.0a1", True),
            ("1.0b1", True),
            ("1.0c1", True),
            ("1.0rc1", True),
            ("1.0a1.post1.dev1", True),
            ("1.0b1.post1.dev1", True),
            ("1.0c1.post1.dev1", True),
            ("1.0rc1.post1.dev1", True),
            ("1.0a1.post1", True),
            ("1.0b1.post1", True),
            ("1.0c1.post1", True),
            ("1.0rc1.post1", True),
            ("1.0", False),
            ("1.0+dev", False),
            ("1.0.post1", False),
            ("1.0.post1+dev", False),
        ],
    )
    def test_version_is_prerelease(self, version, expected):
        assert Version(version).is_prerelease is expected

    @pytest.mark.parametrize(
        ("version", "dev"),
        [
            ("1.0", None),
            ("1.0.dev0", 0),
            ("1.0.dev6", 6),
            ("1.0a1", None),
            ("1.0a1.post5", None),
            ("1.0a1.post5.dev6", 6),
            ("1.0rc4", None),
            ("1.0.post5", None),
            ("1!1.0", None),
            ("1!1.0.dev6", 6),
            ("1!1.0a1", None),
            ("1!1.0a1.post5", None),
            ("1!1.0a1.post5.dev6", 6),
            ("1!1.0rc4", None),
            ("1!1.0.post5", None),
            ("1.0+deadbeef", None),
            ("1.0.dev6+deadbeef", 6),
            ("1.0a1+deadbeef", None),
            ("1.0a1.post5+deadbeef", None),
            ("1.0a1.post5.dev6+deadbeef", 6),
            ("1.0rc4+deadbeef", None),
            ("1.0.post5+deadbeef", None),
            ("1!1.0+deadbeef", None),
            ("1!1.0.dev6+deadbeef", 6),
            ("1!1.0a1+deadbeef", None),
            ("1!1.0a1.post5+deadbeef", None),
            ("1!1.0a1.post5.dev6+deadbeef", 6),
            ("1!1.0rc4+deadbeef", None),
            ("1!1.0.post5+deadbeef", None),
        ],
    )
    def test_version_dev(self, version, dev):
        assert Version(version).dev == dev

    @pytest.mark.parametrize(
        ("version", "expected"),
        [
            ("1.0", False),
            ("1.0.dev0", True),
            ("1.0.dev6", True),
            ("1.0a1", False),
            ("1.0a1.post5", False),
            ("1.0a1.post5.dev6", True),
            ("1.0rc4", False),
            ("1.0.post5", False),
            ("1!1.0", False),
            ("1!1.0.dev6", True),
            ("1!1.0a1", False),
            ("1!1.0a1.post5", False),
            ("1!1.0a1.post5.dev6", True),
            ("1!1.0rc4", False),
            ("1!1.0.post5", False),
            ("1.0+deadbeef", False),
            ("1.0.dev6+deadbeef", True),
            ("1.0a1+deadbeef", False),
            ("1.0a1.post5+deadbeef", False),
            ("1.0a1.post5.dev6+deadbeef", True),
            ("1.0rc4+deadbeef", False),
            ("1.0.post5+deadbeef", False),
            ("1!1.0+deadbeef", False),
            ("1!1.0.dev6+deadbeef", True),
            ("1!1.0a1+deadbeef", False),
            ("1!1.0a1.post5+deadbeef", False),
            ("1!1.0a1.post5.dev6+deadbeef", True),
            ("1!1.0rc4+deadbeef", False),
            ("1!1.0.post5+deadbeef", False),
        ],
    )
    def test_version_is_devrelease(self, version, expected):
        assert Version(version).is_devrelease is expected

    @pytest.mark.parametrize(
        ("version", "post"),
        [
            ("1.0", None),
            ("1.0.dev0", None),
            ("1.0.dev6", None),
            ("1.0a1", None),
            ("1.0a1.post5", 5),
            ("1.0a1.post5.dev6", 5),
            ("1.0rc4", None),
            ("1.0.post5", 5),
            ("1!1.0", None),
            ("1!1.0.dev6", None),
            ("1!1.0a1", None),
            ("1!1.0a1.post5", 5),
            ("1!1.0a1.post5.dev6", 5),
            ("1!1.0rc4", None),
            ("1!1.0.post5", 5),
            ("1.0+deadbeef", None),
            ("1.0.dev6+deadbeef", None),
            ("1.0a1+deadbeef", None),
            ("1.0a1.post5+deadbeef", 5),
            ("1.0a1.post5.dev6+deadbeef", 5),
            ("1.0rc4+deadbeef", None),
            ("1.0.post5+deadbeef", 5),
            ("1!1.0+deadbeef", None),
            ("1!1.0.dev6+deadbeef", None),
            ("1!1.0a1+deadbeef", None),
            ("1!1.0a1.post5+deadbeef", 5),
            ("1!1.0a1.post5.dev6+deadbeef", 5),
            ("1!1.0rc4+deadbeef", None),
            ("1!1.0.post5+deadbeef", 5),
        ],
    )
    def test_version_post(self, version, post):
        assert Version(version).post == post

    @pytest.mark.parametrize(
        ("version", "expected"),
        [
            ("1.0.dev1", False),
            ("1.0", False),
            ("1.0+foo", False),
            ("1.0.post1.dev1", True),
            ("1.0.post1", True),
        ],
    )
    def test_version_is_postrelease(self, version, expected):
        assert Version(version).is_postrelease is expected

    @pytest.mark.parametrize(
        ("left", "right", "op"),
        # Below we'll generate every possible combination of VERSIONS that
        # should be True for the given operator
        itertools.chain(
            *
            # Verify that the less than (<) operator works correctly
            [
                [(x, y, operator.lt) for y in VERSIONS[i + 1 :]]
                for i, x in enumerate(VERSIONS)
            ]
            +
            # Verify that the less than equal (<=) operator works correctly
            [
                [(x, y, operator.le) for y in VERSIONS[i:]]
                for i, x in enumerate(VERSIONS)
            ]
            +
            # Verify that the equal (==) operator works correctly
            [[(x, x, operator.eq) for x in VERSIONS]]
            +
            # Verify that the not equal (!=) operator works correctly
            [
                [(x, y, operator.ne) for j, y in enumerate(VERSIONS) if i != j]
                for i, x in enumerate(VERSIONS)
            ]
            +
            # Verify that the greater than equal (>=) operator works correctly
            [
                [(x, y, operator.ge) for y in VERSIONS[: i + 1]]
                for i, x in enumerate(VERSIONS)
            ]
            +
            # Verify that the greater than (>) operator works correctly
            [
                [(x, y, operator.gt) for y in VERSIONS[:i]]
                for i, x in enumerate(VERSIONS)
            ]
        ),
    )
    def test_comparison_true(self, left, right, op):
        assert op(Version(left), Version(right))

    @pytest.mark.parametrize(
        ("left", "right", "op"),
        # Below we'll generate every possible combination of VERSIONS that
        # should be False for the given operator
        itertools.chain(
            *
            # Verify that the less than (<) operator works correctly
            [
                [(x, y, operator.lt) for y in VERSIONS[: i + 1]]
                for i, x in enumerate(VERSIONS)
            ]
            +
            # Verify that the less than equal (<=) operator works correctly
            [
                [(x, y, operator.le) for y in VERSIONS[:i]]
                for i, x in enumerate(VERSIONS)
            ]
            +
            # Verify that the equal (==) operator works correctly
            [
                [(x, y, operator.eq) for j, y in enumerate(VERSIONS) if i != j]
                for i, x in enumerate(VERSIONS)
            ]
            +
            # Verify that the not equal (!=) operator works correctly
            [[(x, x, operator.ne) for x in VERSIONS]]
            +
            # Verify that the greater than equal (>=) operator works correctly
            [
                [(x, y, operator.ge) for y in VERSIONS[i + 1 :]]
                for i, x in enumerate(VERSIONS)
            ]
            +
            # Verify that the greater than (>) operator works correctly
            [
                [(x, y, operator.gt) for y in VERSIONS[i:]]
                for i, x in enumerate(VERSIONS)
            ]
        ),
    )
    def test_comparison_false(self, left, right, op):
        assert not op(Version(left), Version(right))

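    def test_comparison_generation_example(self):
        # Illustrative sketch (not part of the vendored suite): for a small,
        # already-ordered toy list, the "<" cases generated above pair each
        # version with every later one -- the same pattern applied to VERSIONS.
        toy = ["1.0a1", "1.0", "1.1"]
        pairs = [(x, y) for i, x in enumerate(toy) for y in toy[i + 1:]]
        assert pairs == [("1.0a1", "1.0"), ("1.0a1", "1.1"), ("1.0", "1.1")]
        assert all(Version(x) < Version(y) for x, y in pairs)
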
    @pytest.mark.parametrize(("op", "expected"), [("eq", False), ("ne", True)])
    def test_compare_other(self, monkeypatch, op, expected):
        other = pretend.stub(**{"__{0}__".format(op): lambda other: NotImplemented})

        assert getattr(operator, op)(Version("1"), other) is expected

    def test_major_version(self):
        assert Version("2.1.0").major == 2

    def test_minor_version(self):
        assert Version("2.1.0").minor == 1
        assert Version("2").minor == 0

    def test_micro_version(self):
        assert Version("2.1.3").micro == 3
        assert Version("2.1").micro == 0
        assert Version("2").micro == 0


================================================
FILE: tests/test_visualizations.py
================================================
import pytest
import numpy as np


def verify_out(capfd, expected):
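    """Assert that the captured stdout matches ``expected`` exactly (echoes it for debugging)."""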
    out, _ = capfd.readouterr()
    print(out)
    assert expected == out


def test_flat_merge_graph(capfd):
    from hangar.diagnostics import Graph

    flat_log_contents = {
        'head': '3c9530ac0da1106c0acbe1201900c51548bbcdd9',
        'ancestors': {
            '0ff3f2ec156ab8e1026b5271630ccae4556cc260': [''],
            '3c9530ac0da1106c0acbe1201900c51548bbcdd9': ['fed88489ab6e59913aee935169b15fe68755d82c'],
            'fed88489ab6e59913aee935169b15fe68755d82c': ['0ff3f2ec156ab8e1026b5271630ccae4556cc260']},
        'specs': {
            '0ff3f2ec156ab8e1026b5271630ccae4556cc260': {
                'commit_message': 'first commit adding training images and labels',
                'commit_time': 1562203787.257128, 'commit_user': 'Foo User', 'commit_email': 'foo@bar.com'},
            '3c9530ac0da1106c0acbe1201900c51548bbcdd9': {
                'commit_message': 'added testing labels only',
                'commit_time': 1562203787.388417, 'commit_user': 'Foo User', 'commit_email': 'foo@bar.com'},
            'fed88489ab6e59913aee935169b15fe68755d82c': {
                'commit_message': 'added testing images only',
                'commit_time': 1562203787.372292, 'commit_user': 'Foo User', 'commit_email': 'foo@bar.com'}},
        'order': ['3c9530ac0da1106c0acbe1201900c51548bbcdd9',
                'fed88489ab6e59913aee935169b15fe68755d82c',
                '0ff3f2ec156ab8e1026b5271630ccae4556cc260']}

    flat_hash_branch_map = {
        '3c9530ac0da1106c0acbe1201900c51548bbcdd9': ['add-test'],
        '0ff3f2ec156ab8e1026b5271630ccae4556cc260': ['untouched-live-demo-branch']}

    g = Graph(use_color=False)
    g.show_nodes(
        dag=flat_log_contents['ancestors'],
        spec=flat_log_contents['specs'],
        branch=flat_hash_branch_map,
        start=flat_log_contents['head'],
        order=flat_log_contents['order'],
        show_time=False,
        show_user=False)

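    # Rendered log format: '*' marks each commit node, branch names (if any)
    # follow the digest in parentheses, and the commit message comes after
    # the colon.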
    expected = '* 3c9530ac0da1106c0acbe1201900c51548bbcdd9 (add-test) : added testing labels only\n'\
               '* fed88489ab6e59913aee935169b15fe68755d82c : added testing images only\n'\
               '* 0ff3f2ec156ab8e1026b5271630ccae4556cc260 (untouched-live-demo-branch) : first commit adding training images and labels\n'

    verify_out(capfd, expected)


def test_three_way_merge_graph(capfd):
    from hangar.diagnostics import Graph

    three_way_log_contents = {
        'head': '074f81d6b9fa5fa856175d47c7cc95cc4a839965',
        'ancestors': {
            '074f81d6b9fa5fa856175d47c7cc95cc4a839965': ['e5ea58dd9c7ffacd45fb128ddc00aced08d13889', '3c9530ac0da1106c0acbe1201900c51548bbcdd9'],
            'e5ea58dd9c7ffacd45fb128ddc00aced08d13889': ['0ff3f2ec156ab8e1026b5271630ccae4556cc260'],
            '0ff3f2ec156ab8e1026b5271630ccae4556cc260': [''],
            '3c9530ac0da1106c0acbe1201900c51548bbcdd9': ['fed88489ab6e59913aee935169b15fe68755d82c'],
            'fed88489ab6e59913aee935169b15fe68755d82c': ['0ff3f2ec156ab8e1026b5271630ccae4556cc260']},
        'specs': {
            '074f81d6b9fa5fa856175d47c7cc95cc4a839965': {
                'commit_message': 'adding in the new testing columns',
                'commit_time': 1562203830.775428, 'commit_user': 'Foo User', 'commit_email': 'foo@bar.com'},
            'e5ea58dd9c7ffacd45fb128ddc00aced08d13889': {
                'commit_message': 'commit adding validation images and labels',
                'commit_time': 1562203787.320624, 'commit_user': 'Foo User', 'commit_email': 'foo@bar.com'},
            '0ff3f2ec156ab8e1026b5271630ccae4556cc260': {
                'commit_message': 'first commit adding training images and labels',
                'commit_time': 1562203787.257128, 'commit_user': 'Foo User', 'commit_email': 'foo@bar.com'},
            '3c9530ac0da1106c0acbe1201900c51548bbcdd9': {
                'commit_message': 'added testing labels only',
                'commit_time': 1562203787.388417, 'commit_user': 'Foo User', 'commit_email': 'foo@bar.com'},
            'fed88489ab6e59913aee935169b15fe68755d82c': {
                'commit_message': 'added testing images only',
                'commit_time': 1562203787.372292, 'commit_user': 'Foo User', 'commit_email': 'foo@bar.com'}},
        'order': ['074f81d6b9fa5fa856175d47c7cc95cc4a839965',
                '3c9530ac0da1106c0acbe1201900c51548bbcdd9',
                'fed88489ab6e59913aee935169b15fe68755d82c',
                'e5ea58dd9c7ffacd45fb128ddc00aced08d13889',
                '0ff3f2ec156ab8e1026b5271630ccae4556cc260']}

    three_way_hash_branch_map = {
        '3c9530ac0da1106c0acbe1201900c51548bbcdd9': ['add-test'],
        'e5ea58dd9c7ffacd45fb128ddc00aced08d13889': ['add-validation'],
        '074f81d6b9fa5fa856175d47c7cc95cc4a839965': ['master'],
        '0ff3f2ec156ab8e1026b5271630ccae4556cc260': ['untouched-live-demo-branch']}

    g = Graph(use_color=False)
    g.show_nodes(
        dag=three_way_log_contents['ancestors'],
        spec=three_way_log_contents['specs'],
        branch=three_way_hash_branch_map,
        start=three_way_log_contents['head'],
        order=three_way_log_contents['order'],
        show_time=False,
        show_user=False)

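    # Merge commits additionally render '|\' rows where a parent lane splits
    # off and '|/' rows where lanes rejoin, in the style of `git log --graph`.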
    real = '*   074f81d6b9fa5fa856175d47c7cc95cc4a839965 (master) : adding in the new testing columns\n'\
           '|\\  \n'\
           '| * 3c9530ac0da1106c0acbe1201900c51548bbcdd9 (add-test) : added testing labels only\n'\
           '| * fed88489ab6e59913aee935169b15fe68755d82c : added testing images only\n'\
           '* | e5ea58dd9c7ffacd45fb128ddc00aced08d13889 (add-validation) : commit adding validation images and labels\n'\
           '|/  \n'\
           '* 0ff3f2ec156ab8e1026b5271630ccae4556cc260 (untouched-live-demo-branch) : first commit adding training images and labels\n'

    verify_out(capfd, real)


def test_octopus_merge_graph(capfd):
    from hangar.diagnostics import Graph

    octopus_log_contents = {
        'head': '05ad17beab54ede8d7f9214c5c6ae44509c3da97',
        'ancestors': {
            '05ad17beab54ede8d7f9214c5c6ae44509c3da97': ['b9c7da873c06c730f52bad5808df5312c4cc0a38', '1b49223ae5e731da3750e4836d14565dbe504f18'],
            'b9c7da873c06c730f52bad5808df5312c4cc0a38': ['a74236e598b96dcde10b176921eb58bb4a9c64bf', 'c4d6875caeff83a29413ae163dbcfdc3c57ad373'],
            'a74236e598b96dcde10b176921eb58bb4a9c64bf': ['9152a4578f74b36838f8187e43c8644b1eba47b5'],
            '9152a4578f74b36838f8187e43c8644b1eba47b5': ['ef7b6e5bcaaebf62b9e02902ff60eb7862c3472d', '21f274d31abc09ede4ad6753f079297885b02a09'],
            'ef7b6e5bcaaebf62b9e02902ff60eb7862c3472d': ['e9ca97e336496b1fceb75869adf0294af5635922'],
            'e9ca97e336496b1fceb75869adf0294af5635922': ['489bceb38246f27cae2a0f47eba0e488d95618db'],
            '489bceb38246f27cae2a0f47eba0e488d95618db': ['17286961175c5cbbd4381fef07cc0a20920a5ce6'],
            '17286961175c5cbbd4381fef07cc0a20920a5ce6': ['63ac654df43bd149a1ca5f919e714bc57e69af99'],
            '63ac654df43bd149a1ca5f919e714bc57e69af99': [''],
            '1b49223ae5e731da3750e4836d14565dbe504f18': ['9152a4578f74b36838f8187e43c8644b1eba47b5'],
            'c4d6875caeff83a29413ae163dbcfdc3c57ad373': ['63ac654df43bd149a1ca5f919e714bc57e69af99'],
            '21f274d31abc09ede4ad6753f079297885b02a09': ['5c0ea20c6513f135f0131d9e10d86801ded29537'],
            '5c0ea20c6513f135f0131d9e10d86801ded29537': ['10e84be056afb2ace6b7ba044ce1e9c9811eae4f'],
            '10e84be056afb2ace6b7ba044ce1e9c9811eae4f': ['e9ca97e336496b1fceb75869adf0294af5635922']},
        'specs': {
            '05ad17beab54ede8d7f9214c5c6ae44509c3da97': {'commit_message': 'try number two',
                'commit_time': 1562363265.6635652, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            'b9c7da873c06c730f52bad5808df5312c4cc0a38': {'commit_message': 'merging the long running branch into master',
                'commit_time': 1562363265.652887, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            'a74236e598b96dcde10b176921eb58bb4a9c64bf': {'commit_message': 'another on master',
                'commit_time': 1562363265.6346502, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '9152a4578f74b36838f8187e43c8644b1eba47b5': {'commit_message': 'this is the first merge',
                'commit_time': 1562363265.578071, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            'ef7b6e5bcaaebf62b9e02902ff60eb7862c3472d': {'commit_message': 'third commit on master',
                'commit_time': 1562363265.4683158, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            'e9ca97e336496b1fceb75869adf0294af5635922': {'commit_message': 'second commit on master with training labels',
                'commit_time': 1562363265.398268, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '489bceb38246f27cae2a0f47eba0e488d95618db': {'commit_message': 'second',
                'commit_time': 1562363264.7388191, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '17286961175c5cbbd4381fef07cc0a20920a5ce6': {'commit_message': 'hi',
                'commit_time': 1562363264.735318, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '63ac654df43bd149a1ca5f919e714bc57e69af99': {'commit_message': 'initial commit on master with training images',
                'commit_time': 1562363264.731286, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '1b49223ae5e731da3750e4836d14565dbe504f18': {'commit_message': 'another on try delete',
                'commit_time': 1562363265.642503, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            'c4d6875caeff83a29413ae163dbcfdc3c57ad373': {'commit_message': 'first commit on the large branch',
                'commit_time': 1562363265.374819, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '21f274d31abc09ede4ad6753f079297885b02a09': {'commit_message': 'another commit on test branch after adding to new_set',
                'commit_time': 1562363265.56455, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '5c0ea20c6513f135f0131d9e10d86801ded29537': {'commit_message': 'second commit on test branch with new aset',
                'commit_time': 1562363265.545484, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '10e84be056afb2ace6b7ba044ce1e9c9811eae4f': {'commit_message': 'first commit on test branch',
                'commit_time': 1562363265.524131, 'commit_user': 'test user', 'commit_email': 'test@email.com'}},
        'order': [
            '05ad17beab54ede8d7f9214c5c6ae44509c3da97',
            'b9c7da873c06c730f52bad5808df5312c4cc0a38',
            '1b49223ae5e731da3750e4836d14565dbe504f18',
            'a74236e598b96dcde10b176921eb58bb4a9c64bf',
            '9152a4578f74b36838f8187e43c8644b1eba47b5',
            '21f274d31abc09ede4ad6753f079297885b02a09',
            '5c0ea20c6513f135f0131d9e10d86801ded29537',
            '10e84be056afb2ace6b7ba044ce1e9c9811eae4f',
            'ef7b6e5bcaaebf62b9e02902ff60eb7862c3472d',
            'e9ca97e336496b1fceb75869adf0294af5635922',
            'c4d6875caeff83a29413ae163dbcfdc3c57ad373',
            '489bceb38246f27cae2a0f47eba0e488d95618db',
            '17286961175c5cbbd4381fef07cc0a20920a5ce6',
            '63ac654df43bd149a1ca5f919e714bc57e69af99']
        }

    octopus_hash_branch_map = {
        'c4d6875caeff83a29413ae163dbcfdc3c57ad373': ['large_branch'],
        '21f274d31abc09ede4ad6753f079297885b02a09': ['test_branch'],
        '05ad17beab54ede8d7f9214c5c6ae44509c3da97': ['master'],
        '1b49223ae5e731da3750e4836d14565dbe504f18': ['trydelete']}

    g = Graph(use_color=False)
    g.show_nodes(
        dag=octopus_log_contents['ancestors'],
        spec=octopus_log_contents['specs'],
        branch=octopus_hash_branch_map,
        start=octopus_log_contents['head'],
        order=octopus_log_contents['order'],
        show_time=False,
        show_user=False)

    real = '*   05ad17beab54ede8d7f9214c5c6ae44509c3da97 (master) : try number two\n'\
           '|\\  \n'\
           '* \\   b9c7da873c06c730f52bad5808df5312c4cc0a38 : merging the long running branch into master\n'\
           '|\\ \\  \n'\
           '| | * 1b49223ae5e731da3750e4836d14565dbe504f18 (trydelete) : another on try delete\n'\
           '* | | a74236e598b96dcde10b176921eb58bb4a9c64bf : another on master\n'\
           '| |/  \n'\
           '|/|   \n'\
           '* |   9152a4578f74b36838f8187e43c8644b1eba47b5 : this is the first merge\n'\
           '|\\ \\  \n'\
           '| * | 21f274d31abc09ede4ad6753f079297885b02a09 (test_branch) : another commit on test branch after adding to new_set\n'\
           '| * | 5c0ea20c6513f135f0131d9e10d86801ded29537 : second commit on test branch with new aset\n'\
           '| * | 10e84be056afb2ace6b7ba044ce1e9c9811eae4f : first commit on test branch\n'\
           '* | | ef7b6e5bcaaebf62b9e02902ff60eb7862c3472d : third commit on master\n'\
           '|/ /  \n'\
           '* | e9ca97e336496b1fceb75869adf0294af5635922 : second commit on master with training labels\n'\
           '| * c4d6875caeff83a29413ae163dbcfdc3c57ad373 (large_branch) : first commit on the large branch\n'\
           '* | 489bceb38246f27cae2a0f47eba0e488d95618db : second\n'\
           '* | 17286961175c5cbbd4381fef07cc0a20920a5ce6 : hi\n'\
           '|/  \n'\
           '* 63ac654df43bd149a1ca5f919e714bc57e69af99 : initial commit on master with training images\n'

    verify_out(capfd, real)


def test_octopus_large_merge_graph(capfd):
    from hangar.diagnostics import Graph

    octopus_log_contents = {
        'head': 'ddeeff',
        'ancestors': {
            '05ad17beab54ede8d7f9214c5c6ae44509c3da97': ['b9c7da873c06c730f52bad5808df5312c4cc0a38', '1b49223ae5e731da3750e4836d14565dbe504f18'],
            'b9c7da873c06c730f52bad5808df5312c4cc0a38': ['a74236e598b96dcde10b176921eb58bb4a9c64bf', 'c4d6875caeff83a29413ae163dbcfdc3c57ad373', 'e9ca97e336496b1fceb75869adf0294af5635922'],
            'a74236e598b96dcde10b176921eb58bb4a9c64bf': ['9152a4578f74b36838f8187e43c8644b1eba47b5', '21f274d31abc09ede4ad6753f079297885b02a09'],
            '9152a4578f74b36838f8187e43c8644b1eba47b5': ['ef7b6e5bcaaebf62b9e02902ff60eb7862c3472d', '21f274d31abc09ede4ad6753f079297885b02a09'],
            'ef7b6e5bcaaebf62b9e02902ff60eb7862c3472d': ['e9ca97e336496b1fceb75869adf0294af5635922', 'c4d6875caeff83a29413ae163dbcfdc3c57ad373'],
            'e9ca97e336496b1fceb75869adf0294af5635922': ['489bceb38246f27cae2a0f47eba0e488d95618db'],
            '489bceb38246f27cae2a0f47eba0e488d95618db': ['17286961175c5cbbd4381fef07cc0a20920a5ce6'],
            '17286961175c5cbbd4381fef07cc0a20920a5ce6': ['63ac654df43bd149a1ca5f919e714bc57e69af99'],
            '63ac654df43bd149a1ca5f919e714bc57e69af99': [''],
            '1b49223ae5e731da3750e4836d14565dbe504f18': ['9152a4578f74b36838f8187e43c8644b1eba47b5', 'a74236e598b96dcde10b176921eb58bb4a9c64bf'],
            'c4d6875caeff83a29413ae163dbcfdc3c57ad373': ['63ac654df43bd149a1ca5f919e714bc57e69af99'],
            '21f274d31abc09ede4ad6753f079297885b02a09': ['5c0ea20c6513f135f0131d9e10d86801ded29537'],
            '5c0ea20c6513f135f0131d9e10d86801ded29537': ['10e84be056afb2ace6b7ba044ce1e9c9811eae4f', 'ef7b6e5bcaaebf62b9e02902ff60eb7862c3472d'],
            '10e84be056afb2ace6b7ba044ce1e9c9811eae4f': ['e9ca97e336496b1fceb75869adf0294af5635922', 'c4d6875caeff83a29413ae163dbcfdc3c57ad373'],
            'aabbcc': ['9152a4578f74b36838f8187e43c8644b1eba47b5', '5c0ea20c6513f135f0131d9e10d86801ded29537'],
            'ddeeff': ['aabbcc', '05ad17beab54ede8d7f9214c5c6ae44509c3da97'],
        },
        'specs': {
            'ddeeff': {'commit_message': 'new master',
                'commit_time': 1562363266.6635652, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '05ad17beab54ede8d7f9214c5c6ae44509c3da97': {'commit_message': 'try number two',
                'commit_time': 1562363265.6635652, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            'b9c7da873c06c730f52bad5808df5312c4cc0a38': {'commit_message': 'merging the long running branch into master',
                'commit_time': 1562363265.652887, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            'a74236e598b96dcde10b176921eb58bb4a9c64bf': {'commit_message': 'another on master',
                'commit_time': 1562363265.6346502, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '9152a4578f74b36838f8187e43c8644b1eba47b5': {'commit_message': 'this is the first merge',
                'commit_time': 1562363265.578071, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            'ef7b6e5bcaaebf62b9e02902ff60eb7862c3472d': {'commit_message': 'third commit on master',
                'commit_time': 1562363265.4683158, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            'e9ca97e336496b1fceb75869adf0294af5635922': {'commit_message': 'second commit on master with training labels',
                'commit_time': 1562363265.398268, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '489bceb38246f27cae2a0f47eba0e488d95618db': {'commit_message': 'second',
                'commit_time': 1562363264.7388191, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '17286961175c5cbbd4381fef07cc0a20920a5ce6': {'commit_message': 'hi',
                'commit_time': 1562363264.735318, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '63ac654df43bd149a1ca5f919e714bc57e69af99': {'commit_message': 'initial commit on master with training images',
                'commit_time': 1562363264.731286, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '1b49223ae5e731da3750e4836d14565dbe504f18': {'commit_message': 'another on try delete',
                'commit_time': 1562363265.642503, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            'aabbcc': {'commit_message': 'made up b',
                'commit_time': 1562363265.640021, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            'c4d6875caeff83a29413ae163dbcfdc3c57ad373': {'commit_message': 'first commit on the large branch',
                'commit_time': 1562363265.374819, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '21f274d31abc09ede4ad6753f079297885b02a09': {'commit_message': 'another commit on test branch after adding to new_set',
                'commit_time': 1562363265.56455, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '5c0ea20c6513f135f0131d9e10d86801ded29537': {'commit_message': 'second commit on test branch with new aset',
                'commit_time': 1562363265.545484, 'commit_user': 'test user', 'commit_email': 'test@email.com'},
            '10e84be056afb2ace6b7ba044ce1e9c9811eae4f': {'commit_message': 'first commit on test branch',
                'commit_time': 1562363265.524131, 'commit_user': 'test user', 'commit_email': 'test@email.com'}},
        'order': [
            'ddeeff',
            '05ad17beab54ede8d7f9214c5c6ae44509c3da97',
            'b9c7da873c06c730f52bad5808df5312c4cc0a38',
            '1b49223ae5e731da3750e4836d14565dbe504f18',
            'aabbcc',
            'a74236e598b96dcde10b176921eb58bb4a9c64bf',
            '9152a4578f74b36838f8187e43c8644b1eba47b5',
            '21f274d31abc09ede4ad6753f079297885b02a09',
            '5c0ea20c6513f135f0131d9e10d86801ded29537',
            '10e84be056afb2ace6b7ba044ce1e9c9811eae4f',
            'ef7b6e5bcaaebf62b9e02902ff60eb7862c3472d',
            'e9ca97e336496b1fceb75869adf0294af5635922',
            'c4d6875caeff83a29413ae163dbcfdc3c57ad373',
            '489bceb38246f27cae2a0f47eba0e488d95618db',
            '17286961175c5cbbd4381fef07cc0a20920a5ce6',
            '63ac654df43bd149a1ca5f919e714bc57e69af99']
        }

    octopus_hash_branch_map = {
        'c4d6875caeff83a29413ae163dbcfdc3c57ad373': ['large_branch'],
        '21f274d31abc09ede4ad6753f079297885b02a09': ['test_branch'],
        'ddeeff': ['master'],
        '1b49223ae5e731da3750e4836d14565dbe504f18': ['trydelete'],
        'aabbcc': ['madeupbranch']
    }

    g = Graph(use_color=False)
    g.show_nodes(
        dag=octopus_log_contents['ancestors'],
        spec=octopus_log_contents['specs'],
        branch=octopus_hash_branch_map,
        start=octopus_log_contents['head'],
        order=octopus_log_contents['order'],
        show_time=True,
        show_user=True)

    real = '*   ddeeff (master) (05Jul2019 21:47:46)(test user): new master\n'\
           '|\\  \n'\
           '| *   05ad17beab54ede8d7f9214c5c6ae44509c3da97 (05Jul2019 21:47:45)(test user): try number two\n'\
           '| |\\  \n'\
           '| | \\     \n'\
           '| |  \\    \n'\
           '| *-. \\   b9c7da873c06c730f52bad5808df5312c4cc0a38 (05Jul2019 21:47:45)(test user): merging the long running branch into master\n'\
           '| |\\ \\ \\  \n'\
           '| | | | *   1b49223ae5e731da3750e4836d14565dbe504f18 (trydelete) (05Jul2019 21:47:45)(test user): another on try delete\n'\
           '| | | | |\\  \n'\
           '| | |_|_|/  \n'\
           '| |/| | |   \n'\
           '* | | | |   aabbcc (madeupbranch) (05Jul2019 21:47:45)(test user): made up b\n'\
           '|\\ \\ \\ \\ \\  \n'\
           '| |_|_|_|/  \n'\
           '|/| | | |   \n'\
           '| | * | |   a74236e598b96dcde10b176921eb58bb4a9c64bf (05Jul2019 21:47:45)(test user): another on master\n'\
           '| | |\\ \\ \\  \n'\
           '| |/ / / /  \n'\
           '|/| | | |   \n'\
           '* | | | |   9152a4578f74b36838f8187e43c8644b1eba47b5 (05Jul2019 21:47:45)(test user): this is the first merge\n'\
           '|\\ \\ \\ \\ \\  \n'\
           '| | |/ / /  \n'\
           '| |/| | |   \n'\
           '| * | 21f274d31abc09ede4ad6753f079297885b02a09 (test_branch) (05Jul2019 21:47:45)(test user): another commit on test branch after adding to new_set\n'\
           '| |/ / /  \n'\
           '| * | |   5c0ea20c6513f135f0131d9e10d86801ded29537 (05Jul2019 21:47:45)(test user): second commit on test branch with new aset\n'\
           '| |\\ \\ \\  \n'\
           '| |/ / /  \n'\
           '|/| | |   \n'\
           '| * | |   10e84be056afb2ace6b7ba044ce1e9c9811eae4f (05Jul2019 21:47:45)(test user): first commit on test branch\n'\
           '| |\\ \\ \\  \n'\
           '| | |/ /  \n'\
           '| | | /   \n'\
           '| | |/    \n'\
           '| |/|     \n'\
           '* | |   ef7b6e5bcaaebf62b9e02902ff60eb7862c3472d (05Jul2019 21:47:45)(test user): third commit on master\n'\
           '|\\ \\ \\  \n'\
           '| |/ /  \n'\
           '|/| /   \n'\
           '| |/    \n'\
           '* | e9ca97e336496b1fceb75869adf0294af5635922 (05Jul2019 21:47:45)(test user): second commit on master with training labels\n'\
           '| * c4d6875caeff83a29413ae163dbcfdc3c57ad373 (large_branch) (05Jul2019 21:47:45)(test user): first commit on the large branch\n'\
           '* | 489bceb38246f27cae2a0f47eba0e488d95618db (05Jul2019 21:47:44)(test user): second\n'\
           '* | 17286961175c5cbbd4381fef07cc0a20920a5ce6 (05Jul2019 21:47:44)(test user): hi\n'\
           '|/  \n'\
           '* 63ac654df43bd149a1ca5f919e714bc57e69af99 (05Jul2019 21:47:44)(test user): initial commit on master with training images\n'

    verify_out(capfd, real)


def test_repo_log_return_contents_correct_default_args(repo):

    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co['test_meta']['foo'] = 'bar'
    ancestor_digest = co.commit('first')
    co['test_meta']['hello'] = 'world'
    master_head = co.commit('second')
    co.close()

    ancestor_branch = repo.create_branch('ancestor', base_commit=ancestor_digest)
    dev_branch = repo.create_branch('dev', base_commit=ancestor_digest)

    co = repo.checkout(write=True, branch=dev_branch.name)
    co['test_meta']['zen'] = 'of python'
    dev_head = co.commit('third on test')
    co.close()

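    # With return_contents=True, repo.log() hands the history back as a dict:
    # head digest, ancestor map, commit specs, topological order, and branch
    # heads (each checked below).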
    log = repo.log(return_contents=True)

    assert log['head'] == dev_head

    expected_ancestors = {
        dev_head: [ancestor_digest],
        ancestor_digest: [''],
    }
    assert log['ancestors'] == expected_ancestors

    assert len(log['specs']) == 2
    assert len(log['specs'][ancestor_digest]) == 4
    assert len(log['specs'][dev_head]) == 4
    assert log['specs'][ancestor_digest]['commit_message'] == 'first'
    assert log['specs'][dev_head]['commit_message'] == 'third on test'

    assert log['order'] == [dev_head, ancestor_digest]

    assert len(log['branch_heads']) == 2
    assert log['branch_heads'][ancestor_digest] == [ancestor_branch.name]
    assert log['branch_heads'][dev_head] == [dev_branch.name]


def test_repo_log_return_contents_correct_when_specify_branch_name(repo):

    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co['test_meta']['foo'] = 'bar'
    ancestor_digest = co.commit('first')
    co['test_meta']['hello'] = 'world'
    master_head = co.commit('second')
    co.close()

    ancestor_branch = repo.create_branch('ancestor', base_commit=ancestor_digest)
    dev_branch = repo.create_branch('dev', base_commit=ancestor_digest)

    co = repo.checkout(write=True, branch=dev_branch.name)
    co['test_meta']['zen'] = 'of python'
    dev_head = co.commit('third on test')
    co.close()

    log = repo.log(branch='master', return_contents=True)

    assert log['head'] == master_head

    expected_ancestors = {
        master_head: [ancestor_digest],
        ancestor_digest: [''],
    }
    assert log['ancestors'] == expected_ancestors

    assert len(log['specs']) == 2
    assert len(log['specs'][ancestor_digest]) == 4
    assert len(log['specs'][master_head]) == 4
    assert log['specs'][ancestor_digest]['commit_message'] == 'first'
    assert log['specs'][master_head]['commit_message'] == 'second'

    assert log['order'] == [master_head, ancestor_digest]

    assert len(log['branch_heads']) == 2
    assert log['branch_heads'][ancestor_digest] == [ancestor_branch.name]
    assert log['branch_heads'][master_head] == ['master']


def test_repo_log_return_contents_correct_when_specify_digest(repo):

    co = repo.checkout(write=True)
    co.add_str_column('test_meta')
    co['test_meta']['foo'] = 'bar'
    ancestor_digest = co.commit('first')
    co['test_meta']['hello'] = 'world'
    master_head = co.commit('second')
    co.close()

    ancestor_branch = repo.create_branch('ancestor', base_commit=ancestor_digest)
    dev_branch = repo.create_branch('dev', base_commit=ancestor_digest)

    co = repo.checkout(write=True, branch=dev_branch.name)
    co['test_meta']['zen'] = 'of python'
    dev_head = co.commit('third on test')
    co.close()

    log = repo.log(commit=master_head, return_contents=True)

    assert log['head'] == master_head

    expected_ancestors = {
        master_head: [ancestor_digest],
        ancestor_digest: [''],
    }
    assert log['ancestors'] == expected_ancestors

    assert len(log['specs']) == 2
    assert len(log['specs'][ancestor_digest]) == 4
    assert len(log['specs'][master_head]) == 4
    assert log['specs'][ancestor_digest]['commit_message'] == 'first'
    assert log['specs'][master_head]['commit_message'] == 'second'

    assert log['order'] == [master_head, ancestor_digest]

    assert len(log['branch_heads']) == 2
    assert log['branch_heads'][ancestor_digest] == [ancestor_branch.name]
    assert log['branch_heads'][master_head] == ['master']


================================================
FILE: tests/typesystem/test_ndarray_typesysem.py
================================================
import pytest
import numpy as np


from hangar.typesystem import NdarrayFixedShape, NdarrayVariableShape


class TestInvalidValues:

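    # Shape specs must be tuples of ints describing fewer than 32 dimensions;
    # other containers or non-integer entries are rejected.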
    @pytest.mark.parametrize('shape,expected_exc', [
        [tuple(range(32)), ValueError],
        [(1.2, 2), TypeError],
        [[1, 2], TypeError],
        ['shouldntwork', TypeError],
    ])
    def test_shape_not_tuple_of_int_less_than_32_dims(self, shape, expected_exc):
        with pytest.raises(expected_exc):
            NdarrayFixedShape(shape=shape, dtype=np.uint8, column_layout='flat')
        with pytest.raises(expected_exc):
            NdarrayVariableShape(shape=shape, dtype=np.uint8, column_layout='flat')

    @pytest.mark.parametrize(
        'coltype', ['str', str, 'notvalid', None, 32, 3.5, {'foo': 'bar'}, ascii])
    def test_column_type_must_be_ndarray(self, coltype):
        with pytest.raises(ValueError):
            NdarrayFixedShape(shape=(1,), dtype=np.uint8, column_layout='flat', column_type=coltype)
        with pytest.raises(ValueError):
            NdarrayVariableShape(shape=(1,), dtype=np.uint8, column_layout='flat', column_type=coltype)

    @pytest.mark.parametrize(
        'collayout', ['f', 'n', 'notvalid', None, 32, 3.5, {'foo': 'bar'}, ascii])
    def test_column_layout_must_be_valid_value(self, collayout):
        with pytest.raises(ValueError):
            NdarrayFixedShape(shape=(1,), dtype=np.uint8, column_layout=collayout)
        with pytest.raises(ValueError):
            NdarrayVariableShape(shape=(1,), dtype=np.uint8, column_layout=collayout)

    @pytest.mark.parametrize(
        'backend', ['30', 24, {'10': '10'}, ('00',), ['50', ], ascii, 'None'])
    def test_fixed_shape_backend_code_valid_value(self, backend):
        with pytest.raises(ValueError):
            NdarrayFixedShape(shape=(1,), dtype=np.uint8, column_layout='flat', backend=backend)

    @pytest.mark.parametrize(
        'backend', ['30', '01', 24, {'10': '10'}, ('00',), ['50', ], ascii, 'None'])
    def test_variable_shape_backend_code_valid_value(self, backend):
        with pytest.raises(ValueError):
            NdarrayVariableShape(shape=(1,), dtype=np.uint8, column_layout='flat', backend=backend)

    @pytest.mark.parametrize(
        'opts', ['val', [], (), [('key', 'val')], 10, ({'key': 'val'},), ascii])
    def test_backend_options_must_be_dict_or_nonetype(self, opts):
        with pytest.raises(TypeError):
            NdarrayFixedShape(shape=(1,), dtype=np.uint8, column_layout='flat', backend='00', backend_options=opts)
        with pytest.raises(TypeError):
            NdarrayVariableShape(shape=(1,), dtype=np.uint8, column_layout='flat', backend='00', backend_options=opts)

    def test_backend_must_be_specified_if_backend_options_provided(self):
        with pytest.raises(ValueError):
            NdarrayFixedShape(shape=(1,), dtype=np.uint8, column_layout='flat', backend_options={})
        with pytest.raises(ValueError):
            NdarrayVariableShape(shape=(1,), dtype=np.uint8, column_layout='flat', backend_options={})

    @pytest.mark.parametrize(
        'schema_type', ['fixed_shape', True, 'str', np.uint8, 3, ascii])
    def test_variable_shape_must_have_variable_shape_schema_type(self, schema_type):
        with pytest.raises(ValueError):
            NdarrayVariableShape(shape=(1,), dtype=np.uint8, column_layout='flat', schema_type=schema_type)

    @pytest.mark.parametrize(
        'schema_type', ['variable_shape', True, 'str', np.uint8, 3, ascii])
    def test_fixed_shape_must_have_fixed_shape_schema_type(self, schema_type):
        with pytest.raises(ValueError):
            NdarrayFixedShape(shape=(1,), dtype=np.uint8, column_layout='flat', schema_type=schema_type)



================================================
FILE: tests/typesystem/test_pybytes_typesystem.py
================================================
import pytest
import numpy as np

from hangar.typesystem import BytesVariableShape


class TestInvalidValues:

    @pytest.mark.parametrize('coltype', ['ndarray', np.ndarray, 32, {'foo': 'bar'}, ascii])
    def test_column_type_must_be_str(self, coltype):
        with pytest.raises(ValueError):
            BytesVariableShape(dtype=bytes, column_layout='flat', column_type=coltype)

    @pytest.mark.parametrize('collayout', ['f', 'n', None, 32, {'foo': 'bar'}, ascii])
    def test_column_layout_must_be_valid_value(self, collayout):
        with pytest.raises(ValueError):
            BytesVariableShape(dtype=bytes, column_layout=collayout)

    @pytest.mark.parametrize('backend', ['00', 24, {'31': '31'}, ('31',), ['50', ], ascii, 'None'])
    def test_variable_shape_backend_code_valid_value(self, backend):
        with pytest.raises(ValueError):
            BytesVariableShape(dtype=bytes, column_layout='flat', backend=backend)

    @pytest.mark.parametrize('opts', ['val', [], (), [('k', 'v')], 10, ({'k': 'v'},), ascii])
    def test_backend_options_must_be_dict_or_nonetype(self, opts):
        with pytest.raises(TypeError):
            BytesVariableShape(dtype=bytes, column_layout='flat', backend='31', backend_options=opts)

    def test_backend_must_be_specified_if_backend_options_provided(self):
        with pytest.raises(ValueError):
            BytesVariableShape(dtype=bytes, column_layout='flat', backend_options={})

    @pytest.mark.parametrize('schema_type', ['fixed_shape', True, 'str', np.uint8, 3, ascii])
    def test_variable_shape_must_have_variable_shape_schema_type(self, schema_type):
        with pytest.raises(ValueError):
            BytesVariableShape(dtype=bytes, column_layout='flat', schema_type=schema_type)


# ----------------------- Fixtures for Valid Schema ---------------------------


@pytest.fixture(params=['nested', 'flat'], scope='class')
def column_layout(request):
    return request.param


@pytest.fixture(params=['31'], scope='class')
def backend(request):
    return request.param


@pytest.fixture(params=[{}], scope='class')
def backend_options(request):
    return request.param


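# valid_schema combines the class-scoped column_layout / backend /
# backend_options fixtures above, so each test in TestValidSchema runs once
# per parameter combination.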
@pytest.fixture(scope='class')
def valid_schema(column_layout, backend, backend_options):
    schema = BytesVariableShape(
        dtype=bytes, column_layout=column_layout, backend=backend, backend_options=backend_options)
    return schema


class TestValidSchema:

    @pytest.mark.parametrize('data', [
        b'hello', b'world how are you?', b'\n what\'s up',
        b'loob!', b'lol',
        (b"\x80\x04\x95'\x00\x00\x00\x00\x00\x00\x00\x8c\x08__main__"
         b"\x94\x8c\x07testobj\x94\x93\x94)\x81\x94}\x94\x8c\x04name\x94Nsb.")
    ])
    def test_valid_data(self, valid_schema, data):
        res = valid_schema.verify_data_compatible(data)
        assert res.compatible is True
        assert res.reason == ''

    def test_data_over_2MB_size_not_allowed(self, valid_schema):
        data = ''.join(['a' for _ in range(2_000_001)]).encode()
        res = valid_schema.verify_data_compatible(data)
        assert res.compatible is False





================================================
FILE: tests/typesystem/test_pystr_typesystem.py
================================================
import pytest
import numpy as np
from random import randint, choices


from hangar.typesystem import StringVariableShape


class TestInvalidValues:

    @pytest.mark.parametrize('coltype', ['ndarray', np.ndarray, 32, {'foo': 'bar'}, ascii])
    def test_column_type_must_be_str(self, coltype):
        with pytest.raises(ValueError):
            StringVariableShape(dtype=str, column_layout='flat', column_type=coltype)

    @pytest.mark.parametrize('collayout', ['f', 'n', None, 32, {'foo': 'bar'}, ascii])
    def test_column_layout_must_be_valid_value(self, collayout):
        with pytest.raises(ValueError):
            StringVariableShape(dtype=str, column_layout=collayout)

    @pytest.mark.parametrize('backend', ['00', 24, {'30': '30'}, ('30',), ['50',], ascii, 'None'])
    def test_variable_shape_backend_code_valid_value(self, backend):
        with pytest.raises(ValueError):
            StringVariableShape(dtype=str, column_layout='flat', backend=backend)

    @pytest.mark.parametrize('opts', ['val', [], (), [('k', 'v')], 10, ({'k': 'v'},), ascii])
    def test_backend_options_must_be_dict_or_nonetype(self, opts):
        with pytest.raises(TypeError):
            StringVariableShape(dtype=str, column_layout='flat', backend='30', backend_options=opts)

    def test_backend_must_be_specified_if_backend_options_provided(self):
        with pytest.raises(ValueError):
            StringVariableShape(dtype=str, column_layout='flat', backend_options={})

    @pytest.mark.parametrize('schema_type', ['fixed_shape', True, 'str', np.uint8, 3, ascii])
    def test_variable_shape_must_have_variable_shape_schema_type(self, schema_type):
        with pytest.raises(ValueError):
            StringVariableShape(dtype=str, column_layout='flat', schema_type=schema_type)


# ----------------------- Fixtures for Valid Schema ---------------------------


@pytest.fixture(params=['nested', 'flat'], scope='class')
def column_layout(request):
    return request.param


@pytest.fixture(params=['30'], scope='class')
def backend(request):
    return request.param


@pytest.fixture(params=[{}], scope='class')
def backend_options(request):
    return request.param


@pytest.fixture(scope='class')
def valid_schema(column_layout, backend, backend_options):
    schema = StringVariableShape(
        dtype=str, column_layout=column_layout, backend=backend, backend_options=backend_options)
    return schema


class TestValidSchema:

    @pytest.mark.parametrize('data', [
        'hello', 'world how are you?', '\n what\'s up',
        'loob!', 'a\xac\u1234\u20ac\U00008000', 'lol'
    ])
    def test_valid_data(self, valid_schema, data):
        res = valid_schema.verify_data_compatible(data)
        assert res.compatible is True
        assert res.reason == ''

    @pytest.mark.parametrize('data', [chr(24523), chr(253), chr(6222)])
    def test_large_unicode_codepoints_strings_compatible(self, valid_schema, data):
        res = valid_schema.verify_data_compatible(data)
        assert res.compatible is True
        assert res.reason == ''

    def test_strings_over_2MB_size_not_allowed(self, valid_schema):
        data = ''.join(['a' for _ in range(2_000_001)])
        res = valid_schema.verify_data_compatible(data)
        assert res.compatible is False





================================================
FILE: tox.ini
================================================
[tox]
envlist =
    clean,
    docs,
    py{36,37,38}-cov{yes,no}-ml{yes,no},
    report,
    mypy

# -------------- dependency setup ---------------

[gh-actions]
python =
    3.6: py36
    3.7: py37
    3.8: py38

[gh-actions:env]
TESTCOVER =
    yes: covyes
    no: covno
TESTML =
    yes: mlyes
    no: mlno

[base]
deps =
    Cython
    py{36,37,38}: pytest
        pytest-xdist
    py{36,37,38}-mlno: hypothesis[numpy]
        pretend
    py{36,37,38}-covyes: pytest-cov
    py{36,37,38}-cov{yes,no}-mlyes,docs: tensorflow-cpu == 2.2.0
    py{36,37,38}-cov{yes,no}-mlyes,docs: torch == 1.4.0+cpu ; sys_platform != 'darwin'
    py{36,37,38}-cov{yes,no}-mlyes,docs: torch == 1.4.0 ; sys_platform == 'darwin'


[testenv]
deps =
    {[base]deps}
usedevelop =
    covyes,docs: true
    covno: false
ignore_basepython_conflict = true
setenv =
    PYTHONPATH={toxinidir}/tests
passenv =
    *
install_command =
    pip install {packages} -f https://download.pytorch.org/whl/torch_stable.html
commands =
    py{36,37,38}-covno-mlno: pytest --ignore {env:PYTHONPATH}/ml_datasets -n={env:PYTEST_XDIST_PROC_NR:4} {posargs}
    py{36,37,38}-covyes-mlno: pytest --ignore {env:PYTHONPATH}/ml_datasets --cov --cov-append --cov-report term -n={env:PYTEST_XDIST_PROC_NR:4} {posargs}
    py{36,37,38}-covno-mlyes: pytest -n={env:PYTEST_XDIST_PROC_NR:4} {posargs} {env:PYTHONPATH}/ml_datasets
    py{36,37,38}-covyes-mlyes: pytest --cov --cov-append --cov-report term -n={env:PYTEST_XDIST_PROC_NR:4} {posargs} {env:PYTHONPATH}/ml_datasets
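
# Example (not used by CI): a single factor combination can be run locally,
# e.g. `tox -e py38-covyes-mlno` runs the suite with coverage enabled and the
# ml_datasets tests skipped.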

# ---------------- checkers ------------------------

[testenv:spell]
setenv =
    SPELLCHECK=1
commands =
    sphinx-build -b spelling docs dist/docs
skip_install = true
deps =
    -r{toxinidir}/docs/requirements.txt
    sphinxcontrib-spelling
    pyenchant

[testenv:docs]
usedevelop = true
deps =
    {[base]deps}
    -r{toxinidir}/docs/requirements.txt
commands =
    sphinx-build {posargs:-E} -b html docs dist/docs
    sphinx-build -b linkcheck docs dist/docs -j {env:GH_ACTIONS_PROC_NR:8}
install_command =
    pip install {packages} -f https://download.pytorch.org/whl/torch_stable.html

[testenv:report]
deps =
    coverage
skip_install = true
commands =
    coverage report
    coverage html

[testenv:clean]
skip_install = true
deps =
    coverage
commands =
    coverage erase

# ------------------- mypy ----------------------

[testenv:mypy]
basepython = {env:TOXPYTHON:python3.8}
skip_install = False
commands =
    {posargs:mypy --config-file mypy.ini src/hangar}
deps =
    {[base]deps}
    mypy >= 0.701
    mypy-protobuf
    grpcio_tools
install_command =
    pip install {packages} -f https://download.pytorch.org/whl/torch_stable.html