Repository: liormizr/s3path Branch: master Commit: 66546e09f37e Files: 24 Total size: 199.0 KB Directory structure: gitextract_j1w6eiqo/ ├── .github/ │ └── workflows/ │ ├── deploying.yml │ └── testing.yml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── Makefile ├── Pipfile ├── README.rst ├── docs/ │ ├── advance.rst │ ├── comparison.rst │ └── interface.rst ├── s3path/ │ ├── __init__.py │ ├── accessor.py │ ├── current_version.py │ ├── old_versions.py │ └── py.typed ├── setup.cfg ├── setup.py └── tests/ ├── __init__.py ├── conftest.py ├── test_not_supported.py ├── test_path_operations.py ├── test_pure_path_operations.py └── test_s3path_configuration.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/deploying.yml ================================================ name: S3Path Deplyer on: # workflow_dispatch release: types: [published] jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: '3.9' - name: Install dependencies run: | python -m pip install --upgrade pip pip install build - name: Build package run: python -m build - name: Publish package uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} ================================================ FILE: .github/workflows/testing.yml ================================================ name: S3Path Tester on: push: branches: [ master ] pull_request: branches: [ master ] jobs: build: runs-on: ubuntu-latest strategy: matrix: python-version: [3.9, "3.10", 3.11, 3.12, 3.13] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Display Python version run: python -c "import sys; print(sys.version)" - name: 
Install dependencies env: PIPENV_DEFAULT_PYTHON_VERSION: ${{ matrix.python-version }} run: make init - name: Run Tests run: make tests ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Sphinx documentation docs/_build/ # PyBuilder target/ # pyenv .python-version # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mypy .mypy_cache/ # PyCharm .idea/ # Pipfile Pipfile.lock ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2019 Lior Mizrahi Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MANIFEST.in ================================================ include s3path.py include setup.py include README.rst include LICENSE ================================================ FILE: Makefile ================================================ .PHONY: docs tests init: python -m pip install --upgrade pip python -m pip install --upgrade pipenv pipenv install --skip-lock pipenv run pip freeze developer: pipenv install --dev --skip-lock tests: pipenv run pytest publish: pipenv run python setup.py sdist bdist_wheel pipenv run twine upload dist/* rm -fr build dist .egg s3path.egg-info check: tests ================================================ FILE: Pipfile ================================================ [[source]] url = "https://pypi.org/simple" verify_ssl = true name = "pypi" python_version = "3.13" [packages] moto = "*" pytest = "*" sphinx = "*" twine = "*" pytest-cov = "*" smart-open = "*" packaging = "*" mypy = "*" [dev-packages] ipython = "*" ipdb = "*" s3path = {editable = true, path = "."} ================================================ FILE: README.rst ================================================ S3Path ====== .. image:: https://badgen.net/pypi/v/s3path :target: https://pypi.org/project/s3path/ :alt: Latest version .. 
image:: https://github.com/liormizr/s3path/actions/workflows/testing.yml/badge.svg?branch=master&event=push :target: https://github.com/liormizr/s3path/actions/workflows/testing.yml :alt: S3Path CI S3Path provide a Python convenient File-System/Path like interface for AWS S3 Service using boto3 S3 resource as a driver. Like pathlib, but for S3 Buckets ________________________________ AWS S3 is among the most popular cloud storage solutions. It's object storage, is built to store and retrieve various amounts of data from anywhere. Currently, Python developers use Boto3 as the default API to connect / put / get / list / delete files from S3. S3Path blends Boto3's ease of use and the familiarity of pathlib api. Install: ======== From PyPI: .. code:: bash $ pip install s3path From Conda: .. code:: bash $ conda install -c conda-forge s3path Basic use: ========== The following example assumes an s3 bucket setup as specified bellow: .. code:: bash $ aws s3 ls s3://pypi-proxy/ 2018-04-24 22:59:59 186 requests/index.html 2018-04-24 22:59:57 485015 requests/requests-2.9.1.tar.gz 2018-04-24 22:35:01 89112 boto3/boto3-1.4.1.tar.gz 2018-04-24 22:35:02 180 boto3/index.html 2018-04-24 22:35:19 3308919 botocore/botocore-1.4.93.tar.gz 2018-04-24 22:35:36 188 botocore/index.html Importing the main class: .. code:: python >>> from s3path import S3Path Listing "subdirectories" - s3 keys can be split like file-system with a `/` in s3path we: .. code:: python >>> bucket_path = S3Path('/pypi-proxy/') >>> [path for path in bucket_path.iterdir() if path.is_dir()] [S3Path('/pypi-proxy/requests/'), S3Path('/pypi-proxy/boto3/'), S3Path('/pypi-proxy/botocore/')] Listing html source files in this "directory" tree: .. code:: python >>> bucket_path = S3Path('/pypi-proxy/') >>> list(bucket_path.glob('**/*.html')) [S3Path('/pypi-proxy/requests/index.html'), S3Path('/pypi-proxy/boto3/index.html'), S3Path('/pypi-proxy/botocore/index.html')] Navigating inside a "directory" tree: .. 
code:: python >>> bucket_path = S3Path('/pypi-proxy/') >>> boto3_package_path = bucket_path / 'boto3' / 'boto3-1.4.1.tar.gz' >>> boto3_package_path S3Path('/pypi-proxy/boto3/boto3-1.4.1.tar.gz') Querying path properties: .. code:: python >>> boto3_package_path = S3Path('/pypi-proxy/boto3/boto3-1.4.1.tar.gz') >>> boto3_package_path.exists() True >>> boto3_package_path.is_dir() False >>> boto3_package_path.is_file() True Opening a "file" (s3 key): .. code:: python >>> botocore_index_path = S3Path('/pypi-proxy/botocore/index.html') >>> with botocore_index_path.open() as f: >>> print(f.read()) """ Package Index botocore-1.4.93.tar.gz
""" Or Simply reading: .. code:: python >>> botocore_index_path = S3Path('/pypi-proxy/botocore/index.html') >>> botocore_index_path.read_text() """ Package Index botocore-1.4.93.tar.gz
""" Versioned S3 Objects: ===================== s3path supports versioned objects for S3 buckets that have versioning enabled. ``VersionedS3Path`` is a subclass of ``S3Path`` that supports all of its features. The main difference is an additional required ``version_id`` keyword parameter in each of its constructor methods. .. code:: python >>> from s3path import VersionedS3Path >>> bucket, key, version_id = 'my-bucket', 'my-key', 'my-version-id' >>> VersionedS3Path(f'/{bucket}/{key}', version_id=version_id) VersionedS3Path('/my-bucket/my-key', version_id='my-version-id') >>> VersionedS3Path.from_uri(f's3://{bucket}/{key}', version_id=version_id) VersionedS3Path('/my-bucket/my-key', version_id='my-version-id') >>> VersionedS3Path.from_bucket_key(bucket=bucket, key=key, version_id=version_id) VersionedS3Path('/my-bucket/my-key', version_id='my-version-id') New in version 0.5.0 Requirements: ============= * Python >= 3.4 * boto3 * smart-open Further Documentation: ====================== * `Advanced S3Path configuration`_ (S3 parameters, S3-compatible storage, etc.) * `Abstract pathlib interface`_ implemented by S3Path * `Boto3 vs S3Path usage examples`_ .. _Abstract pathlib interface: https://github.com/liormizr/s3path/blob/master/docs/interface.rst .. _Boto3 vs S3Path usage examples: https://github.com/liormizr/s3path/blob/master/docs/comparison.rst .. _Advanced S3Path configuration: https://github.com/liormizr/s3path/blob/master/docs/advance.rst ================================================ FILE: docs/advance.rst ================================================ Advance features (configurations/s3 parameters): ================================================ Basically s3path is trying to be as pure as possible from any non `pathlib`_ features. The goal is to take the AWS S3 service and integrate it into `pathlib`_'s interface without changes. 
Only then s3path provides a Python-convenient File-System/Path like interface for AWS's S3 service using `boto3`_ S3 resource as a driver. Configurations: --------------- s3path uses `boto3`_ as the SDK for AWS S3 service. To use `boto3`_ you first need to configure it. For the full documentation see `configuration`_. `boto3`_ has multiple ways to input configurations, s3path only supports the following: 1. Environment variables #. Shared credential file (~/.aws/credentials) #. AWS config file (~/.aws/config) #. Assume Role provider #. Instance metadata service on an Amazon EC2 instance that has an IAM role configured. With s3path, you can't specify configurations. The only way to specify configurations in code is with `setup_default_session`_. For Example: .. code:: python >>> import boto3 >>> from s3path import S3Path >>> boto3.setup_default_session( ... region_name='us-east-1', ... aws_access_key_id='', ... aws_secret_access_key='') >>> >>> bucket_path = S3Path('/pypi-proxy/') >>> [path for path in bucket_path.iterdir() if path.is_dir()] ... [S3Path('/pypi-proxy/requests/'), ... S3Path('/pypi-proxy/boto3/'), ... S3Path('/pypi-proxy/botocore/')] Parameters: ----------- We can map any kind of parameters that `boto3`_ `s3-resource`_ methods support per path. For Example: If you want to add Server-side encryption to your Bucket, you may do it per path like this: .. code:: python >>> from s3path import S3Path, register_configuration_parameter >>> bucket = S3Path('/my-bucket/') >>> register_configuration_parameter(bucket, parameters={'ServerSideEncryption': 'AES256'}) This will work for every s3path. S3Path('/') - parameters that will be used as default S3Path('/bucket/') - parameters that will be used per bucket S3Path('/bucket/key-prefix-directory/') - parameters that will be used per bucket, key prefix **NOTE:** We recommend configuring everything only in one place and not in the code.
S3 Compatible Storage: ---------------------- There are some cases in which we want to use s3path for S3-Compatible Storage. Some examples for S3-Compatible Storage can be: * `LocalStack`_ - A fully functional local AWS cloud stack * `MinIO`_ - MinIO is a High Performance Object Storage released under Apache License v2.0 `boto3`_ can be used as an SDK for such scenarios. Therefore you can use s3path for them as well. And even specify per "Bucket" what is the source. This example shows how to specify default AWS S3 parameters, a `LocalStack`_ Bucket, and a `MinIO`_ Bucket: .. code:: python >>> import boto3 >>> from botocore.client import Config >>> from s3path import PureS3Path, register_configuration_parameter >>> # Define paths for configuration >>> default_aws_s3_path = PureS3Path('/') >>> local_stack_bucket_path = PureS3Path('/LocalStackBucket/') >>> minio_bucket_path = PureS3Path('/MinIOBucket/') >>> # Define boto3 s3 resources >>> local_stack_resource = boto3.resource('s3', endpoint_url='http://localhost:4566') >>> minio_resource = boto3.resource( 's3', endpoint_url='http://localhost:9000', aws_access_key_id='minio', aws_secret_access_key='minio123', config=Config(signature_version='s3v4'), region_name='us-east-1') >>> # Configure and map root paths per boto3 parameters or resources >>> register_configuration_parameter(default_aws_s3_path, parameters={'ServerSideEncryption': 'AES256'}) >>> register_configuration_parameter(local_stack_bucket_path, resource=local_stack_resource) >>> register_configuration_parameter(minio_bucket_path, resource=minio_resource) s3path library general options: ------------------------------- In Version 0.4.0 we added a new algorithm for the r/glob methods. To enable the old (pathlib common) Algorithm you can configure it like this: ..
code:: python >>> from s3path import PureS3Path, register_configuration_parameter >>> # Define paths for configuration >>> path = PureS3Path('/') >>> register_configuration_parameter(path, glob_new_algorithm=False) **Note: from version 0.6.0 the glob implementation will work only with the new algorithm, therefore the glob_new_algorithm arg is in a deprecation cycle** .. _pathlib : https://docs.python.org/3/library/pathlib.html .. _boto3 : https://github.com/boto/boto3 .. _configuration: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html .. _profiles: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#shared-credentials-file .. _setup_default_session: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/boto3.html?highlight=setup_default_session#boto3.setup_default_session .. _s3-resource: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#service-resource .. _LocalStack: https://github.com/localstack/localstack .. _MinIO: https://docs.min.io/ ================================================ FILE: docs/comparison.rst ================================================ S3Path VS Boto3 S3 SDK ====================== Most of the boto3 examples are taken from here: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-examples.html Buckets List: ------------- S3Path Example: .. code:: python >>> from s3path import S3Path >>> for bucket in S3Path('/').iterdir(): ... print(bucket) boto3 Example: .. code:: python >>> import boto3 >>> # Create an S3 client >>> s3 = boto3.client('s3') >>> # Call S3 to list current buckets >>> response = s3.list_buckets() >>> # Get a list of all bucket names from the response >>> buckets = [bucket['Name'] for bucket in response['Buckets']] >>> # Print out the bucket list >>> for bucket in buckets: ... print(bucket) Create an Amazon S3 Bucket -------------------------- S3Path Example: ..
code:: python >>> from s3path import S3Path >>> S3Path('/my-bucket/').mkdir() boto3 Example: .. code:: python >>> import boto3 >>> s3 = boto3.resource('s3') >>> s3.create_bucket(Bucket='my-bucket') Upload a File to an Amazon S3 Bucket ------------------------------------ S3Path Example: .. code:: python >>> from pathlib import Path >>> from s3path import S3Path >>> local_path = Path('/tmp/hello.txt') >>> S3Path('/my-bucket/hello.txt').write_text(local_path.read_text()) S3Path Example (buffered, to avoid loading large files into memory): .. code:: python >>> import shutil >>> from pathlib import Path >>> from s3path import S3Path >>> local_path = Path('/tmp/hello.txt') >>> remote_path = S3Path('/my-bucket/hello.txt') >>> with local_path.open('rb') as src, remote_path.open('wb') as dst: >>> shutil.copyfileobj(src, dst) boto3 Example: .. code:: python >>> import boto3 >>> s3 = boto3.resource('s3') >>> bucket = s3.Bucket('my-bucket') >>> bucket.upload_file(Fileobj='/tmp/hello.txt', Key='hello.txt') Downloading a File ------------------ S3Path Example: .. code:: python >>> from pathlib import Path >>> from s3path import S3Path >>> local_path = Path('./my_local_image.jpg') >>> local_path.write_text(S3Path('/my-bucket/my_image_in_s3.jpg').read_text()) boto3 Example: .. code:: python >>> import boto3 >>> import botocore >>> s3 = boto3.resource('s3') >>> >>> try: >>> bucket = s3.Bucket('my-bucket') >>> bucket.download_file(Key='my_image_in_s3.jpg', Filename='my_local_image.jpg') >>> except botocore.exceptions.ClientError as e: >>> if e.response['Error']['Code'] == "404": >>> print("The object does not exist.") >>> else: >>> raise Retrieving subfolders names in S3 bucket ---------------------------------------- S3Path Example: .. code:: python >>> from s3path import S3Path >>> for path in S3Path('/my-bucket/prefix-name-with-slash/').iterdir(): >>> if path.is_dir(): >>> print('sub folder : ', path) boto3 Example: .. 
code:: python >>> import boto3 >>> s3_client = boto3.client('s3') >>> result = s3_client.list_objects(Bucket='my-bucket', Prefix='prefix-name-with-slash/', Delimiter='/') >>> for o in result.get('CommonPrefixes'): >>> print('sub folder : ', o.get('Prefix')) ================================================ FILE: docs/interface.rst ================================================ .. image:: s3path_graph.svg Concrete paths: =============== Full basic Path documentation linked here: `PathDocs`_. .. _S3Path: S3Path(\*pathsegments) ^^^^^^^^^^^^^^^^^^^^^^ A subclass of `Path`_ and PureS3Path_, this class represents a concrete path of AWS S3 Service. All actions use `boto3`_ as the SDK for AWS S3 Service: .. code:: python >>> S3Path('//') S3Path('//') pathsegments are specified similarly to `Path`_. You can't use S3Path if you don't have boto3 installed in your environment: .. code:: python >>> import boto3 Traceback (most recent call last): File "", line 1, in ModuleNotFoundError: No module named 'boto3' >>> from s3path import S3Path >>> S3Path('//') Traceback (most recent call last): File "", line 1, in File "pathlib.py", line 798, in __new__ % (cls.__name__,)) NotImplementedError: cannot instantiate 'S3Path' on your system .. _VersionedS3Path: VersionedS3Path(\*pathsegments, version_id) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ A subclass of `S3Path`_ and `PureVersionedS3Path`_, this class represents a concrete path of the AWS S3 Service for buckets in which `S3 versioning`_ is enabled. All actions use `boto3`_ as the SDK for AWS S3 Service: .. code:: python >>> from s3path import VersionedS3Path >>> VersionedS3Path('//', version_id='') VersionedS3Path('//', version_id='') | pathsegments are specified similarly to `Path`_ | version_id is a string that can be any valid `AWS S3 version identifier`_ | | New in version 0.5.0 Methods: ======== S3Path and VersionedS3Path provide the following methods in addition to pure paths methods.
All the methods below will raise a `ValueError`_ if the path isn't absolute. Many of these methods can raise a `botocore.exceptions.ClientError` if `boto3`_ call fails (for example because the path doesn't exist). **NOTE:** The following signatures are shown for `S3Path`_ but are equally valid for `VersionedS3Path`_ as well. Any behavioral differences between `S3Path`_ methods and their `VersionedS3Path`_ equivalents are explicitly detailed below (i.e. if a given `VersionedS3Path`_ method signature is not listed below, it is assumed that it behaves identically to its `S3Path`_ equivalent). .. _S3Path.stat: S3Path.stat(*, follow_symlinks=True) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Returns information about this path (similarly to boto3's `ObjectSummary`_). For compatibility with `pathlib`_, the returned object some similar attributes like `os.stat_result`_. The result is looked up at each call to this method: .. code:: python >>> path_stat = S3Path('/pypi-proxy/boto3/index.html').stat() >>> path_stat StatResult(size=188, last_modified=datetime.datetime(2018, 4, 4, 12, 26, 3, tzinfo=tzutc()), version_id=None) >>> path_stat.st_size 188 >>> path_stat.st_mtime 1522833963.0 >>> print(path_stat.st_version_id) None >>> path_stat.st_atime Traceback (most recent call last): ... io.UnsupportedOperation: StatResult do not support st_atime attribute **NOTES:** * ``follow_symlinks`` option must be always set to ``True``. * The returned object will contain an additional ``st_version_id`` attribute that is not part of the `os.stat_result`_ API. The value of ``st_version_id`` will be ``None``. VersionedS3Path.stat(*, follow_symlinks=True) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Behaves the same as `S3Path.stat`_ with the exception that the ``st_version_id`` attribute of the returned object will contain the version ID of the underlying S3 object. .. _S3Path.exists: S3Path.exists() ^^^^^^^^^^^^^^^ Whether the path points to an existing Bucket, key or key prefix: .. 
code:: python >>> S3Path('/pypi-proxy/boto3/index.html').exists() True >>> S3Path('/pypi-proxy/boto3/').exists() True >>> S3Path('/fake-bucket/').exists() False VersionedS3Path.exists() ^^^^^^^^^^^^^^^^^^^^^^^^ Behaves the same as `S3Path.exists`_ except that the version ID must match in addition to the bucket and key. .. _S3Path.glob: S3Path.glob(pattern) ^^^^^^^^^^^^^^^^^^^^ Glob the given relative pattern in the Bucket / key prefix represented by this path, yielding all matching files (of any kind): .. code:: python >>> bucket_path = S3Path('/pypi-proxy/') >>> [path for path in bucket_path.glob('boto*')] [S3Path('/pypi-proxy/boto3/'), S3Path('/pypi-proxy/botocore/')] >>> [path for path in bucket_path.glob('*/*.html')] [S3Path('/pypi-proxy/requests/index.html'), S3Path('/pypi-proxy/boto3/index.html'), S3Path('/pypi-proxy/botocore/index.html')]] The "**" pattern means "this Bucket / key prefix and all sub key prefixes, recursively". In other words, it enables recursive globbing: .. code:: python >>> bucket_path = S3Path('/pypi-proxy/') >>> list(bucket_path.glob('**/*.html')) [S3Path('/pypi-proxy/requests/index.html'), S3Path('/pypi-proxy/index.html'), S3Path('/pypi-proxy/boto3/index.html'), S3Path('/pypi-proxy/botocore/index.html')] In version 0.4.0: New Algorithm that better suited to s3 API. Especially for recursive searches. To enable the old (pathlib common) Algorithm you can configure it like this: .. code:: python register_configuration_parameter(path, glob_new_algorithm=False) New version 0.6.0: glob implementation will work only with the new algorithm, there for the glob_new_algorithm arg is in depreciation cycle For more configuration details please see this `Advanced S3Path configuration`_ **NOTE:** Using the "**" pattern in large Buckets may consume an inordinate amount of time in the old algorithm. S3Path.is_dir() ^^^^^^^^^^^^^^^ Returns ``True`` if the path points to a Bucket or a key prefix, ``False`` if it points to a full key path. 
``False`` is also returned if the path doesn’t exist. Other errors (such as permission errors) are propagated. S3Path.is_file() ^^^^^^^^^^^^^^^^ Returns ``True`` if the path points to a Bucket key, ``False`` if it points to Bucket or a key prefix. ``False`` is also returned if the path doesn’t exist. Other errors (such as permission errors) are propagated. S3Path.is_mount() ^^^^^^^^^^^^^^^^^ AWS S3 Service doesn't have mounting feature, There for this method will always return ``False`` S3Path.is_symlink() ^^^^^^^^^^^^^^^^^^^ AWS S3 Service doesn't have symlink feature, There for this method will always return ``False`` S3Path.is_socket() ^^^^^^^^^^^^^^^^^^ AWS S3 Service doesn't have sockets feature, There for this method will always return ``False`` S3Path.is_fifo() ^^^^^^^^^^^^^^^^ AWS S3 Service doesn't have fifo feature, There for this method will always return ``False`` S3Path.iterdir() ^^^^^^^^^^^^^^ When the path points to a Bucket or a key prefix, yield path objects of the directory contents: .. code:: python >>> bucket_path = S3Path('/pypi-proxy/') >>> [path for path in bucket_path.iterdir() if path.is_dir()] [S3Path('/pypi-proxy/requests/'), S3Path('/pypi-proxy/boto3/'), S3Path('/pypi-proxy/botocore/')] >>> boto3_path = bucket_path.joinpath('boto3') >>> [path for path in bucket_path.boto3_path()] [S3Path('/pypi-proxy/boto3/boto3-1.4.1.tar.gz'), S3Path('/pypi-proxy/boto3/index.html')] .. _S3Path.open: S3Path.open(mode='r', buffering=-1, encoding=None, errors=None, newline=None) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Opens the Bucket key pointed to by the path. This delegates to the smart_open library that handles the file streaming. returns a file like object that you can read or write with: .. code:: python >>> with S3Path('/pypi-proxy/botocore/index.html').open() as f: >>> print(f.read()) ' Package Index botocore-1.4.93.tar.gz
' VersionedS3Path.open(mode='r', buffering=-1, encoding=None, errors=None, newline=None) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Behaves the same as `S3Path.open`_ except that ``VersionedS3Path.version_id`` will be used to open the specified version of the object pointed to by the `VersionedS3Path`_ object. S3Path.owner() ^^^^^^^^^^^^^^ Returns the name of the user owning the Bucket or key. Similarly to boto3's `ObjectSummary`_ owner attribute S3Path.read_bytes() ^^^^^^^^^^^^^^^^^^^ Return the binary contents of the Bucket key as a bytes object: .. code:: python >>> S3Path('/test_bucket/test.txt').write_bytes(b'Binary file contents') >>> S3Path('/test_bucket/test.txt').read_bytes() b'Binary file contents' S3Path.read_text(encoding=None, errors=None) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Returns the decoded contents of the Bucket key as a string: .. code:: python >>> S3Path('/test_bucket/test.txt').write_text('Text file contents') >>> S3Path('/test_bucket/test.txt').read_text() 'Text file contents' S3Path.rename(target) ^^^^^^^^^^^^^^^^^^^^^ Renames this file or Bucket / key prefix / key to the given target. If target exists and is a file, it will be replaced silently if the user has permission. If path is a key prefix, it will replace all the keys with the same prefix to the new target prefix. target can be either a string or another S3Path_ object: .. code:: python >>> path = S3Path('/test_bucket/test.txt').write_text('Text file contents') >>> target = S3Path('/test_bucket/new_test.txt') >>> path.rename(target) >>> target.read_text() 'Text file contents' S3Path.replace(target) ^^^^^^^^^^^^^^^^^^^^^^ Renames this Bucket / key prefix / key to the given target. If target points to an existing Bucket / key prefix / key, it will be unconditionally replaced. S3Path.rglob(pattern) ^^^^^^^^^^^^^^^^^^^^^ This is like calling S3Path.glob_ with ``"**/"`` added in front of the given relative pattern: .. 
code:: python >>> bucket_path = S3Path('/pypi-proxy/') >>> list(bucket_path.rglob('*.html')) [S3Path('/pypi-proxy/requests/index.html'), S3Path('/pypi-proxy/index.html'), S3Path('/pypi-proxy/botocore/index.html')] Version 0.4.0: New Algorithm that better suited to s3 API. Especially for recursive searches. New version 0.6.0: glob implementation will work only with the new algorithm, there for the glob_new_algorithm arg is in depreciation cycle S3Path.rmdir() ^^^^^^^^^^^^^^ Removes this Bucket / key prefix. The Bucket / key prefix must be empty. S3Path.unlink(missing_ok=False) ^^^^^^^^^^^^^^^ Removes this key from S3. Note that this will not remove directories or buckets, but will instead raise an `IsADirectoryError`_. If the key does is not present in the given bucket, or if the bucket is not present, raises a `FileNotFoundError`_. If `missing_ok` is `True` then no exception will be raised. S3Path.samefile(other_path) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Returns whether this path points to the same Bucket key as other_path, which can be either a Path object, or a string: .. code:: python >>> path = S3Path('/test_bucket/test.txt') >>> path.samefile(S3Path('/test_bucket/test.txt')) True >>> path.samefile('/test_bucket/fake') False S3Path.touch(exist_ok=True, \**kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Creates a key at this given path. If the key already exists, the function succeeds if exist_ok is true (and its modification time is updated to the current time), otherwise `FileExistsError`_ is raised. S3Path.write_bytes(data) ^^^^^^^^^^^^^^^^^^^^^^^^ Opens the key pointed to in bytes mode, write data to it, and close / save the key: .. 
code:: python >>> S3Path('/test_bucket/test.txt').write_bytes(b'Binary file contents') >>> S3Path('/test_bucket/test.txt').read_bytes() b'Binary file contents' S3Path.write_text(data, encoding=None, errors=None, newline=None) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Opens the key pointed to in text mode, writes data to it, and close / save the key: .. code:: python >>> S3Path('/test_bucket/test.txt').write_text('Text file contents') >>> S3Path('/test_bucket/test.txt').read_text() 'Text file contents' **NOTE:** ``newline`` option is only available on Python 3.10 and greater. S3Path.mkdir(mode=0o777, parents=False, exist_ok=False) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Create a path bucket. AWS S3 Service doesn't support folders, therefore the mkdir method will only create the current bucket. If the bucket path already exists, FileExistsError is raised. If exist_ok is false (the default), FileExistsError is raised if the target Bucket already exists. If exist_ok is true, OSError exceptions will be ignored. if parents is false (the default), mkdir will create the bucket only if this is a Bucket path. if parents is true, mkdir will create the bucket even if the path have a Key path. mode argument is ignored. S3Path.get_presigned_url(expire_in: timedelta | int = 3600) -> str ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Returns a pre-signed url. Anyone with the url can make a GET request to get the file. You can set an expiration date with the expire_in argument (integer or timedelta object). Note that generating a presigned url may require more information or setup than to use other S3Path functions. It's because it needs to know the exact aws region and use s3v4 as signature version. Meaning you may have to do this: .. code:: python >>> import boto3 >>> from botocore.config import Config >>> from s3path import S3Path, register_configuration_parameter >>> resource = boto3.resource( ... "s3", ... 
config=Config(signature_version="s3v4"), ... region_name="the aws region name" ... ) >>> register_configuration_parameter(S3Path("/"), resource=resource) Here is an example of using a presigned url: .. code:: python >>> from s3path import S3Path >>> import requests >>> file = S3Path("/my-bucket/toto.txt") >>> file.write_text("hello world") >>> presigned_url = file.get_presigned_url() >>> print(requests.get(presigned_url).content) b"hello world" S3Path.walk(top_down=True, on_error=None, follow_symlinks=False) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Walks the directory tree rooted at this path, yielding a 3-tuple (dirpath, dirnames, filenames). The dirpath is a string, and dirnames and filenames are lists of strings. Note that this method in AWS S3 Service is very heavy on API calls. It will be better to glob recursively instead of using this method for most cases. Pure paths: =========== Full basic PurePath documentation linked here: `PurePathDocs`_. .. _PureS3Path: PureS3Path(\*pathsegments) ^^^^^^^^^^^^^^^^^^^^^^^^^^ A subclass of `PurePath`_, this path flavour represents AWS S3 Service semantics. .. code:: python >>> PureS3Path('//') PureS3Path('//') pathsegments are specified similarly to `PurePath`_. .. _PureVersionedS3Path: PureVersionedS3Path(\*pathsegments, version_id) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ A subclass of `PureS3Path`_, this path flavour represents AWS S3 Service semantics for buckets in which `S3 versioning`_ is enabled. .. code:: python >>> from s3path import PureVersionedS3Path >>> PureVersionedS3Path('//', version_id='') PureVersionedS3Path('//', version_id='') | pathsegments are specified similarly to `PurePath`_. 
| version_id is a string that can be any valid `AWS S3 version identifier`_ | | New in version 0.5.0 PureS3Path has a similar behavior to `PurePosixPath`_, except for the below changes: ------------------------------------------------------------------------------------ Double dots (``'..'``) are treated as follows. This is different then PurePath since AWS S3 Service doesn't support symbolic links: .. code:: python >>> PureS3Path('foo/../bar') PureS3Path('bar') **NOTE:** All The methods below will raise `ValueError`_ if the path isn't absolute. PureS3Path.joinpath(*other) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ If the final element of ``other`` is a `PureVersionedS3Path`_ instance, the resulting object will also be a `PureVersionedS3Path`_ instance with ``version_id`` set to ``other[-1].version_id``. Otherwise, the resulting object will be a `PureS3Path`_ instance. PureS3Path.as_uri() ^^^^^^^^^^^^^^^^^^^ Represents the path as a AWS S3 URI: .. code:: python >>> p = PureS3Path('/pypi-proxy/boto3/') >>> p.as_uri() 's3://pypi-proxy/boto3/' >>> p = PureS3Path('/pypi-proxy/boto3/index.html') >>> p.as_uri() 's3://pypi-proxy/boto3/index.html' PureS3Path.from_uri(uri) ^^^^^^^^^^^^^^^^^^^^^^^^ Represents a AWS S3 URI as a PureS3Path: .. code:: python >>> PureS3Path.from_uri('s3://pypi-proxy/boto3/index.html') PureS3Path('/pypi-proxy/boto3/index.html') This is a new class method. PureS3Path.from_bucket_key(bucket, key) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Represents a AWS S3 Bucket and Key pairs as a PureS3Path: .. code:: python >>> PureS3Path.from_bucket_key('pypi-proxy', 'boto3/index.html') PureS3Path('/pypi-proxy/boto3/index.html') This is a new class method. New in version 0.3.0. PureS3Path.bucket ^^^^^^^^^^^^^^^^^ A string representing the AWS S3 Bucket name, if any: .. code:: python >>> PureS3Path.from_uri('s3://pypi-proxy/boto3/').bucket 'pypi-proxy' >>> PureS3Path('/').bucket '' This is a new property. 
PureS3Path.key ^^^^^^^^^^^^^^ A string representing the AWS S3 Key name, if any: .. code:: python >>> PureS3Path('/pypi-proxy/boto3/').key 'boto3' >>> PureS3Path('/pypi-proxy/boto3/index.html').key 'boto3/index.html' >>> PureS3Path.from_uri('s3://pypi-proxy/').key '' This is a new property. PureVersionedS3Path has a similar behavior to `PureS3Path`_, except for the below changes: ------------------------------------------------------------------------------------------ PureVersionedS3Path.from_uri(uri, *, version_id) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Represents a versioned AWS S3 URI as a `PureVersionedS3Path`_: .. code:: python >>> from s3path import PureVersionedS3Path >>> PureVersionedS3Path.from_uri('s3://pypi-proxy/boto3/index.html', version_id='') PureVersionedS3Path('/pypi-proxy/boto3/index.html', version_id='') This is a new class method. PureVersionedS3Path.from_bucket_key(bucket, key, *, version_id) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Represents a versioned AWS S3 Bucket and Key pairs as a `PureVersionedS3Path`_: .. code:: python >>> from s3path import PureVersionedS3Path >>> PureVersionedS3Path.from_bucket_key('pypi-proxy', 'boto3/index.html', version_id='') PureVersionedS3Path('/pypi-proxy/boto3/index.html', version_id='') This is a new class method. Division Operator with PureVersionedS3Path ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The division of `PureVersionedS3Path`_ instances with other objects will yield the following types: * ``PureVersionedS3Path / PureVersionedS3Path -> PureVersionedS3Path`` * ``PureS3Path / PureVersionedS3Path -> PureVersionedS3Path`` * ``str / PureVersionedS3Path -> PureVersionedS3Path`` * ``PureVersionedS3Path / PureS3Path -> PureS3Path`` * ``PureVersionedS3Path / str -> PureS3Path`` .. 
code:: python >>> from s3path import S3Path, VersionedS3Path >>> str_path = "example/path" >>> s3_path = S3Path("example/path") >>> versioned_s3_path = VersionedS3Path("example/path", version_id="") >>> type(versioned_s3_path / versioned_s3_path) <<< s3path.VersionedS3Path >>> type(s3_path / versioned_s3_path) <<< s3path.VersionedS3Path >>> type(str_path / versioned_s3_path) <<< s3path.VersionedS3Path >>> type(versioned_s3_path / s3_path) <<< s3path.S3Path >>> type(versioned_s3_path / str_path) <<< s3path.S3Path Unsupported Methods: ==================== There are several methods that are not supported in S3Path. All of them will raise `NotImplementedError`_. For example AWS S3 Service doesn't have a current directory: .. code:: python >>> S3Path('/test_bucket/test.txt').cwd() Traceback (most recent call last): File "", line 1, in File "/home/lior/lior_env/s3path/s3path.py", line 235, in cwd raise NotImplementedError(message) NotImplementedError: PathNotSupportedMixin.cwd is unsupported on AWS S3 service Here is a list of all unsupported methods: - classmethod S3Path.cwd() - classmethod S3Path.home() - S3Path.chmod(mode, *, follow_symlinks=True) - S3Path.expanduser() - S3Path.lchmod(mode) - S3Path.group() - S3Path.is_block_device() - S3Path.is_char_device() - S3Path.lstat() - S3Path.resolve() - S3Path.symlink_to(target, target_is_directory=False) .. _pathlib : https://docs.python.org/3/library/pathlib.html .. _os.stat_result : https://docs.python.org/3/library/os.html#os.stat_result .. _PurePathDocs : https://docs.python.org/3/library/pathlib.html#pure-paths .. _PurePath : https://docs.python.org/3/library/pathlib.html#pathlib.PurePath .. _PurePosixPath : https://docs.python.org/3/library/pathlib.html#pathlib.PurePosixPath .. _PathDocs : https://docs.python.org/3/library/pathlib.html#concrete-paths .. _Path : https://docs.python.org/3/library/pathlib.html#pathlib.Path .. _boto3 : https://github.com/boto/boto3 .. 
_ValueError : https://docs.python.org/3/library/exceptions.html#ValueError .. _FileExistsError : https://docs.python.org/3/library/exceptions.html#FileExistsError .. _IsADirectoryError : https://docs.python.org/3/library/exceptions.html#IsADirectoryError .. _NotImplementedError : https://docs.python.org/3/library/exceptions.html#NotImplementedError .. _ObjectSummary : https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#objectsummary .. _Abstract pathlib interface: https://github.com/liormizr/s3path/blob/master/docs/interface.rst .. _S3 versioning : https://docs.aws.amazon.com/AmazonS3/latest/userguide/versioning-workflows.html .. _AWS S3 version identifier : https://docs.aws.amazon.com/AmazonS3/latest/userguide/versioning-workflows.html#version-ids ================================================ FILE: s3path/__init__.py ================================================ """ s3path provides a Pythonic API to S3 by wrapping boto3 with pathlib interface """ import sys from pathlib import Path from . 
import accessor __version__ = '0.6.5' __all__ = ( 'Path', 'register_configuration_parameter', 'configuration_map', 'StatResult', 'PureS3Path', 'S3Path', 'VersionedS3Path', 'PureVersionedS3Path', ) if sys.version_info >= (3, 12): from .accessor import StatResult, configuration_map from .current_version import ( S3Path, PureS3Path, VersionedS3Path, PureVersionedS3Path, register_configuration_parameter, ) else: from .old_versions import ( StatResult, S3Path, PureS3Path, _s3_accessor, VersionedS3Path, PureVersionedS3Path, register_configuration_parameter, ) configuration_map = _s3_accessor.configuration_map ================================================ FILE: s3path/accessor.py ================================================ import sys import importlib.util from warnings import warn from os import stat_result from threading import Lock from itertools import chain from collections import deque from functools import lru_cache from contextlib import suppress from collections import namedtuple from io import UnsupportedOperation def _lazy_import_resources(name): if name in sys.modules: return sys.modules[name] spec = importlib.util.find_spec(name) loader = importlib.util.LazyLoader(spec.loader) spec.loader = loader module = importlib.util.module_from_spec(spec) sys.modules[name] = module loader.exec_module(module) return module boto3 = _lazy_import_resources('boto3') smart_open = _lazy_import_resources('smart_open') # For Development on Cli, or in general application that require fast startup # This will lazy load boto3 resources # boto3 increase startup time by X10! 
class StatResult(namedtuple('BaseStatResult', 'size, last_modified, version_id', defaults=(None,))): """ Base of os.stat_result but with boto3 s3 features """ def __getattr__(self, item): if item in vars(stat_result): raise UnsupportedOperation(f'{type(self).__name__} do not support {item} attribute') return super().__getattribute__(item) @property def st_size(self) -> int: return self.size @property def st_mtime(self) -> float: return self.last_modified.timestamp() @property def st_version_id(self) -> str: return self.version_id def stat(path, *, follow_symlinks=True): if not follow_symlinks: raise NotImplementedError( f'Setting follow_symlinks to {follow_symlinks} is unsupported on S3 service.') resource, config = configuration_map.get_configuration(path) if _is_versioned_path(path): object_summary = _boto3_method_with_parameters( resource.ObjectVersion(path.bucket, path.key, path.version_id).get, config=config, ) return StatResult( size=object_summary.get('ContentLength'), last_modified=object_summary.get('LastModified'), version_id=object_summary.get('VersionId')) object_summary = resource.ObjectSummary(path.bucket, path.key) return StatResult( size=object_summary.size, last_modified=object_summary.last_modified, version_id=None) def owner(path): bucket_name = path.bucket key_name = path.key resource, config = configuration_map.get_configuration(path) object_summary = resource.ObjectSummary(bucket_name, key_name) # return object_summary.owner['DisplayName'] # This is a hack till boto3 resolve this issue: # https://github.com/boto/boto3/issues/1950 response = _boto3_method_with_parameters( object_summary.meta.client.list_objects_v2, kwargs={ 'Bucket': object_summary.bucket_name, 'Prefix': object_summary.key, 'FetchOwner': True, }, config=config, ) return response['Contents'][0]['Owner']['DisplayName'] def rename(path, target): source_bucket_name = path.bucket source_key_name = path.key target_bucket_name = target.bucket target_key_name = target.key resource, 
config = configuration_map.get_configuration(path) allowed_copy_args = boto3.s3.transfer.TransferManager.ALLOWED_COPY_ARGS if not is_dir(path): target_bucket = resource.Bucket(target_bucket_name) object_summary = resource.ObjectSummary(source_bucket_name, source_key_name) old_source = {'Bucket': object_summary.bucket_name, 'Key': object_summary.key} _boto3_method_with_extraargs( target_bucket.copy, config=config, args=(old_source, target_key_name), allowed_extra_args=allowed_copy_args) _boto3_method_with_parameters(object_summary.delete) return bucket = resource.Bucket(source_bucket_name) target_bucket = resource.Bucket(target_bucket_name) for object_summary in bucket.objects.filter(Prefix=source_key_name): old_source = {'Bucket': object_summary.bucket_name, 'Key': object_summary.key} new_key = object_summary.key.replace(source_key_name, target_key_name) _, config = configuration_map.get_configuration(type(path)(target_bucket_name, new_key)) _boto3_method_with_extraargs( target_bucket.copy, config=config, args=(old_source, new_key), allowed_extra_args=allowed_copy_args) _boto3_method_with_parameters(object_summary.delete) replace = rename def rmdir(path): bucket_name = path.bucket key_name = path.key resource, config = configuration_map.get_configuration(path) bucket = resource.Bucket(bucket_name) for object_summary in bucket.objects.filter(Prefix=key_name): _boto3_method_with_parameters(object_summary.delete, config=config) if path.is_bucket: _boto3_method_with_parameters(bucket.delete, config=config) def mkdir(path, mode): resource, config = configuration_map.get_configuration(path) _boto3_method_with_parameters( resource.create_bucket, config=config, kwargs={'Bucket': path.bucket}, ) def is_dir(path): if str(path) == path.root: return True resource, config = configuration_map.get_configuration(path) bucket = resource.Bucket(path.bucket) query = _boto3_method_with_parameters( bucket.objects.filter, kwargs={'Prefix': _generate_prefix(path)}, config=config) return 
any(query) def exists(path): bucket_name = path.bucket resource, config = configuration_map.get_configuration(path) if not path.key: # Check whether or not the bucket exists. # See https://stackoverflow.com/questions/26871884 try: _boto3_method_with_parameters( resource.meta.client.head_bucket, kwargs={'Bucket': bucket_name}, config=config) return True except Exception as client_error: with suppress(AttributeError, KeyError): error_code = client_error.response['Error']['Code'] if error_code == '404': # Not found return False raise client_error bucket = resource.Bucket(bucket_name) key_name = str(path.key) def query_method(): method = bucket.object_versions.filter if _is_versioned_path(path) else bucket.objects.filter return _boto3_method_with_parameters( method, kwargs={'Prefix': key_name}, config=config) if _is_versioned_path(path): for object in query_method(): if object.version_id != path.version_id: continue if object.key == key_name: return True if object.key.startswith(key_name + path._flavour.sep): return True return False for object in query_method(): if object.key == key_name: return True if object.key.startswith(key_name + path._flavour.sep): return True return False def iter_keys(path, *, prefix=None, full_keys=True): resource, config = configuration_map.get_configuration(path) bucket_name = path.bucket def get_keys(): continuation_token = None while True: if continuation_token: kwargs['ContinuationToken'] = continuation_token response = _boto3_method_with_parameters( resource.meta.client.list_objects_v2, kwargs=kwargs, config=config, ) for file in response.get('Contents', ()): yield file['Key'] for folder in response.get('CommonPrefixes', ()): yield folder['Prefix'] if not response.get('IsTruncated'): break continuation_token = response.get('NextContinuationToken') # get buckets if not bucket_name and not full_keys: query = _boto3_method_with_parameters( resource.buckets.filter, config=config) for bucket in query: yield bucket.name return # get keys in 
buckets if not bucket_name: query = _boto3_method_with_parameters( resource.buckets.filter, config=config) for bucket in query: kwargs = {'Bucket': bucket.name} yield from get_keys() return # get keys or part of keys in buckets kwargs = {'Bucket': bucket_name} if prefix: kwargs['Prefix'] = prefix if not full_keys: kwargs['Delimiter'] = path._flavour.sep yield from get_keys() def scandir(path): return _S3Scandir(path=path) def open(path, *, mode='r', buffering=-1, encoding=None, errors=None, newline=None): resource, config = configuration_map.get_configuration(path) dummy_object = resource.Object('bucket', 'key') get_object_kwargs = _update_kwargs_with_config( dummy_object.meta.client.get_object, config=config) create_multipart_upload_kwargs = _update_kwargs_with_config( dummy_object.meta.client.create_multipart_upload, config=config) transport_params = {'defer_seek': True} if _is_versioned_path(path): transport_params['version_id'] = path.version_id transport_params.update( client=resource.meta.client, client_kwargs={ 'S3.Client.get_object': get_object_kwargs, 'S3.Client.create_multipart_upload': create_multipart_upload_kwargs, }, ) return smart_open.open( uri="s3:/" + str(path), mode=mode, buffering=buffering, encoding=encoding, errors=errors, newline=newline, compression='disable', transport_params=transport_params) def get_presigned_url(path, expire_in: int) -> str: resource, config = configuration_map.get_configuration(path) return _boto3_method_with_parameters( resource.meta.client.generate_presigned_url, config=config, kwargs={ 'ClientMethod': 'get_object', 'Params': {'Bucket': path.bucket, 'Key': path.key}, 'ExpiresIn': expire_in, } ) def _generate_prefix(path): sep = path._flavour.sep if not path.key: return '' key_name = path.key if not key_name.endswith(sep): return key_name + sep return key_name def unlink(path, *args, **kwargs): bucket_name = path.bucket key_name = path.key resource, config = configuration_map.get_configuration(path) bucket = 
resource.Bucket(bucket_name) try: _boto3_method_with_parameters( bucket.meta.client.delete_object, config=config, kwargs={"Bucket": bucket_name, "Key": key_name} ) except Exception as error: raise OSError(f'/{bucket_name}/{key_name}') from error def walk(path, *, topdown=True, onerror=None, followlinks=False): try: if not exists(path): raise FileNotFoundError(f'No such file or directory: {path}') except FileNotFoundError as error: if onerror is not None: onerror(error) return stack = deque([path]) while stack: top = stack.pop() if isinstance(top, tuple): yield top continue dirs = [] nondirs = [] walk_dirs = [] cont = False with scandir(top) as scandir_iter: scandir_iter = iter(scandir_iter) while True: try: entry = next(scandir_iter) is_dir = entry.is_dir() if is_dir: dirs.append(entry.name) else: nondirs.append(entry.name) if not topdown and is_dir: walk_dirs.append(top / entry.name) except StopIteration: break except Exception as error: if onerror is not None: onerror(error) cont = True break if cont: continue if topdown: # Yield before sub-directory traversal if going top down yield top, dirs, nondirs # Traverse into sub-directories for dirname in reversed(dirs): new_path = top / dirname stack.append(new_path) else: # Yield after sub-directory traversal if going bottom up stack.append((top, dirs, nondirs)) # Traverse into sub-directories for new_path in reversed(walk_dirs): stack.append(new_path) def _is_versioned_path(path): return hasattr(path, 'version_id') and bool(path.version_id) def _update_kwargs_with_config(boto3_method, config, kwargs=None): kwargs = kwargs or {} if config is not None: kwargs.update({ key: value for key, value in config.items() if key in _get_action_arguments(boto3_method) }) return kwargs def _boto3_method_with_parameters(boto3_method, config=None, args=(), kwargs=None): kwargs = _update_kwargs_with_config(boto3_method, config, kwargs) return boto3_method(*args, **kwargs) def _boto3_method_with_extraargs( boto3_method, config=None, 
args=(), kwargs=None, extra_args=None, allowed_extra_args=()): kwargs = kwargs or {} extra_args = extra_args or {} if config is not None: extra_args.update({ key: value for key, value in config.items() if key in allowed_extra_args }) kwargs["ExtraArgs"] = extra_args return boto3_method(*args, **kwargs) @lru_cache() def _get_action_arguments(action): docs = action.__doc__ with suppress(AttributeError): docs = action.__doc__._generate() return set( line.replace(':param ', '').strip().strip(':') for line in docs.splitlines() if line.startswith(':param ') ) class _S3Scandir: def __init__(self, *, path): self._path = path def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): return def __iter__(self): bucket_name = self._path.bucket resource, config = configuration_map.get_configuration(self._path) if not bucket_name: query = _boto3_method_with_parameters( resource.buckets.all, config=config) for bucket in query: yield _S3DirEntry(bucket.name, is_dir=True) return bucket = resource.Bucket(bucket_name) sep = self._path._flavour.sep kwargs = { 'Bucket': bucket.name, 'Prefix': _generate_prefix(self._path), 'Delimiter': sep} continuation_token = None while True: if continuation_token: kwargs['ContinuationToken'] = continuation_token response = _boto3_method_with_parameters( bucket.meta.client.list_objects_v2, kwargs=kwargs, config=config) for folder in response.get('CommonPrefixes', ()): full_name = folder['Prefix'][:-1] if folder['Prefix'].endswith(sep) else folder['Prefix'] name = full_name.split(sep)[-1] yield _S3DirEntry(name, is_dir=True) for file in response.get('Contents', ()): if file['Key'] == response['Prefix']: continue name = file['Key'].split(sep)[-1] yield _S3DirEntry(name=name, is_dir=False, size=file['Size'], last_modified=file['LastModified']) if not response.get('IsTruncated'): break continuation_token = response.get('NextContinuationToken') class _S3DirEntry: def __init__(self, name, is_dir, size=None, last_modified=None): 
self.name = name self._is_dir = is_dir self._stat = StatResult(size=size, last_modified=last_modified) def __repr__(self): return f'{type(self).__name__}(name={self.name}, is_dir={self._is_dir}, stat={self._stat})' def inode(self, *args, **kwargs): return None def is_dir(self, follow_symlinks=False): if follow_symlinks: raise TypeError('AWS S3 Service does not have symlink feature') return self._is_dir def is_file(self): return not self._is_dir def is_symlink(self, *args, **kwargs): return False def stat(self): return self._stat class _S3ConfigurationMap: def __init__(self): self.arguments = None self.resources = None self.general_options = None self.setup_lock = Lock() self.is_setup = False def __repr__(self): return f'{type(self).__name__}' \ f'(arguments={self.arguments}, resources={self.resources}, is_setup={self.is_setup})' @property def default_resource(self): return boto3.resource('s3') def set_configuration(self, path, *, resource=None, arguments=None): self._delayed_setup() path_name = str(path) if arguments is not None: self.arguments[path_name] = arguments if resource is not None: self.resources[path_name] = resource self.get_configuration.cache_clear() @lru_cache() def get_configuration(self, path): self._delayed_setup() resources = arguments = None for path in chain([path], path.parents): path_name = str(path) if resources is None and path_name in self.resources: resources = self.resources[path_name] if arguments is None and path_name in self.arguments: arguments = self.arguments[path_name] return resources, arguments @lru_cache() def get_general_options(self, path): self._delayed_setup() for path in chain([path], path.parents): path_name = str(path) if path_name in self.general_options: return self.general_options[path_name] return def _delayed_setup(self): """ Resolves a circular dependency between us and PureS3Path """ with self.setup_lock: if not self.is_setup: self.arguments = {'/': {}} self.resources = {'/': self.default_resource} self.is_setup = 
True configuration_map = _S3ConfigurationMap() ================================================ FILE: s3path/current_version.py ================================================ from __future__ import annotations import re import sys import fnmatch import posixpath from datetime import timedelta from contextlib import suppress from urllib.parse import unquote from pathlib import PurePath, Path from typing import TYPE_CHECKING, Literal, Self, Generator from io import DEFAULT_BUFFER_SIZE, TextIOWrapper from botocore.exceptions import ClientError if TYPE_CHECKING: from os import PathLike import smart_open from boto3.resources.base import ServiceResource KeyFileObjectType = TextIOWrapper | smart_open.s3.Reader | smart_open.s3.MultipartWriter from . import accessor def register_configuration_parameter( path: PureS3Path, *, parameters: dict | None = None, resource: ServiceResource | None = None): if not isinstance(path, PureS3Path): raise TypeError(f'path argument have to be a {PurePath} type. got {type(path)}') if parameters and not isinstance(parameters, dict): raise TypeError(f'parameters argument have to be a dict type. got {type(path)}') if parameters is None and resource is None: raise ValueError('user have to specify parameters or resource arguments') accessor.configuration_map.set_configuration( path, resource=resource, arguments=parameters) class _S3Parser: def __getattr__(self, name): return getattr(posixpath, name) class PureS3Path(PurePath): """ PurePath subclass for AWS S3 service. S3 is not a file-system, but we can look at it like a POSIX system. 
""" parser = _flavour = _S3Parser() # _flavour is not relevant after Python version 3.13 __slots__ = () def __init__(self, *args): super().__init__(*args) new_parts = list(self.parts) for part in new_parts[1:]: if part == '..': index = new_parts.index(part) new_parts.pop(index - 1) new_parts.remove(part) self._raw_paths = new_parts if sys.version_info >= (3, 13): self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) else: self._load_parts() @classmethod def from_uri(cls, uri: str) -> Self: """ from_uri class method create a class instance from url >> from s3path import PureS3Path >> PureS3Path.from_uri('s3:///') << PureS3Path('//') """ if not uri.startswith('s3://'): raise ValueError('Provided uri seems to be no S3 URI!') unquoted_uri = unquote(uri) return cls(unquoted_uri[4:]) @classmethod def from_bucket_key(cls, bucket: str | PathLike, key: str | PathLike) -> Self: """ from_bucket_key class method create a class instance from bucket, key pair's >> from s3path import PureS3Path >> PureS3Path.from_bucket_key(bucket='', key='') << PureS3Path('//') """ bucket = cls(cls.parser.sep, bucket) if len(bucket.parts) != 2: raise ValueError(f'bucket argument contains more then one path element: {bucket}') key = cls(key) if key.is_absolute(): key = key.relative_to('/') return bucket / key @property def bucket(self) -> str: """ The AWS S3 Bucket name, or '' """ self._absolute_path_validation() with suppress(ValueError): _, bucket, *_ = self.parts return bucket return '' @property def is_bucket(self) -> bool: """ Check if Path is a bucket """ return self.is_absolute() and self == PureS3Path(f"/{self.bucket}") @property def key(self) -> str: """ The AWS S3 Key name, or '' """ self._absolute_path_validation() key = self.parser.sep.join(self.parts[2:]) return key def as_uri(self) -> str: """ Return the path as a 's3' URI. 
""" uri = super().as_uri() return uri.replace('file:///', 's3://') def _absolute_path_validation(self): if not self.is_absolute(): raise ValueError('relative path have no bucket, key specification') class _PathNotSupportedMixin: _NOT_SUPPORTED_MESSAGE = '{method} is unsupported on S3 service' @classmethod def cwd(cls): """ cwd class method is unsupported on S3 service AWS S3 don't have this file system action concept """ message = cls._NOT_SUPPORTED_MESSAGE.format(method=cls.cwd.__qualname__) raise NotImplementedError(message) @classmethod def home(cls): """ home class method is unsupported on S3 service AWS S3 don't have this file system action concept """ message = cls._NOT_SUPPORTED_MESSAGE.format(method=cls.home.__qualname__) raise NotImplementedError(message) def chmod(self, mode, *, follow_symlinks=True): """ chmod method is unsupported on S3 service AWS S3 don't have this file system action concept """ message = self._NOT_SUPPORTED_MESSAGE.format(method=self.chmod.__qualname__) raise NotImplementedError(message) def expanduser(self): """ expanduser method is unsupported on S3 service AWS S3 don't have this file system action concept """ message = self._NOT_SUPPORTED_MESSAGE.format(method=self.expanduser.__qualname__) raise NotImplementedError(message) def lchmod(self, mode): """ lchmod method is unsupported on S3 service AWS S3 don't have this file system action concept """ message = self._NOT_SUPPORTED_MESSAGE.format(method=self.lchmod.__qualname__) raise NotImplementedError(message) def group(self): """ group method is unsupported on S3 service AWS S3 don't have this file system action concept """ message = self._NOT_SUPPORTED_MESSAGE.format(method=self.group.__qualname__) raise NotImplementedError(message) def is_block_device(self): """ is_block_device method is unsupported on S3 service AWS S3 don't have this file system action concept """ message = self._NOT_SUPPORTED_MESSAGE.format(method=self.is_block_device.__qualname__) raise 
NotImplementedError(message)

    def is_char_device(self):
        """
        is_char_device method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.is_char_device.__qualname__)
        raise NotImplementedError(message)

    def lstat(self):
        """
        lstat method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.lstat.__qualname__)
        raise NotImplementedError(message)

    def resolve(self):
        """
        resolve method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.resolve.__qualname__)
        raise NotImplementedError(message)

    def symlink_to(self, *args, **kwargs):
        """
        symlink_to method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.symlink_to.__qualname__)
        raise NotImplementedError(message)

    def hardlink_to(self, *args, **kwargs):
        """
        hardlink_to method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.hardlink_to.__qualname__)
        raise NotImplementedError(message)

    def readlink(self):
        """
        readlink method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.readlink.__qualname__)
        raise NotImplementedError(message)

    def is_symlink(self) -> Literal[False]:
        """
        AWS S3 Service doesn't have symlink feature,
        Therefore this method will always return False
        """
        return False

    def is_socket(self) -> Literal[False]:
        """
        AWS S3 Service doesn't have sockets feature,
        Therefore this method will always return False
        """
        return False

    def is_fifo(self) -> Literal[False]:
        """
        AWS S3 Service doesn't have fifo feature,
        Therefore this method will always return False
        """
        return False

    def is_mount(self) -> Literal[False]:
        """
        AWS S3 Service doesn't have mounting feature,
        Therefore this method will always return False
        """
        return False


class S3Path(_PathNotSupportedMixin, PureS3Path, Path):
    def stat(self, *, follow_symlinks: bool = True) -> accessor.StatResult | None:
        """
        Returns information about this path (similarly to boto3's ObjectSummary).
        For compatibility with pathlib, the returned object some similar attributes like os.stat_result.
        The result is looked up at each call to this method
        """
        if not follow_symlinks:
            raise NotImplementedError(
                f'Setting follow_symlinks to {follow_symlinks} is unsupported on S3 service.')
        self._absolute_path_validation()
        if not self.key:
            # Buckets and pure prefixes have no object to stat.
            return None
        return accessor.stat(self, follow_symlinks=follow_symlinks)

    def absolute(self) -> Self:
        """
        Handle absolute method only if the path is already an absolute one
        since we have no way to compute an absolute path from a relative one in S3.
        """
        if self.is_absolute():
            return self
        # We can't compute the absolute path from a relative one
        raise ValueError("Absolute path can't be determined for relative S3Path objects")

    def owner(self, *, follow_symlinks: bool = False) -> str:
        """
        Returns the name of the user owning the Bucket or key.
        Similarly to boto3's ObjectSummary owner attribute
        """
        self._absolute_path_validation()
        if follow_symlinks:
            raise NotImplementedError(f'Setting follow_symlinks to {follow_symlinks} is unsupported on S3 service.')
        if not self.is_file():
            # Only concrete keys have an owner.
            raise KeyError('file not found')
        return accessor.owner(self)

    def rename(self, target) -> Self:
        """
        Renames this file or Bucket / key prefix / key to the given target.
        If target exists and is a file, it will be replaced silently if the user has permission.
        If path is a key prefix, it will replace all the keys with the same prefix to the new target prefix.
        Target can be either a string or another S3Path object.
""" self._absolute_path_validation() if not isinstance(target, type(self)): target = type(self)(target) target._absolute_path_validation() accessor.rename(self, target) return type(self)(target) def replace(self, target) -> Self: """ Renames this Bucket / key prefix / key to the given target. If target points to an existing Bucket / key prefix / key, it will be unconditionally replaced. """ return self.rename(target) def rmdir(self): """ Removes this Bucket / key prefix. The Bucket / key prefix must be empty """ self._absolute_path_validation() if self.is_file(): raise NotADirectoryError() if not self.is_dir(): raise FileNotFoundError() accessor.rmdir(self) def samefile(self, other_path: str | PathLike) -> bool: """ Returns whether this path points to the same Bucket key as other_path, Which can be either a Path object, or a string """ self._absolute_path_validation() if not isinstance(other_path, S3Path): other_path = type(self)(other_path) return self.bucket == other_path.bucket and self.key == other_path.key and self.is_file() def touch(self, mode: int = 0o666, exist_ok: bool = True): """ Creates a key at this given path. If the key already exists, the function succeeds if exist_ok is true (and its modification time is updated to the current time), otherwise FileExistsError is raised """ if self.exists() and not exist_ok: raise FileExistsError() self.write_text('') def mkdir(self, mode: int = 0o777, parents: bool = False, exist_ok: bool = False): """ Create a path bucket. AWS S3 Service doesn't support folders, therefore the mkdir method will only create the current bucket. If the bucket path already exists, FileExistsError is raised. If exist_ok is false (the default), FileExistsError is raised if the target Bucket already exists. If exist_ok is true, OSError exceptions will be ignored. if parents is false (the default), mkdir will create the bucket only if this is a Bucket path. if parents is true, mkdir will create the bucket even if the path have a Key path. 
mode argument is ignored. """ try: if not self.bucket: raise FileNotFoundError(f'No bucket in {type(self)} {self}') if self.key and not parents: raise FileNotFoundError(f'Only bucket path can be created, got {self}') if type(self)(self.parser.sep, self.bucket).exists(): raise FileExistsError(f'Bucket {self.bucket} already exists') accessor.mkdir(self, mode) except OSError: if not exist_ok: raise def is_dir(self, *, follow_symlinks: bool = False) -> bool: """ Returns True if the path points to a Bucket or a key prefix, False if it points to a full key path. False is also returned if the path doesn’t exist. Other errors (such as permission errors) are propagated. """ self._absolute_path_validation() if follow_symlinks: raise NotImplementedError(f'Setting follow_symlinks to {follow_symlinks} is unsupported on S3 service.') if self.bucket and not self.key: return True return accessor.is_dir(self) def is_file(self, *, follow_symlinks: bool = False) -> bool: """ Returns True if the path points to a Bucket key, False if it points to Bucket or a key prefix. False is also returned if the path doesn’t exist. Other errors (such as permission errors) are propagated. """ self._absolute_path_validation() if follow_symlinks: raise NotImplementedError(f'Setting follow_symlinks to {follow_symlinks} is unsupported on S3 service.') if not self.bucket or not self.key: return False try: return bool(self.stat()) except ClientError: return False def exists(self, *, follow_symlinks: bool = False) -> bool: """ Whether the path points to an existing Bucket, key or key prefix. 
""" self._absolute_path_validation() if follow_symlinks: raise NotImplementedError(f'Setting follow_symlinks to {follow_symlinks} is unsupported on S3 service.') if not self.bucket: return True return accessor.exists(self) def iterdir(self) -> Generator[Self]: """ When the path points to a Bucket or a key prefix, yield path objects of the directory contents """ self._absolute_path_validation() with accessor.scandir(self) as scandir_iter: for entry in scandir_iter: path = self / entry.name yield path def open( self, mode: Literal['r', 'w', 'rb', 'wb'] = 'r', buffering: int = DEFAULT_BUFFER_SIZE, encoding: str | None = None, errors: str | None = None, newline: str | None = None) -> KeyFileObjectType: """ Opens the Bucket key pointed to by the path, returns a Key file object that you can read/write with """ self._absolute_path_validation() if 'r' in mode and not self.exists(): raise FileNotFoundError(f'No such file or directory: {self}') return accessor.open( self, mode=mode, buffering=buffering, encoding=encoding, errors=errors, newline=newline) def glob( self, pattern: str, *, case_sensitive: bool | None = None, recurse_symlinks: bool = False) -> Generator[Self]: """ Glob the given relative pattern in the Bucket / key prefix represented by this path, yielding all matching files (of any kind) The glob method is using a new Algorithm that better fit S3 API """ self._absolute_path_validation() if case_sensitive is False or recurse_symlinks is True: raise ValueError('Glob is case-sensitive and no symbolic links are allowed') sys.audit("pathlib.Path.glob", self, pattern) if not pattern: raise ValueError(f'Unacceptable pattern: {pattern}') drv, root, pattern_parts = self._parse_path(pattern) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") for part in pattern_parts: if part != '**' and '**' in part: raise ValueError("Invalid pattern: '**' can only be an entire path component") selector = _Selector(self, pattern=pattern) yield from 
selector.select() def rglob( self, pattern: str, *, case_sensitive: bool | None = None, recurse_symlinks: bool = False) -> Generator[Self]: """ This is like calling S3Path.glob with "**/" added in front of the given relative pattern The rglob method is using a new Algorithm that better fit S3 API """ self._absolute_path_validation() sys.audit("pathlib.Path.rglob", self, pattern) if not pattern: raise ValueError(f'Unacceptable pattern: {pattern}') drv, root, pattern_parts = self._parse_path(pattern) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") for part in pattern_parts: if part != '**' and '**' in part: raise ValueError("Invalid pattern: '**' can only be an entire path component") pattern = f'**{self.parser.sep}{pattern}' selector = _Selector(self, pattern=pattern) yield from selector.select() def get_presigned_url(self, expire_in: timedelta | int = 3600) -> str: """ Returns a pre-signed url. Anyone with the url can make a GET request to get the file. You can set an expiration date with the expire_in argument (integer or timedelta object). Note that generating a presigned url may require more information or setup than to use other S3Path functions. It's because it needs to know the exact aws region and use s3v4 as signature version. 
Meaning you may have to do this: ```python import boto3 from botocore.config import Config from s3path import S3Path, register_configuration_parameter resource = boto3.resource( "s3", config=Config(signature_version="s3v4"), region_name="the aws region name" ) register_configuration_parameter(S3Path("/"), resource=resource) ``` A simple example: ```python from s3path import S3Path import requests file = S3Path("/my-bucket/toto.txt") file.write_text("hello world") presigned_url = file.get_presigned_url() print(requests.get(presigned_url).content) b"hello world" """ self._absolute_path_validation() if isinstance(expire_in, timedelta): expire_in = int(expire_in.total_seconds()) if expire_in <= 0: raise ValueError( f"The expire_in argument can't represent a negative or null time delta. " f'You provided expire_in = {expire_in} seconds which is below or equal to 0 seconds.') return accessor.get_presigned_url(self, expire_in) def unlink(self, missing_ok: bool = False): """ Remove this key from its bucket. """ self._absolute_path_validation() # S3 doesn't care if you remove full prefixes or buckets with its delete API # so unless we manually check, this call will be dropped through without any # validation and could result in data loss try: if self.is_dir(): raise IsADirectoryError(str(self)) if not self.is_file(): raise FileNotFoundError(str(self)) except (IsADirectoryError, FileNotFoundError): if missing_ok: return raise try: # XXX: Note: If we don't check if the file exists here, S3 will always return # success even if we try to delete a key that doesn't exist. 
So, if we want # to raise a `FileNotFoundError`, we need to manually check if the file exists # before we make the API call -- since we want to delete the file anyway, # we can just ignore this for now and be satisfied that the file will be removed accessor.unlink(self) except FileNotFoundError: if not missing_ok: raise def walk( self, top_down: bool = True, on_error:bool = None, follow_symlinks: bool = False) -> Generator[tuple[Self, list[str], list[str]]]: if follow_symlinks: raise NotImplementedError(f'Setting follow_symlinks to {follow_symlinks} is unsupported on S3 service.') sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) yield from accessor.walk(self, topdown=top_down, onerror=on_error) class PureVersionedS3Path(PureS3Path): """ PurePath subclass for AWS S3 service Keys with Versions. S3 is not a file-system, but we can look at it like a POSIX system. """ def __new__(cls, *args, version_id: str): self = super().__new__(cls, *args) self.version_id = version_id return self def __repr__(self) -> str: return f'{type(self).__name__}({self.as_posix()}, version_id={self.version_id})' def __truediv__(self, key): if not isinstance(key, (PureS3Path, str)): return NotImplemented key = S3Path(key) if isinstance(key, str) else key return key.__rtruediv__(self) def __rtruediv__(self, key): if not isinstance(key, (PureS3Path, str)): return NotImplemented new_path = super().__rtruediv__(key) new_path.version_id = self.version_id return new_path @classmethod def from_uri(cls, uri: str, *, version_id: str) -> Self: """ from_uri class method creates a class instance from uri and version id >> from s3path import VersionedS3Path >> VersionedS3Path.from_uri('s3:///', version_id='') << VersionedS3Path('//', version_id='') """ self = PureS3Path.from_uri(uri) return cls(self, version_id=version_id) @classmethod def from_bucket_key(cls, bucket: str, key: str, *, version_id: str) -> Self: """ from_bucket_key class method creates a class instance from bucket, key and 
version id

        >> from s3path import VersionedS3Path
        >> VersionedS3Path.from_bucket_key('<bucket>', '<key>', version_id='<version id>')
        << VersionedS3Path('/<bucket>/<key>', version_id='<version id>')
        """
        self = PureS3Path.from_bucket_key(bucket=bucket, key=key)
        return cls(self, version_id=version_id)

    def with_segments(self, *pathsegments) -> Self:
        """Construct a new path object from any number of path-like objects.
        Subclasses may override this method to customize how new path objects
        are created from methods like `iterdir()`.
        """
        # Propagate this path's version id to every derived path.
        return type(self)(*pathsegments, version_id=self.version_id)

    def joinpath(self, *args):
        if not args:
            return self
        new_path = super().joinpath(*args)
        if isinstance(args[-1], PureVersionedS3Path):
            # Joining onto a versioned path adopts that path's version id.
            new_path.version_id = args[-1].version_id
        else:
            # Otherwise the result is a plain, unversioned S3Path.
            new_path = S3Path(new_path)
        return new_path


class VersionedS3Path(PureVersionedS3Path, S3Path):
    """
    S3Path subclass for AWS S3 service Keys with Versions.

    >> from s3path import VersionedS3Path
    >> VersionedS3Path('/<bucket>/<key>', version_id='<version id>')
    << VersionedS3Path('/<bucket>/<key>', version_id='<version id>')
    """

    def __init__(self, *args, version_id):
        # version_id is consumed by PureVersionedS3Path.__new__; it is
        # accepted here only so the constructor call signature matches.
        super().__init__(*args)


def _is_wildcard_pattern(pat):
    # Whether this pattern needs actual matching using fnmatch, or can
    # be looked up directly as a file.
    return "*" in pat or "?" in pat or "[" in pat


class _Selector:
    # Implements the S3-optimized glob: splits the pattern into a literal
    # key prefix (served by the S3 Prefix API argument) and a wildcard
    # remainder matched with a compiled regex.
    def __init__(self, path, *, pattern):
        self._path = path
        self._prefix, pattern = self._prefix_splitter(pattern)
        self._full_keys = self._calculate_full_or_just_folder(pattern)
        self._target_level = self._calculate_pattern_level(pattern)
        self.match = self._compile_pattern_parts(self._prefix, pattern, path.bucket)

    def select(self):
        for target in self._deep_cached_dir_scan():
            # Re-attach '/bucket' so the regex (built over the full path)
            # can match.
            target = f'{self._path.parser.sep}{self._path.bucket}{target}'
            if self.match(target):
                yield type(self._path)(target)

    def _prefix_splitter(self, pattern):
        if not _is_wildcard_pattern(pattern):
            # No wildcards: the whole pattern is a literal key lookup.
            if self._path.key:
                return f'{self._path.key}{self._path.parser.sep}{pattern}', ''
            return pattern, ''
        *_, pattern_parts = self._path._parse_path(pattern)
        prefix = ''
        # Accumulate leading literal components; stop at the first wildcard.
        for index, part in enumerate(pattern_parts):
            if _is_wildcard_pattern(part):
                break
            prefix += f'{part}{self._path.parser.sep}'
        if pattern.startswith(prefix):
            pattern = pattern.replace(prefix, '', 1)
        key_prefix = self._path.key
        if key_prefix:
            prefix = self._path.parser.sep.join((key_prefix, prefix))
        return prefix, pattern

    def _calculate_pattern_level(self, pattern):
        # None means "unbounded depth" ('**' patterns).
        if '**' in pattern:
            return None
        if self._prefix:
            pattern = f'{self._prefix}{self._path.parser.sep}{pattern}'
        *_, pattern_parts = self._path._parse_path(pattern)
        return len(pattern_parts)

    def _calculate_full_or_just_folder(self, pattern):
        # True -> list every key; False -> a delimiter listing (folders at
        # one level) is sufficient.
        if '**' in pattern:
            return True
        *_, pattern_parts = self._path._parse_path(pattern)
        # Any wildcard before the last component forces a full key listing.
        for part in pattern_parts[:-1]:
            if '*' in part:
                return True
        return False

    def _deep_cached_dir_scan(self):
        cache = set()
        prefix_sep_count = self._prefix.count(self._path.parser.sep)
        for key in accessor.iter_keys(self._path, prefix=self._prefix, full_keys=self._full_keys):
            key_sep_count = key.count(self._path.parser.sep) + 1
            key_parts = key.rsplit(self._path.parser.sep, maxsplit=key_sep_count - prefix_sep_count)
            target_path_parts = key_parts[:self._target_level]
            target_path = ''
            for part in target_path_parts:
                if not part:
                    continue
                target_path += f'{self._path.parser.sep}{part}'
            # cache de-duplicates targets that several keys collapse onto.
            if target_path in cache:
                continue
            yield target_path
            cache.add(target_path)

    def _compile_pattern_parts(self, prefix, pattern, bucket):
        # Build a single regex over '/<bucket>/<prefix>/<pattern>'.
        pattern = self._path.parser.sep.join((
            '',
            bucket,
            prefix,
            pattern,
        ))
        *_, pattern_parts = self._path._parse_path(pattern)
        new_regex_pattern = ''
        for part in pattern_parts:
            if part == self._path.parser.sep:
                continue
            if '**' in part:
                # '**' matches across separators.
                new_regex_pattern += f'{self._path.parser.sep}*(?s:{part.replace("**", ".*")})'
                continue
            if '*' == part:
                # Bare '*' matches exactly one non-empty path component.
                new_regex_pattern += f'{self._path.parser.sep}(?s:[^/]+)'
                continue
            # fnmatch.translate ends with r'\Z'; strip it ([:-2]) so parts chain.
            new_regex_pattern += f'{self._path.parser.sep}{fnmatch.translate(part)[:-2]}'
        new_regex_pattern += r'/*\Z'
        return re.compile(new_regex_pattern).fullmatch

================================================ FILE: s3path/old_versions.py ================================================
"""
s3path provides a Pythonic API to S3 by wrapping boto3 with pathlib interface
"""
from __future__ import annotations

import re
import sys
import fnmatch
from os import stat_result
from threading import Lock
from itertools import chain
from datetime import timedelta
from functools import lru_cache
from contextlib import suppress
from urllib.parse import unquote
from collections import namedtuple, deque
from typing import Union, Generator, Literal, Optional
from io import DEFAULT_BUFFER_SIZE, UnsupportedOperation, TextIOWrapper
from pathlib import _PosixFlavour, _is_wildcard_pattern, PurePath, Path

import boto3
from boto3.s3.transfer import TransferManager
from boto3.resources.factory import ServiceResource
from botocore.exceptions import ClientError
from botocore.docs.docstring import LazyLoadedDocstring
import smart_open
import smart_open.s3

__all__ = (
    'register_configuration_parameter',
    'S3Path',
    'VersionedS3Path',
    'PureS3Path',
    'PureVersionedS3Path',
    'StatResult',
)

ALLOWED_COPY_ARGS = TransferManager.ALLOWED_COPY_ARGS


class _S3Flavour(_PosixFlavour):
    # Legacy (pathlib < 3.12) flavour hook; the modern equivalent lives in
    # current_version.py as _S3Parser / PureS3Path.__init__.
    is_supported = bool(boto3)

    def parse_parts(self, parts):
        drv, root, parsed = super().parse_parts(parts)
        # Resolve '..' eagerly — same normalization as the new implementation.
        for part in parsed[1:]:
            if part == '..':
                index = parsed.index(part)
                parsed.pop(index - 1)
                parsed.remove(part)
        return drv, root, parsed

    def make_uri(self, path):
        uri = super().make_uri(path)
        return uri.replace('file:///', 's3://')

    def compile_pattern_parts(self, path, prefix, pattern, bucket):
        # Same regex construction as _Selector._compile_pattern_parts above.
        pattern = self.sep.join((
            '',
            bucket,
            prefix,
            pattern,
        ))
        *_, pattern_parts = self.parse_parts((pattern,))
        new_regex_pattern = ''
        for part in pattern_parts:
            if part == self.sep:
                continue
            if '**' in part:
                new_regex_pattern += f'{self.sep}*(?s:{part.replace("**", ".*")})'
                continue
            if '*' == part:
                new_regex_pattern += f'{path._flavour.sep}(?s:[^/]+)'
                continue
            new_regex_pattern += f'{self.sep}{fnmatch.translate(part)[:-2]}'
        new_regex_pattern += r'/*\Z'
        return re.compile(new_regex_pattern).fullmatch


class _S3ConfigurationMap:
    # Legacy configuration map: keyed by PureS3Path objects (the modern one
    # in accessor.py keys by str(path)).
    def __init__(self, default_resource_kwargs, **default_arguments):
        self.default_resource_kwargs = default_resource_kwargs
        self.default_arguments = default_arguments
        self.arguments = None
        self.resources = None
        self.general_options = None
        self.setup_lock = Lock()
        self.is_setup = False

    @property
    def default_resource(self):
        return boto3.resource('s3', **self.default_resource_kwargs)

    def _delayed_setup(self):
        """ Resolves a circular dependency between us and PureS3Path """
        with self.setup_lock:
            if not self.is_setup:
                self.arguments = {PureS3Path('/'): self.default_arguments}
                self.resources = {PureS3Path('/'): self.default_resource}
                self.general_options = {PureS3Path('/'): {'glob_new_algorithm': True}}
                self.is_setup = True

    def __repr__(self):
        return f'{type(self).__name__}' \
               f'(arguments={self.arguments}, resources={self.resources}, is_setup={self.is_setup})'

    def set_configuration(self, path, *, resource=None, arguments=None, glob_new_algorithm=None):
        self._delayed_setup()
        if arguments is not None:
            self.arguments[path] = arguments
        if resource is not None:
            self.resources[path] = resource
        if glob_new_algorithm is not None:
self.general_options[path] = {'glob_new_algorithm': glob_new_algorithm} self.get_configuration.cache_clear() @lru_cache() def get_configuration(self, path): self._delayed_setup() resources = arguments = None for path in chain([path], path.parents): if resources is None and path in self.resources: resources = self.resources[path] if arguments is None and path in self.arguments: arguments = self.arguments[path] return resources, arguments @lru_cache() def get_general_options(self, path): self._delayed_setup() for path in chain([path], path.parents): if path in self.general_options: return self.general_options[path] return class _S3Scandir: def __init__(self, *, s3_accessor, path): self._s3_accessor = s3_accessor self._path = path def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): return def __iter__(self) -> Generator[_S3DirEntry, None, None]: bucket_name = self._path.bucket resource, _ = self._s3_accessor.configuration_map.get_configuration(self._path) if not bucket_name: for bucket in resource.buckets.all(): yield _S3DirEntry(bucket.name, is_dir=True) return bucket = resource.Bucket(bucket_name) sep = self._path._flavour.sep kwargs = { 'Bucket': bucket.name, 'Prefix': self._s3_accessor.generate_prefix(self._path), 'Delimiter': sep} continuation_token = None while True: if continuation_token: kwargs['ContinuationToken'] = continuation_token response = bucket.meta.client.list_objects_v2(**kwargs) for folder in response.get('CommonPrefixes', ()): full_name = folder['Prefix'][:-1] if folder['Prefix'].endswith(sep) else folder['Prefix'] name = full_name.split(sep)[-1] yield _S3DirEntry(name, is_dir=True) for file in response.get('Contents', ()): if file['Key'] == response['Prefix']: continue name = file['Key'].split(sep)[-1] yield _S3DirEntry(name=name, is_dir=False, size=file['Size'], last_modified=file['LastModified']) if not response.get('IsTruncated'): break continuation_token = response.get('NextContinuationToken') class _S3Accessor: 
""" An accessor implements a particular (system-specific or not) way of accessing paths on the filesystem. In this case this will access AWS S3 service """ def __init__(self, **kwargs): self.configuration_map = _S3ConfigurationMap(default_resource_kwargs=kwargs) def stat(self, path, *, follow_symlinks=True): if not follow_symlinks: raise NotImplementedError( f'Setting follow_symlinks to {follow_symlinks} is unsupported on S3 service.') resource, _ = self.configuration_map.get_configuration(path) object_summary = resource.ObjectSummary(path.bucket, path.key) return StatResult( size=object_summary.size, last_modified=object_summary.last_modified, ) def is_dir(self, path): if str(path) == path.root: return True resource, _ = self.configuration_map.get_configuration(path) bucket = resource.Bucket(path.bucket) return any(bucket.objects.filter(Prefix=self.generate_prefix(path))) def exists(self, path): bucket_name = path.bucket resource, _ = self.configuration_map.get_configuration(path) if not path.key: # Check whether or not the bucket exists. 
# See https://stackoverflow.com/questions/26871884 try: resource.meta.client.head_bucket(Bucket=bucket_name) return True except ClientError as e: error_code = e.response['Error']['Code'] if error_code == '404': # Not found return False raise e bucket = resource.Bucket(bucket_name) key_name = str(path.key) for object in bucket.objects.filter(Prefix=key_name): if object.key == key_name: return True if object.key.startswith(key_name + path._flavour.sep): return True return False def scandir(self, path) -> _S3Scandir: return _S3Scandir(s3_accessor=self, path=path) def listdir(self, path): with self.scandir(path) as scandir_iter: return [entry.name for entry in scandir_iter] def open(self, path, *, mode='r', buffering=-1, encoding=None, errors=None, newline=None): resource, config = self.configuration_map.get_configuration(path) smart_open_kwargs = { 'uri': "s3:/" + str(path), 'mode': mode, 'buffering': buffering, 'encoding': encoding, 'errors': errors, 'newline': newline, } transport_params = {'defer_seek': True} dummy_object = resource.Object('bucket', 'key') self._update_smart_open_kwargs( dummy_object, resource, config, transport_params, smart_open_kwargs, ) file_object = smart_open.open(**smart_open_kwargs) return file_object def owner(self, path): bucket_name = path.bucket key_name = path.key resource, _ = self.configuration_map.get_configuration(path) object_summary = resource.ObjectSummary(bucket_name, key_name) # return object_summary.owner['DisplayName'] # This is a hack till boto3 resolve this issue: # https://github.com/boto/boto3/issues/1950 responce = object_summary.meta.client.list_objects_v2( Bucket=object_summary.bucket_name, Prefix=object_summary.key, FetchOwner=True) return responce['Contents'][0]['Owner']['DisplayName'] def rename(self, path, target): source_bucket_name = path.bucket source_key_name = path.key target_bucket_name = target.bucket target_key_name = target.key resource, config = self.configuration_map.get_configuration(path) if not 
self.is_dir(path): target_bucket = resource.Bucket(target_bucket_name) object_summary = resource.ObjectSummary(source_bucket_name, source_key_name) old_source = {'Bucket': object_summary.bucket_name, 'Key': object_summary.key} self._boto3_method_with_extraargs( target_bucket.copy, config=config, args=(old_source, target_key_name), allowed_extra_args=ALLOWED_COPY_ARGS, ) self._boto3_method_with_parameters(object_summary.delete) return bucket = resource.Bucket(source_bucket_name) target_bucket = resource.Bucket(target_bucket_name) for object_summary in bucket.objects.filter(Prefix=source_key_name): old_source = {'Bucket': object_summary.bucket_name, 'Key': object_summary.key} new_key = object_summary.key.replace(source_key_name, target_key_name) _, config = self.configuration_map.get_configuration(S3Path(target_bucket_name, new_key)) self._boto3_method_with_extraargs( target_bucket.copy, config=config, args=(old_source, new_key), allowed_extra_args=ALLOWED_COPY_ARGS, ) self._boto3_method_with_parameters(object_summary.delete) def replace(self, path, target): return self.rename(path, target) def rmdir(self, path): bucket_name = path.bucket key_name = path.key resource, config = self.configuration_map.get_configuration(path) bucket = resource.Bucket(bucket_name) for object_summary in bucket.objects.filter(Prefix=key_name): self._boto3_method_with_parameters(object_summary.delete, config=config) if path.is_bucket: self._boto3_method_with_parameters(bucket.delete, config=config) def mkdir(self, path, mode): resource, config = self.configuration_map.get_configuration(path) self._boto3_method_with_parameters( resource.create_bucket, config=config, kwargs={'Bucket': path.bucket}, ) def generate_prefix(self, path): sep = path._flavour.sep if not path.key: return '' key_name = path.key if not key_name.endswith(sep): return key_name + sep return key_name def unlink(self, path, *args, **kwargs): bucket_name = path.bucket key_name = path.key resource, config = 
self.configuration_map.get_configuration(path)
        bucket = resource.Bucket(bucket_name)
        try:
            self._boto3_method_with_parameters(
                bucket.meta.client.delete_object,
                config=config,
                kwargs={"Bucket": bucket_name, "Key": key_name}
            )
        except ClientError:
            # Normalize any boto3 failure into an OSError carrying the full path.
            raise OSError(f'/{bucket_name}/{key_name}')

    def get_presigned_url(self, path, expire_in: int) -> str:
        """Return a presigned GET URL for *path*, valid for *expire_in* seconds."""
        resource, config = self.configuration_map.get_configuration(path)
        return self._boto3_method_with_parameters(
            resource.meta.client.generate_presigned_url,
            config=config,
            kwargs=dict(
                ClientMethod="get_object",
                Params={"Bucket": path.bucket, "Key": path.key},
                ExpiresIn=expire_in,
            )
        )

    def iter_keys(self, path, *, prefix=None, full_keys=True):
        """Yield S3 keys under *path*.

        With full_keys=False only one "directory" level is listed (Delimiter set);
        when *path* has no bucket the buckets themselves are iterated.
        """
        resource, _ = self.configuration_map.get_configuration(path)
        bucket_name = path.bucket

        def get_keys():
            # Pages through list_objects_v2; reads (and mutates) the enclosing
            # `kwargs` dict, which callers below rebind per bucket.
            continuation_token = None
            while True:
                if continuation_token:
                    kwargs['ContinuationToken'] = continuation_token
                response = resource.meta.client.list_objects_v2(**kwargs)
                for file in response.get('Contents', ()):
                    yield file['Key']
                for folder in response.get('CommonPrefixes', ()):
                    yield folder['Prefix']
                if not response.get('IsTruncated'):
                    break
                continuation_token = response.get('NextContinuationToken')

        # get buckets
        if not bucket_name and not full_keys:
            for bucket in resource.buckets.filter():
                yield bucket.name
            return
        # get keys in buckets
        if not bucket_name:
            for bucket in resource.buckets.filter():
                kwargs = {'Bucket': bucket.name}
                yield from get_keys()
            return
        # get keys or part of keys in buckets
        kwargs = {'Bucket': bucket_name}
        if prefix:
            kwargs['Prefix'] = prefix
        if not full_keys:
            kwargs['Delimiter'] = path._flavour.sep
        yield from get_keys()

    def _update_kwargs_with_config(self, boto3_method, config, kwargs=None):
        """Merge into *kwargs* every configured value that *boto3_method* accepts."""
        kwargs = kwargs or {}
        if config is not None:
            kwargs.update({
                key: value
                for key, value in config.items()
                if key in self._get_action_arguments(boto3_method)
            })
        return kwargs

    @lru_cache()
    def _get_action_arguments(self, action):
        """Parse the ':param' argument names out of a boto3 method docstring (cached).

        NOTE(review): lru_cache on an instance method keeps `self` alive for the
        cache's lifetime; tolerable here because accessors are module singletons.
        """
        if isinstance(action.__doc__, LazyLoadedDocstring):
            docs = action.__doc__._generate()
        else:
            docs = action.__doc__
        return set(
            line.replace(':param ', '').strip().strip(':')
            for line in docs.splitlines()
            if line.startswith(':param ')
        )

    def _boto3_method_with_parameters(self, boto3_method, config=None, args=(), kwargs=None):
        """Call *boto3_method* with config-derived keyword arguments merged in."""
        kwargs = self._update_kwargs_with_config(boto3_method, config, kwargs)
        return boto3_method(*args, **kwargs)

    def _boto3_method_with_extraargs(
            self,
            boto3_method,
            config=None,
            args=(),
            kwargs=None,
            extra_args=None,
            allowed_extra_args=()):
        """Call *boto3_method*, routing allowed configured values through ExtraArgs."""
        kwargs = kwargs or {}
        extra_args = extra_args or {}
        if config is not None:
            extra_args.update({
                key: value
                for key, value in config.items()
                if key in allowed_extra_args
            })
        kwargs["ExtraArgs"] = extra_args
        return boto3_method(*args, **kwargs)

    def _update_smart_open_kwargs(
            self,
            dummy_object,
            resource,
            config,
            transport_params,
            smart_open_kwargs):
        """
        New Smart-Open (>=5.1.0) api
        Doc: https://github.com/RaRe-Technologies/smart_open/blob/develop/MIGRATING_FROM_OLDER_VERSIONS.rst
        """
        get_object_kwargs = self._update_kwargs_with_config(
            dummy_object.meta.client.get_object, config=config)
        create_multipart_upload_kwargs = self._update_kwargs_with_config(
            dummy_object.meta.client.create_multipart_upload, config=config)
        transport_params.update(
            client=resource.meta.client,
            client_kwargs={
                'S3.Client.create_multipart_upload': create_multipart_upload_kwargs,
                'S3.Client.get_object': get_object_kwargs
            },
        )
        smart_open_kwargs.update(
            compression='disable',
            transport_params=transport_params,
        )


class _VersionedS3Accessor(_S3Accessor):
    """Accessor variant that targets a specific object version (used by VersionedS3Path)."""

    def stat(self, path, *, follow_symlinks=True):
        """Return a StatResult for the exact object version referenced by *path*."""
        if not follow_symlinks:
            raise NotImplementedError(
                f'Setting follow_symlinks to {follow_symlinks} is unsupported on S3 service.')
        resource, _ = self.configuration_map.get_configuration(path)
        object_summary = resource.ObjectVersion(path.bucket, path.key, path.version_id).get()
        return StatResult(
            size=object_summary.get('ContentLength'),
            last_modified=object_summary.get('LastModified'),
            version_id=object_summary.get('VersionId'),
        )

    def exists(self, path):
        """True when an object version matching both *path*'s key and version_id exists."""
        resource, _ = self.configuration_map.get_configuration(path)
        bucket = resource.Bucket(path.bucket)
        key = path.key
        for obj in bucket.object_versions.filter(Prefix=key):
            # Match the key exactly, or as a directory-like prefix.
            key_match = (obj.key == key) or obj.key.startswith(key + path._flavour.sep)
            if key_match and (obj.version_id == path.version_id):
                return True
        return False

    def open(self, path, *, mode='r', buffering=-1, encoding=None, errors=None, newline=None):
        """Open a specific object version through smart_open (via the version_id transport param)."""
        resource, config = self.configuration_map.get_configuration(path)
        smart_open_kwargs = {
            'uri': "s3:/" + str(path),
            'mode': mode,
            'buffering': buffering,
            'encoding': encoding,
            'errors': errors,
            'newline': newline,
        }
        transport_params = {'defer_seek': True, "version_id": path.version_id}
        dummy_object = resource.Object('bucket', 'key')
        self._update_smart_open_kwargs(
            dummy_object,
            resource,
            config,
            transport_params,
            smart_open_kwargs,
        )
        file_object = smart_open.open(**smart_open_kwargs)
        return file_object


class _PathNotSupportedMixin:
    """Mixin turning file-system-only pathlib methods into NotImplementedError raisers."""

    _NOT_SUPPORTED_MESSAGE = '{method} is unsupported on S3 service'

    @classmethod
    def cwd(cls):
        """
        cwd class method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = cls._NOT_SUPPORTED_MESSAGE.format(method=cls.cwd.__qualname__)
        raise NotImplementedError(message)

    @classmethod
    def home(cls):
        """
        home class method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = cls._NOT_SUPPORTED_MESSAGE.format(method=cls.home.__qualname__)
        raise NotImplementedError(message)

    def chmod(self, mode, *, follow_symlinks=True):
        """
        chmod method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.chmod.__qualname__)
        raise NotImplementedError(message)

    def expanduser(self):
        """
        expanduser method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.expanduser.__qualname__)
        raise NotImplementedError(message)

    def lchmod(self, mode):
        """
        lchmod method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.lchmod.__qualname__)
        raise NotImplementedError(message)

    def group(self):
        """
        group method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.group.__qualname__)
        raise NotImplementedError(message)

    def is_block_device(self):
        """
        is_block_device method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.is_block_device.__qualname__)
        raise NotImplementedError(message)

    def is_char_device(self):
        """
        is_char_device method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.is_char_device.__qualname__)
        raise NotImplementedError(message)

    def lstat(self):
        """
        lstat method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.lstat.__qualname__)
        raise NotImplementedError(message)

    def resolve(self):
        """
        resolve method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.resolve.__qualname__)
        raise NotImplementedError(message)

    def symlink_to(self, *args, **kwargs):
        """
        symlink_to method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.symlink_to.__qualname__)
        raise NotImplementedError(message)

    def hardlink_to(self, *args, **kwargs):
        """
        hardlink_to method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = 
self._NOT_SUPPORTED_MESSAGE.format(method=self.hardlink_to.__qualname__)
        raise NotImplementedError(message)

    def readlink(self):
        """
        readlink method is unsupported on S3 service
        AWS S3 don't have this file system action concept
        """
        message = self._NOT_SUPPORTED_MESSAGE.format(method=self.readlink.__qualname__)
        raise NotImplementedError(message)


class _Selector:
    """Implements glob/rglob matching on top of flat S3 key listings."""

    def __init__(self, path, *, pattern):
        self._path = path
        # Split the literal (non-wildcard) lead off the pattern so that S3 can
        # filter server-side via the Prefix argument.
        self._prefix, pattern = self._prefix_splitter(pattern)
        self._full_keys = self._calculate_full_or_just_folder(pattern)
        self._target_level = self._calculate_pattern_level(pattern)
        self.match = self._path._flavour.compile_pattern_parts(self._path, self._prefix, pattern, path.bucket)

    def select(self):
        """Yield path objects for every scanned key that matches the compiled pattern."""
        for target in self._deep_cached_dir_scan():
            target = f'{self._path._flavour.sep}{self._path.bucket}{target}'
            if self.match(target):
                yield type(self._path)(target)

    def _prefix_splitter(self, pattern):
        """Return (prefix, remaining_pattern); *prefix* is the literal lead of *pattern*."""
        if not _is_wildcard_pattern(pattern):
            # Fully literal pattern: the whole thing becomes the prefix.
            if self._path.key:
                return f'{self._path.key}{self._path._flavour.sep}{pattern}', ''
            return pattern, ''
        *_, pattern_parts = self._path._flavour.parse_parts((pattern,))
        prefix = ''
        for index, part in enumerate(pattern_parts):
            if _is_wildcard_pattern(part):
                break
            prefix += f'{part}{self._path._flavour.sep}'
        if pattern.startswith(prefix):
            pattern = pattern.replace(prefix, '', 1)
        key_prefix = self._path.key
        if key_prefix:
            prefix = self._path._flavour.sep.join((key_prefix, prefix))
        return prefix, pattern

    def _calculate_pattern_level(self, pattern):
        """Number of components a matching path must have; None means unbounded ('**')."""
        if '**' in pattern:
            return None
        if self._prefix:
            pattern = f'{self._prefix}{self._path._flavour.sep}{pattern}'
        *_, pattern_parts = self._path._flavour.parse_parts((pattern,))
        return len(pattern_parts)

    def _calculate_full_or_just_folder(self, pattern):
        """True when a full recursive listing is required, not just one level."""
        if '**' in pattern:
            return True
        *_, pattern_parts = self._path._flavour.parse_parts((pattern,))
        # a wildcard in any non-final component forces a full listing
        for part in pattern_parts[:-1]:
            if '*' in part:
                return True
        return False

    def _deep_cached_dir_scan(self):
        """Yield each distinct target path (trimmed to _target_level components) exactly once."""
        cache = set()
        prefix_sep_count = 
self._prefix.count(self._path._flavour.sep) for key in self._path._accessor.iter_keys(self._path, prefix=self._prefix, full_keys=self._full_keys): key_sep_count = key.count(self._path._flavour.sep) + 1 key_parts = key.rsplit(self._path._flavour.sep, maxsplit=key_sep_count - prefix_sep_count) target_path_parts = key_parts[:self._target_level] target_path = '' for part in target_path_parts: if not part: continue target_path += f'{self._path._flavour.sep}{part}' if target_path in cache: continue yield target_path cache.add(target_path) _s3_flavour = _S3Flavour() _s3_accessor = _S3Accessor() _versioned_s3_accessor = _VersionedS3Accessor() def register_configuration_parameter( path: PureS3Path, *, parameters: Optional[dict] = None, resource: Optional[ServiceResource] = None, glob_new_algorithm: Optional[bool] = None): if not isinstance(path, PureS3Path): raise TypeError(f'path argument have to be a {PurePath} type. got {type(path)}') if parameters and not isinstance(parameters, dict): raise TypeError(f'parameters argument have to be a dict type. got {type(path)}') if parameters is None and resource is None and glob_new_algorithm is None: raise ValueError('user have to specify parameters or resource arguments') _s3_accessor.configuration_map.set_configuration( path, resource=resource, arguments=parameters, glob_new_algorithm=glob_new_algorithm) class PureS3Path(PurePath): """ PurePath subclass for AWS S3 service. S3 is not a file-system but we can look at it like a POSIX system. 
""" _flavour = _s3_flavour __slots__ = () @classmethod def from_uri(cls, uri: str): """ from_uri class method create a class instance from url >> from s3path import PureS3Path >> PureS3Path.from_uri('s3:///') << PureS3Path('//') """ if not uri.startswith('s3://'): raise ValueError('Provided uri seems to be no S3 URI!') unquoted_uri = unquote(uri) return cls(unquoted_uri[4:]) @property def bucket(self) -> str: """ The AWS S3 Bucket name, or '' """ self._absolute_path_validation() with suppress(ValueError): _, bucket, *_ = self.parts return bucket return '' @property def is_bucket(self) -> bool: """ Check if Path is a bucket """ return self.is_absolute() and self == PureS3Path(f"/{self.bucket}") @property def key(self) -> str: """ The AWS S3 Key name, or '' """ self._absolute_path_validation() key = self._flavour.sep.join(self.parts[2:]) return key @classmethod def from_bucket_key(cls, bucket: str, key: str): """ from_bucket_key class method create a class instance from bucket, key pair's >> from s3path import PureS3Path >> PureS3Path.from_bucket_key(bucket='', key='') << PureS3Path('//') """ bucket = cls(cls._flavour.sep, bucket) if len(bucket.parts) != 2: raise ValueError(f'bucket argument contains more then one path element: {bucket}') key = cls(key) if key.is_absolute(): key = key.relative_to('/') return bucket / key def as_uri(self) -> str: """ Return the path as a 's3' URI. """ return super().as_uri() def _absolute_path_validation(self): if not self.is_absolute(): raise ValueError('relative path have no bucket, key specification') class S3Path(_PathNotSupportedMixin, Path, PureS3Path): """ Path subclass for AWS S3 service. S3Path provide a Python convenient File-System/Path like interface for AWS S3 Service using boto3 S3 resource as a driver. If boto3 isn't installed in your environment NotImplementedError will be raised. 
""" _accessor = _s3_accessor __slots__ = () def _init(self, template=None): super()._init(template) if template is None: self._accessor = _s3_accessor def stat(self, *, follow_symlinks: bool = True) -> StatResult: """ Returns information about this path (similarly to boto3's ObjectSummary). For compatibility with pathlib, the returned object some similar attributes like os.stat_result. The result is looked up at each call to this method """ if not follow_symlinks: raise NotImplementedError( f'Setting follow_symlinks to {follow_symlinks} is unsupported on S3 service.') self._absolute_path_validation() if not self.key: return None return self._accessor.stat(self, follow_symlinks=follow_symlinks) def exists(self) -> bool: """ Whether the path points to an existing Bucket, key or key prefix. """ self._absolute_path_validation() if not self.bucket: return True return self._accessor.exists(self) def is_dir(self) -> bool: """ Returns True if the path points to a Bucket or a key prefix, False if it points to a full key path. False is also returned if the path doesn’t exist. Other errors (such as permission errors) are propagated. """ self._absolute_path_validation() if self.bucket and not self.key: return True return self._accessor.is_dir(self) def is_file(self) -> bool: """ Returns True if the path points to a Bucket key, False if it points to Bucket or a key prefix. False is also returned if the path doesn’t exist. Other errors (such as permission errors) are propagated. 
""" self._absolute_path_validation() if not self.bucket or not self.key: return False try: return bool(self.stat()) except ClientError: return False def iterdir(self) -> Generator[S3Path, None, None]: """ When the path points to a Bucket or a key prefix, yield path objects of the directory contents """ self._absolute_path_validation() for name in self._accessor.listdir(self): yield self._make_child_relpath(name) def glob(self, pattern: str) -> Generator[S3Path, None, None]: """ Glob the given relative pattern in the Bucket / key prefix represented by this path, yielding all matching files (of any kind) """ self._absolute_path_validation() general_options = self._accessor.configuration_map.get_general_options(self) glob_new_algorithm = general_options['glob_new_algorithm'] if not glob_new_algorithm: yield from super().glob(pattern) return yield from self._glob(pattern) def _glob(self, pattern): """ Glob with new Algorithm that better fit S3 API """ sys.audit("pathlib.Path.glob", self, pattern) if not pattern: raise ValueError(f'Unacceptable pattern: {pattern}') drv, root, pattern_parts = self._flavour.parse_parts((pattern,)) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") for part in pattern_parts: if part != '**' and '**' in part: raise ValueError("Invalid pattern: '**' can only be an entire path component") selector = _Selector(self, pattern=pattern) yield from selector.select() def _scandir(self): """ Override _scandir so _Selector will rely on an S3 compliant implementation """ return self._accessor.scandir(self) def rglob(self, pattern: str) -> Generator[S3Path, None, None]: """ This is like calling S3Path.glob with "**/" added in front of the given relative pattern """ self._absolute_path_validation() general_options = self._accessor.configuration_map.get_general_options(self) glob_new_algorithm = general_options['glob_new_algorithm'] if not glob_new_algorithm: yield from super().rglob(pattern) return yield from 
self._rglob(pattern) def _rglob(self, pattern): """ RGlob with new Algorithm that better fit S3 API """ sys.audit("pathlib.Path.rglob", self, pattern) if not pattern: raise ValueError(f'Unacceptable pattern: {pattern}') drv, root, pattern_parts = self._flavour.parse_parts((pattern,)) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") for part in pattern_parts: if part != '**' and '**' in part: raise ValueError("Invalid pattern: '**' can only be an entire path component") pattern = f'**{self._flavour.sep}{pattern}' selector = _Selector(self, pattern=pattern) yield from selector.select() def open( self, mode: Literal["r", "w", "rb", "wb"] = 'r', buffering: int = DEFAULT_BUFFER_SIZE, encoding: Optional[str] = None, errors: Optional[str] = None, newline: Optional[str] = None ) -> Union[TextIOWrapper, smart_open.s3.Reader, smart_open.s3.MultipartWriter]: """ Opens the Bucket key pointed to by the path, returns a Key file object that you can read/write with """ self._absolute_path_validation() return self._accessor.open( self, mode=mode, buffering=buffering, encoding=encoding, errors=errors, newline=newline) def owner(self) -> str: """ Returns the name of the user owning the Bucket or key. Similarly to boto3's ObjectSummary owner attribute """ self._absolute_path_validation() if not self.is_file(): return KeyError('file not found') return self._accessor.owner(self) def rename(self, target: Union[str, S3Path]) -> S3Path: """ Renames this file or Bucket / key prefix / key to the given target. If target exists and is a file, it will be replaced silently if the user has permission. If path is a key prefix, it will replace all the keys with the same prefix to the new target prefix. Target can be either a string or another S3Path object. 
""" self._absolute_path_validation() if not isinstance(target, type(self)): target = type(self)(target) target._absolute_path_validation() self._accessor.rename(self, target) return self.__class__(target) def replace(self, target: Union[str, S3Path]) -> S3Path: """ Renames this Bucket / key prefix / key to the given target. If target points to an existing Bucket / key prefix / key, it will be unconditionally replaced. """ return self.rename(target) def unlink(self, missing_ok: bool = False): """ Remove this key from its bucket. """ self._absolute_path_validation() # S3 doesn't care if you remove full prefixes or buckets with its delete API # so unless we manually check, this call will be dropped through without any # validation and could result in data loss try: if self.is_dir(): raise IsADirectoryError(str(self)) if not self.is_file(): raise FileNotFoundError(str(self)) except (IsADirectoryError, FileNotFoundError): if missing_ok: return raise try: # XXX: Note: If we don't check if the file exists here, S3 will always return # success even if we try to delete a key that doesn't exist. So, if we want # to raise a `FileNotFoundError`, we need to manually check if the file exists # before we make the API call -- since we want to delete the file anyway, # we can just ignore this for now and be satisfied that the file will be removed self._accessor.unlink(self) except FileNotFoundError: if not missing_ok: raise def rmdir(self): """ Removes this Bucket / key prefix. 
The Bucket / key prefix must be empty """ self._absolute_path_validation() if self.is_file(): raise NotADirectoryError() if not self.is_dir(): raise FileNotFoundError() self._accessor.rmdir(self) def samefile(self, other_path: Union[str, S3Path]) -> bool: """ Returns whether this path points to the same Bucket key as other_path, Which can be either a Path object, or a string """ self._absolute_path_validation() if not isinstance(other_path, Path): other_path = type(self)(other_path) return self.bucket == other_path.bucket and self.key == other_path.key and self.is_file() def touch(self, mode: int = 0o666, exist_ok: bool = True): """ Creates a key at this given path. If the key already exists, the function succeeds if exist_ok is true (and its modification time is updated to the current time), otherwise FileExistsError is raised """ if self.exists() and not exist_ok: raise FileExistsError() self.write_text('') def mkdir(self, mode: int = 0o777, parents: bool = False, exist_ok: bool = False): """ Create a path bucket. AWS S3 Service doesn't support folders, therefore the mkdir method will only create the current bucket. If the bucket path already exists, FileExistsError is raised. If exist_ok is false (the default), FileExistsError is raised if the target Bucket already exists. If exist_ok is true, OSError exceptions will be ignored. if parents is false (the default), mkdir will create the bucket only if this is a Bucket path. if parents is true, mkdir will create the bucket even if the path have a Key path. mode argument is ignored. 
""" try: if not self.bucket: raise FileNotFoundError(f'No bucket in {type(self)} {self}') if self.key and not parents: raise FileNotFoundError(f'Only bucket path can be created, got {self}') if type(self)(self._flavour.sep, self.bucket).exists(): raise FileExistsError(f'Bucket {self.bucket} already exists') self._accessor.mkdir(self, mode) except OSError: if not exist_ok: raise def is_mount(self) -> Literal[False]: """ AWS S3 Service doesn't have mounting feature, There for this method will always return False """ return False def is_symlink(self) -> Literal[False]: """ AWS S3 Service doesn't have symlink feature, There for this method will always return False """ return False def is_socket(self) -> Literal[False]: """ AWS S3 Service doesn't have sockets feature, There for this method will always return False """ return False def is_fifo(self) -> Literal[False]: """ AWS S3 Service doesn't have fifo feature, There for this method will always return False """ return False def absolute(self) -> S3Path: """ Handle absolute method only if the path is already an absolute one since we have no way to compute an absolute path from a relative one in S3. """ if self.is_absolute(): return self # We can't compute the absolute path from a relative one raise ValueError("Absolute path can't be determined for relative S3Path objects") def get_presigned_url(self, expire_in: Union[timedelta, int] = 3600) -> str: """ Returns a pre-signed url. Anyone with the url can make a GET request to get the file. You can set an expiration date with the expire_in argument (integer or timedelta object). Note that generating a presigned url may require more information or setup than to use other S3Path functions. It's because it needs to know the exact aws region and use s3v4 as signature version. 
Meaning you may have to do this: ```python import boto3 from botocore.config import Config from s3path import S3Path, register_configuration_parameter resource = boto3.resource( "s3", config=Config(signature_version="s3v4"), region_name="the aws region name" ) register_configuration_parameter(S3Path("/"), resource=resource) ``` A simple example: ```python from s3path import S3Path import requests file = S3Path("/my-bucket/toto.txt") file.write_text("hello world") presigned_url = file.get_presigned_url() print(requests.get(presigned_url).content) b"hello world" """ self._absolute_path_validation() if isinstance(expire_in, timedelta): expire_in = int(expire_in.total_seconds()) if expire_in <= 0: raise ValueError( f"The expire_in argument can't represent a negative or null time delta. " f"You provided expire_in = {expire_in} seconds which is below or equal to 0 seconds.") return self._accessor.get_presigned_url(self, expire_in) class PureVersionedS3Path(PureS3Path): """ PurePath subclass for AWS S3 service Keys with Versions. S3 is not a file-system, but we can look at it like a POSIX system. 
""" def __new__(cls, *args, version_id: str): self = super().__new__(cls, *args) self.version_id = version_id return self @classmethod def from_uri(cls, uri: str, *, version_id: str): """ from_uri class method creates a class instance from uri and version id >> from s3path import VersionedS3Path >> VersionedS3Path.from_uri('s3:///', version_id='') << VersionedS3Path('//', version_id='') """ self = PureS3Path.from_uri(uri) return cls(self, version_id=version_id) @classmethod def from_bucket_key(cls, bucket: str, key: str, *, version_id: str): """ from_bucket_key class method creates a class instance from bucket, key and version id >> from s3path import VersionedS3Path >> VersionedS3Path.from_bucket_key('', '', version_id='') << VersionedS3Path('//', version_id='') """ self = PureS3Path.from_bucket_key(bucket=bucket, key=key) return cls(self, version_id=version_id) def __repr__(self) -> str: return f'{type(self).__name__}({self.as_posix()}, version_id={self.version_id})' def joinpath(self, *args): if not args: return self new_path = super().joinpath(*args) if isinstance(args[-1], PureVersionedS3Path): new_path.version_id = args[-1].version_id else: new_path = S3Path(new_path) return new_path def __truediv__(self, key): if not isinstance(key, (PureS3Path, str)): return NotImplemented key = S3Path(key) if isinstance(key, str) else key return key.__rtruediv__(self) def __rtruediv__(self, key): if not isinstance(key, (PureS3Path, str)): return NotImplemented new_path = super().__rtruediv__(key) new_path.version_id = self.version_id return new_path class VersionedS3Path(PureVersionedS3Path, S3Path): """ S3Path subclass for AWS S3 service Keys with Versions. 
>> from s3path import VersionedS3Path >> VersionedS3Path('//', version_id='') << VersionedS3Path('//', version_id='') """ _accessor = _versioned_s3_accessor def _init(self, template=None): super()._init(template) if template is None: self._accessor = _versioned_s3_accessor class StatResult(namedtuple('BaseStatResult', 'size, last_modified, version_id', defaults=(None,))): """ Base of os.stat_result but with boto3 s3 features """ def __getattr__(self, item): if item in vars(stat_result): raise UnsupportedOperation(f'{type(self).__name__} do not support {item} attribute') return super().__getattribute__(item) @property def st_size(self) -> int: return self.size @property def st_mtime(self) -> float: return self.last_modified.timestamp() @property def st_version_id(self) -> str: return self.version_id class _S3DirEntry: def __init__(self, name, is_dir, size=None, last_modified=None): self.name = name self._is_dir = is_dir self._stat = StatResult(size=size, last_modified=last_modified) def __repr__(self): return f'{type(self).__name__}(name={self.name}, is_dir={self._is_dir}, stat={self._stat})' def inode(self, *args, **kwargs): return None def is_dir(self, follow_symlinks=False): if follow_symlinks: raise TypeError('AWS S3 Service does not have symlink feature') return self._is_dir def is_file(self): return not self._is_dir def is_symlink(self, *args, **kwargs): return False def stat(self): return self._stat ================================================ FILE: s3path/py.typed ================================================ # Marker file for PEP 561. The mypy package uses inline types. 
================================================
FILE: setup.cfg
================================================
[bdist_rpm]
doc_files = LICENSE README.rst

[metadata]
license_files = LICENSE

================================================
FILE: setup.py
================================================
#!/usr/bin/env python
from setuptools import setup

# The PyPI long description is the README, verbatim.
with open("README.rst", "r") as fh:
    long_description = fh.read()

setup(
    name='s3path',
    version='0.6.5',
    url='https://github.com/liormizr/s3path',
    author='Lior Mizrahi',
    author_email='li.mizr@gmail.com',
    packages=['s3path'],
    package_data={'s3path': ["py.typed"]},
    install_requires=['boto3>=1.16.35', 'smart-open>=5.1.0', ],
    license='Apache 2.0',
    long_description=long_description,
    long_description_content_type='text/x-rst',
    python_requires='>=3.9',
    include_package_data=True,
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'Natural Language :: English',
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Programming Language :: Python :: 3.13',
    ],
)

================================================
FILE: tests/__init__.py
================================================

================================================
FILE: tests/conftest.py
================================================
import sys

import boto3
import pytest
from moto import mock_aws

from s3path import register_configuration_parameter, PureS3Path

# The accessor/configuration layout differs between the pathlib
# implementations targeted before and after Python 3.12.
if sys.version_info >= (3, 12):
    from s3path import accessor

    def _cleanup():
        # Drop cached configuration so every test starts from a clean slate.
        accessor.configuration_map.get_configuration.cache_clear()
        accessor.configuration_map.get_general_options.cache_clear()
        accessor.configuration_map.is_setup = False
else:
    from s3path import S3Path

    def _cleanup():
        S3Path._accessor.configuration_map.get_configuration.cache_clear()
        S3Path._accessor.configuration_map.get_general_options.cache_clear()
        S3Path._accessor.configuration_map.is_setup = False


@pytest.fixture()
def reset_configuration_cache():
    # Clean both before and after the test, whatever its outcome.
    try:
        _cleanup()
        yield
    finally:
        _cleanup()


@pytest.fixture()
def s3_mock(reset_configuration_cache):
    # moto-backed S3, with a default boto3 resource registered for the whole tree.
    with mock_aws():
        register_configuration_parameter(PureS3Path('/'), resource=boto3.resource('s3'))
        yield

================================================
FILE: tests/test_not_supported.py
================================================
import pytest

from s3path import S3Path

# Every method exercised below has no S3 equivalent and must raise
# NotImplementedError (see _PathNotSupportedMixin).


def test_cwd():
    with pytest.raises(NotImplementedError):
        S3Path.cwd()


def test_expanduser():
    with pytest.raises(NotImplementedError):
        S3Path('/').expanduser()


def test_readlink():
    with pytest.raises(NotImplementedError):
        S3Path('/').readlink()


def test_home():
    with pytest.raises(NotImplementedError):
        S3Path.home()


def test_chmod():
    path = S3Path('/fake-bucket/fake-key')
    with pytest.raises(NotImplementedError):
        path.chmod(0o666)


def test_lchmod():
    path = S3Path('/fake-bucket/fake-key')
    with pytest.raises(NotImplementedError):
        path.lchmod(0o666)


def test_group():
    path = S3Path('/fake-bucket/fake-key')
    with pytest.raises(NotImplementedError):
        path.group()


def test_is_mount():
    assert not S3Path('/fake-bucket/fake-key').is_mount()


def test_is_symlink():
    assert not S3Path('/fake-bucket/fake-key').is_symlink()


def test_is_socket():
    assert not S3Path('/fake-bucket/fake-key').is_socket()


def test_is_fifo():
    assert not S3Path('/fake-bucket/fake-key').is_fifo()


def test_is_block_device():
    path = S3Path('/fake-bucket/fake-key')
    with pytest.raises(NotImplementedError):
        path.is_block_device()


def test_is_char_device():
    path = S3Path('/fake-bucket/fake-key')
    with pytest.raises(NotImplementedError):
        path.is_char_device()


def test_lstat():
    path = S3Path('/fake-bucket/fake-key')
    with pytest.raises(NotImplementedError):
        path.lstat()


def test_resolve():
    path = 
S3Path('/fake-bucket/fake-key') with pytest.raises(NotImplementedError): path.resolve() def test_symlink_to(): path = S3Path('/fake-bucket/fake-key') with pytest.raises(NotImplementedError): path.symlink_to('file_name') def test_stat(): path = S3Path('/fake-bucket/fake-key') with pytest.raises(NotImplementedError): path.stat(follow_symlinks=False) ================================================ FILE: tests/test_path_operations.py ================================================ import shutil import sys from datetime import timedelta from pathlib import Path, PosixPath from io import UnsupportedOperation from tempfile import NamedTemporaryFile import boto3 import requests from botocore.exceptions import ClientError import pytest from s3path import PureS3Path, S3Path, StatResult, VersionedS3Path # todo: test samefile/touch method # todo: test security and boto config changes def test_path_support(): assert PureS3Path in S3Path.mro() assert Path in S3Path.mro() def test_stat(s3_mock): path = S3Path('fake-bucket/fake-key') with pytest.raises(ValueError): path.stat() path = S3Path('/fake-bucket/fake-key') with pytest.raises(ClientError): path.stat() s3 = boto3.resource('s3') s3.create_bucket(Bucket='test-bucket') object_summary = s3.ObjectSummary('test-bucket', 'Test.test') object_summary.put(Body=b'test data') path = S3Path('/test-bucket/Test.test') stat = path.stat() assert isinstance(stat, StatResult) assert stat == StatResult( size=object_summary.size, last_modified=object_summary.last_modified, ) with NamedTemporaryFile() as local_file: local_file.write(path.read_bytes()) local_file.flush() local_path = Path(local_file.name) local_stat = local_path.stat() s3_stat = path.stat() assert s3_stat.st_size == local_stat.st_size == s3_stat.size assert s3_stat.last_modified.timestamp() == s3_stat.st_mtime assert s3_stat.st_mtime < local_stat.st_mtime with pytest.raises(UnsupportedOperation): path.stat().st_atime path = S3Path('/test-bucket') assert path.stat() is None def 
def test_exists(s3_mock):
    """exists() rejects relative paths, raises for unknown buckets, and walks parents."""
    path = S3Path('./fake-key')
    with pytest.raises(ValueError):
        path.exists()
    path = S3Path('/fake-bucket/fake-key')
    with pytest.raises(ClientError):
        path.exists()

    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')

    assert not S3Path('/test-bucket/Test.test').exists()
    path = S3Path('/test-bucket/directory/Test.test')
    assert path.exists()
    # Every ancestor of an existing key (including the bucket) exists as well.
    for parent in path.parents:
        assert parent.exists()
    assert S3Path('/').exists()


def test_glob(s3_mock):
    """glob() supports plain, one-level and recursive patterns plus directory matches."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')

    assert list(S3Path('/test-bucket/').glob('*.test')) == []
    assert list(S3Path('/test-bucket/directory/').glob('*.test')) == [S3Path('/test-bucket/directory/Test.test')]
    assert list(S3Path('/test-bucket/').glob('**/*.test')) == [S3Path('/test-bucket/directory/Test.test')]

    for key in ('pathlib.py', 'setup.py', 'test_pathlib.py', 'docs/conf.py', 'build/lib/pathlib.py'):
        s3.ObjectSummary('test-bucket', key).put(Body=b'test data')

    assert sorted(S3Path.from_uri('s3://test-bucket/').glob('*.py')) == [
        S3Path('/test-bucket/pathlib.py'),
        S3Path('/test-bucket/setup.py'),
        S3Path('/test-bucket/test_pathlib.py')]
    assert sorted(S3Path.from_uri('s3://test-bucket/').glob('*/*.py')) == [S3Path('/test-bucket/docs/conf.py')]
    assert sorted(S3Path.from_uri('s3://test-bucket/').glob('**/*.py')) == [
        S3Path('/test-bucket/build/lib/pathlib.py'),
        S3Path('/test-bucket/docs/conf.py'),
        S3Path('/test-bucket/pathlib.py'),
        S3Path('/test-bucket/setup.py'),
        S3Path('/test-bucket/test_pathlib.py')]
    # Directory-style patterns match prefixes, not only objects.
    assert sorted(S3Path.from_uri('s3://test-bucket/').glob('*cs')) == [S3Path('/test-bucket/docs/')]
    assert sorted(S3Path.from_uri('s3://test-bucket/').glob('docs/')) == [S3Path('/test-bucket/docs/')]


def test_glob_nested_folders_issue_no_115(s3_mock):
    """Regression for #115: glob/rglob inside increasingly nested prefixes."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='my-bucket')
    full_folder_tree = ''
    s3.ObjectSummary('my-bucket', 'test.txt').put(Body=b'test data')
    # First loop iteration re-puts the root 'test.txt' (full_folder_tree starts empty).
    for folder in range(6):
        s3.ObjectSummary('my-bucket', f'{full_folder_tree}test.txt').put(Body=b'test data')
        full_folder_tree += f'{folder}/'

    path = S3Path("/my-bucket/")
    assert list(path.glob('*.txt')) == [S3Path('/my-bucket/test.txt')]
    path /= S3Path('0/')
    assert list(path.glob('*.txt')) == [S3Path('/my-bucket/0/test.txt')]
    path /= S3Path('1/')
    assert list(path.glob('*.txt')) == [S3Path('/my-bucket/0/1/test.txt')]
    path /= S3Path('2/')
    assert list(path.glob('*.txt')) == [S3Path('/my-bucket/0/1/2/test.txt')]
    path /= S3Path('3/')
    assert list(path.glob('*.txt')) == [S3Path('/my-bucket/0/1/2/3/test.txt')]
    path /= S3Path('4/')
    assert list(path.glob('*.txt')) == [S3Path('/my-bucket/0/1/2/3/4/test.txt')]

    path = S3Path("/my-bucket/")
    for index, folder in enumerate(range(6)):
        # Each level down sees one fewer matching object.
        assert sum(1 for _ in path.rglob('*.txt')) == 6 - index
        path /= S3Path(f'{folder}/')


def test_glob_nested_folders_issue_no_120(s3_mock):
    """Regression for #120: glob with an explicit sub-folder prefix."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='my-bucket')
    s3.ObjectSummary('my-bucket', 's3path-test/nested/further/test.txt').put(Body=b'test data')
    path = S3Path("/my-bucket/s3path-test/nested/")
    assert list(path.glob("further/*")) == [S3Path('/my-bucket/s3path-test/nested/further/test.txt')]


def test_glob_issue_160(s3_mock):
    """Regression for #160: '**/name/' and rglob('name/') must match directories once."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='my-bucket')
    example_paths = [
        's3path/output',
        's3path/1/output',
        's3path/2/output',
        's3path/3/output',
    ]
    for example_path in example_paths:
        s3.ObjectSummary('my-bucket', f'{example_path}/test.txt').put(Body=b'test data')

    path = S3Path.from_uri("s3://my-bucket/s3path")
    expected = {
        S3Path('/my-bucket/s3path/output'),
        S3Path('/my-bucket/s3path/1/output'),
        S3Path('/my-bucket/s3path/2/output'),
        S3Path('/my-bucket/s3path/3/output'),
    }
    assert set(path.glob('**/output/')) == expected
    assert sum(1 for _ in path.glob('**/output/')) == 4
    assert set(path.rglob('output/')) == expected
    assert sum(1 for _ in path.rglob('output/')) == 4


def test_glob_issue_160_weird_behavior(s3_mock):
    """Regression for #160: glob('*') at deepening prefixes returns only direct children."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='my-bucket')
    # Fix: the original used f-strings with no placeholders (ruff F541).
    first_dir = S3Path.from_uri("s3://my-bucket/first_dir/")
    new_file = first_dir / "some_dir" / "empty.txt"
    new_file.touch()
    assert list(first_dir.glob("*")) == [S3Path('/my-bucket/first_dir/some_dir/')]

    second_dir = S3Path.from_uri("s3://my-bucket/first_dir/second_dir/")
    new_file = second_dir / "some_dir" / "empty.txt"
    new_file.touch()
    assert list(second_dir.glob("*")) == [S3Path('/my-bucket/first_dir/second_dir/some_dir/')]

    third_dir = S3Path.from_uri("s3://my-bucket/first_dir/second_dir/third_dir/")
    new_file = third_dir / "some_dir" / "empty.txt"
    new_file.touch()
    assert list(third_dir.glob("*")) == [S3Path('/my-bucket/first_dir/second_dir/third_dir/some_dir/')]


def test_glob_nested_folders_issue_no_179(s3_mock):
    """Regression for #179: '*/*' must not descend past two levels."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='my-bucket')
    example_paths = [
        's3path/nested/further/andfurther/too_far_1.txt',
        's3path/nested/further/andfurther/too_far_2.txt',
    ]
    for example_path in example_paths:
        s3.ObjectSummary('my-bucket', f'{example_path}/test.txt').put(Body=b'test data')
    path = S3Path.from_uri("s3://my-bucket/s3path/nested")
    assert list(path.glob("*/*")) == [
        S3Path('/my-bucket/s3path/nested/further/andfurther')]
def test_rglob(s3_mock):
    """rglob() recurses like glob('**/pattern') regardless of the starting prefix."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')

    expected = [S3Path('/test-bucket/directory/Test.test')]
    assert list(S3Path('/test-bucket/').rglob('*.test')) == expected
    assert list(S3Path('/test-bucket/').rglob('**/*.test')) == expected

    for key in ('pathlib.py', 'setup.py', 'test_pathlib.py', 'docs/conf.py', 'build/lib/pathlib.py'):
        s3.ObjectSummary('test-bucket', key).put(Body=b'test data')

    assert sorted(S3Path.from_uri('s3://test-bucket/').rglob('*.py')) == [
        S3Path('/test-bucket/build/lib/pathlib.py'),
        S3Path('/test-bucket/docs/conf.py'),
        S3Path('/test-bucket/pathlib.py'),
        S3Path('/test-bucket/setup.py'),
        S3Path('/test-bucket/test_pathlib.py')]


def test_accessor_scandir(s3_mock):
    """Exercise the scandir-backed listing through a recursive rglob."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    keys = (
        'directory/Test.test',
        'pathlib.py',
        'setup.py',
        'test_pathlib.py',
        'docs/conf.py',
        'build/lib/pathlib.py',
    )
    for key in keys:
        s3.ObjectSummary('test-bucket', key).put(Body=b'test data')

    assert sorted(S3Path.from_uri('s3://test-bucket/').rglob('*.py')) == [
        S3Path('/test-bucket/build/lib/pathlib.py'),
        S3Path('/test-bucket/docs/conf.py'),
        S3Path('/test-bucket/pathlib.py'),
        S3Path('/test-bucket/setup.py'),
        S3Path('/test-bucket/test_pathlib.py')]


def test_is_dir(s3_mock):
    """Buckets and key prefixes are directories; concrete objects are not."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    keys = (
        'directory/Test.test',
        'pathlib.py',
        'setup.py',
        'test_pathlib.py',
        'docs/conf.py',
        'build/lib/pathlib.py',
    )
    for key in keys:
        s3.ObjectSummary('test-bucket', key).put(Body=b'test data')

    assert S3Path('/').is_dir()
    assert not S3Path('/test-bucket/fake.test').is_dir()
    assert not S3Path('/test-bucket/fake/').is_dir()
    assert S3Path('/test-bucket/directory').is_dir()
    assert not S3Path('/test-bucket/directory/Test.test').is_dir()
    assert not S3Path('/test-bucket/pathlib.py').is_dir()
    assert not S3Path('/test-bucket/docs/conf.py').is_dir()
    assert S3Path('/test-bucket/docs/').is_dir()
    assert S3Path('/test-bucket/build/').is_dir()
    assert S3Path('/test-bucket/build/lib').is_dir()
    assert not S3Path('/test-bucket/build/lib/pathlib.py').is_dir()


def test_is_file(s3_mock):
    """Concrete objects are files; buckets and prefixes are not."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    keys = (
        'directory/Test.test',
        'pathlib.py',
        'setup.py',
        'test_pathlib.py',
        'docs/conf.py',
        'build/lib/pathlib.py',
    )
    for key in keys:
        s3.ObjectSummary('test-bucket', key).put(Body=b'test data')

    assert not S3Path('/test-bucket/fake.test').is_file()
    assert not S3Path('/test-bucket/fake/').is_file()
    assert not S3Path('/test-bucket/directory').is_file()
    assert S3Path('/test-bucket/directory/Test.test').is_file()
    assert S3Path('/test-bucket/pathlib.py').is_file()
    assert S3Path('/test-bucket/docs/conf.py').is_file()
    assert not S3Path('/test-bucket/docs/').is_file()
    assert not S3Path('/test-bucket/build/').is_file()
    assert not S3Path('/test-bucket/build/lib').is_file()
    assert S3Path('/test-bucket/build/lib/pathlib.py').is_file()


def test_read_line(s3_mock):
    """readline() yields each line once, then empty strings at EOF."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data\ntest data')

    with S3Path('/test-bucket/directory/Test.test').open("r") as fp:
        assert fp.readline() == "test data\n"
        assert fp.readline() == "test data"
        assert fp.readline() == ""


def test_read_lines(s3_mock):
    """readlines() returns one entry per line."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data\ntest data')

    with S3Path('/test-bucket/directory/Test.test').open("r") as fp:
        assert len(fp.readlines()) == 2


def test_fix_url_encoding_issue(s3_mock):
    """Keys containing URL-reserved characters ('=') must round-trip unmodified."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'paramA=valueA/paramB=valueB/name').put(Body=b'test data\ntest data')
    assert S3Path('/test-bucket/paramA=valueA/paramB=valueB/name').read_bytes() == b'test data\ntest data'


def test_read_lines_hint(s3_mock):
    """readlines(hint) stops early in binary mode."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data\ntest data')

    with S3Path('/test-bucket/directory/Test.test').open() as fp:
        assert len(fp.readlines(1)) == 1
    with S3Path('/test-bucket/directory/Test.test').open('br') as fp:
        assert len(fp.readlines(1)) == 1  # work only in binary mode
def test_iter_lines(s3_mock):
    """Iterating an open text handle yields newline-terminated lines."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data\ntest data\n')
    with S3Path('/test-bucket/directory/Test.test').open("r") as fp:
        for line in fp:
            assert line == "test data\n"


def test_write_lines(s3_mock):
    """writelines() persists every line to the object."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    path = S3Path('/test-bucket/directory/Test.test')
    with path.open("w") as fp:
        fp.writelines(["line 1\n", "line 2\n"])
    assert len(path.read_text().splitlines()) == 2


def test_iterdir(s3_mock):
    """iterdir() lists direct children only: files plus one entry per sub-prefix."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    keys = (
        'directory/Test.test',
        'pathlib.py',
        'setup.py',
        'test_pathlib.py',
        'build/lib/pathlib.py',
        'docs/conf.py',
        'docs/make.bat',
        'docs/index.rst',
        'docs/Makefile',
        'docs/_templates/11conf.py',
        'docs/_build/22conf.py',
        'docs/_static/conf.py',
    )
    for key in keys:
        s3.ObjectSummary('test-bucket', key).put(Body=b'test data')

    s3_path = S3Path('/test-bucket/docs')
    assert sorted(s3_path.iterdir()) == sorted([
        S3Path('/test-bucket/docs/_build'),
        S3Path('/test-bucket/docs/_static'),
        S3Path('/test-bucket/docs/_templates'),
        S3Path('/test-bucket/docs/conf.py'),
        S3Path('/test-bucket/docs/index.rst'),
        S3Path('/test-bucket/docs/make.bat'),
        S3Path('/test-bucket/docs/Makefile'),
    ])


def test_iterdir_on_buckets(s3_mock):
    """Iterating the root path enumerates the buckets."""
    s3 = boto3.resource('s3')
    for index in range(4):
        s3.create_bucket(Bucket='test-bucket{}'.format(index))
    assert sorted(S3Path('/').iterdir()) == [
        S3Path('/test-bucket{}'.format(index))
        for index in range(4)
    ]


def test_empty_directory(s3_mock):
    """Empty buckets and empty directory-marker keys iterate to nothing."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    assert list(S3Path('/test-bucket').iterdir()) == []
    s3.meta.client.put_object(Bucket='test-bucket', Key='to/empty/dir/')
    assert list(S3Path('/test-bucket/to/empty/dir/').iterdir()) == []


def test_open_for_reading(s3_mock):
    """open() with default mode returns a readable text handle."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')
    file_obj = S3Path('/test-bucket/directory/Test.test').open()
    assert file_obj.read() == 'test data'


def test_presigned_url(s3_mock):
    """A presigned URL serves the object's content over plain HTTP."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')
    presigned_url = S3Path('/test-bucket/directory/Test.test').get_presigned_url()
    assert requests.get(presigned_url).content == b'test data'


def test_presigned_url_expire(s3_mock):
    """expire_in accepts an integer number of seconds."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')
    presigned_url = S3Path('/test-bucket/directory/Test.test').get_presigned_url(expire_in=123)
    assert requests.get(presigned_url).content == b'test data'


def test_presigned_url_expire_with_timedelta(s3_mock):
    """expire_in accepts a datetime.timedelta."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')
    presigned_url = S3Path('/test-bucket/directory/Test.test').get_presigned_url(
        expire_in=timedelta(seconds=123))
    assert requests.get(presigned_url).content == b'test data'


def test_presigned_url_expire_with_negative_timedelta(s3_mock):
    """A non-positive timedelta expiry is rejected with a descriptive message."""
    path = S3Path('/test-bucket/directory/Test.test')
    with pytest.raises(ValueError) as err:
        path.get_presigned_url(expire_in=timedelta(seconds=-123))
    assert str(err.value) == (
        "The expire_in argument can't represent a negative or null time delta. "
        "You provided expire_in = -123 seconds which is below or equal to 0 seconds."
    )


def test_presigned_url_expire_with_negative_seconds(s3_mock):
    """A non-positive integer expiry is rejected with the same message."""
    path = S3Path('/test-bucket/directory/Test.test')
    with pytest.raises(ValueError) as err:
        path.get_presigned_url(expire_in=-123)
    assert str(err.value) == (
        "The expire_in argument can't represent a negative or null time delta. "
        "You provided expire_in = -123 seconds which is below or equal to 0 seconds."
    )
def test_presigned_url_malformed_path(s3_mock):
    """Relative paths cannot be presigned: no bucket/key can be derived."""
    path = S3Path('Test.test')
    with pytest.raises(ValueError) as err:
        path.get_presigned_url(expire_in=timedelta(seconds=123))
    assert str(err.value) == "relative path have no bucket, key specification"


def test_open_for_write(s3_mock):
    """Writing through open('bw') creates the object on close."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    bucket = s3.Bucket('test-bucket')
    assert sum(1 for _ in bucket.objects.all()) == 0

    path = S3Path('/test-bucket/directory/Test.test')
    with path.open(mode='bw') as file_obj:
        assert file_obj.writable()
        file_obj.write(b'test data\n')
        file_obj.writelines([b'test data'])

    assert sum(1 for _ in bucket.objects.all()) == 1
    streaming_body = s3.ObjectSummary('test-bucket', 'directory/Test.test').get()['Body']
    assert list(streaming_body.iter_lines()) == [b'test data', b'test data']


def test_open_binary_read(s3_mock):
    """Binary handles expose readlines()/readline() over raw bytes."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')
    path = S3Path('/test-bucket/directory/Test.test')

    with path.open(mode='br') as file_obj:
        assert file_obj.readlines() == [b'test data']
    with path.open(mode='rb') as file_obj:
        assert file_obj.readline() == b'test data'
        assert file_obj.readline() == b''
        assert file_obj.readline() == b''


def test_read_bytes(s3_mock):
    """read_bytes() returns the raw object body."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')
    assert S3Path('/test-bucket/directory/Test.test').read_bytes() == b'test data'


def test_open_text_read(s3_mock):
    """Text handles decode the body and honour 'r'/'rt' modes."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')
    path = S3Path('/test-bucket/directory/Test.test')

    with path.open(mode='r') as file_obj:
        assert file_obj.readlines() == ['test data']
    with path.open(mode='rt') as file_obj:
        assert file_obj.readline() == 'test data'
        assert file_obj.readline() == ''
        assert file_obj.readline() == ''


def test_read_text(s3_mock):
    """read_text() returns the decoded object body."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')
    assert S3Path('/test-bucket/directory/Test.test').read_text() == 'test data'


def test_owner(s3_mock):
    """owner() resolves to the object owner's display name (moto default)."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    s3.ObjectSummary('test-bucket', 'directory/Test.test').put(Body=b'test data')
    assert S3Path('/test-bucket/directory/Test.test').owner() == 'webfile'


def test_rename_s3_to_s3(s3_mock):
    """rename() moves a single key, and moves a whole prefix across buckets."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    keys = (
        'docs/conf.py',
        'docs/make.bat',
        'docs/index.rst',
        'docs/Makefile',
        'docs/_templates/11conf.py',
        'docs/_build/22conf.py',
        'docs/_static/conf.py',
    )
    for key in keys:
        s3.ObjectSummary('test-bucket', key).put(Body=b'test data')
    s3.create_bucket(Bucket='target-bucket')

    S3Path('/test-bucket/docs/conf.py').rename('/test-bucket/docs/conf1.py')
    assert not S3Path('/test-bucket/docs/conf.py').exists()
    assert S3Path('/test-bucket/docs/conf1.py').is_file()

    path = S3Path('/test-bucket/docs/')
    path.rename(S3Path('/target-bucket') / S3Path('folder'))
    assert not path.exists()
    assert S3Path('/target-bucket/folder/conf1.py').is_file()
    assert S3Path('/target-bucket/folder/make.bat').is_file()
    assert S3Path('/target-bucket/folder/index.rst').is_file()
    assert S3Path('/target-bucket/folder/Makefile').is_file()
    assert S3Path('/target-bucket/folder/_templates/11conf.py').is_file()
    assert S3Path('/target-bucket/folder/_build/22conf.py').is_file()
    assert S3Path('/target-bucket/folder/_static/conf.py').is_file()


def test_replace_s3_to_s3(s3_mock):
    """replace() behaves like rename() for keys and whole prefixes."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    keys = (
        'docs/conf.py',
        'docs/make.bat',
        'docs/index.rst',
        'docs/Makefile',
        'docs/_templates/11conf.py',
        'docs/_build/22conf.py',
        'docs/_static/conf.py',
    )
    for key in keys:
        s3.ObjectSummary('test-bucket', key).put(Body=b'test data')
    s3.create_bucket(Bucket='target-bucket')

    S3Path('/test-bucket/docs/conf.py').replace('/test-bucket/docs/conf1.py')
    assert not S3Path('/test-bucket/docs/conf.py').exists()
    assert S3Path('/test-bucket/docs/conf1.py').is_file()

    path = S3Path('/test-bucket/docs/')
    path.replace(S3Path('/target-bucket') / S3Path('folder'))
    assert not path.exists()
    assert S3Path('/target-bucket/folder/conf1.py').is_file()
    assert S3Path('/target-bucket/folder/make.bat').is_file()
    assert S3Path('/target-bucket/folder/index.rst').is_file()
    assert S3Path('/target-bucket/folder/Makefile').is_file()
    assert S3Path('/target-bucket/folder/_templates/11conf.py').is_file()
    assert S3Path('/target-bucket/folder/_build/22conf.py').is_file()
    assert S3Path('/target-bucket/folder/_static/conf.py').is_file()
S3Path('/target-bucket/folder/_static/conf.py').is_file() def test_rmdir(s3_mock): s3 = boto3.resource('s3') s3.create_bucket(Bucket='test-bucket') object_summary = s3.ObjectSummary('test-bucket', 'docs/conf.py') object_summary.put(Body=b'test data') object_summary = s3.ObjectSummary('test-bucket', 'docs/make.bat') object_summary.put(Body=b'test data') object_summary = s3.ObjectSummary('test-bucket', 'docs/index.rst') object_summary.put(Body=b'test data') object_summary = s3.ObjectSummary('test-bucket', 'docs/Makefile') object_summary.put(Body=b'test data') object_summary = s3.ObjectSummary('test-bucket', 'docs/_templates/11conf.py') object_summary.put(Body=b'test data') object_summary = s3.ObjectSummary('test-bucket', 'docs/_build/22conf.py') object_summary.put(Body=b'test data') object_summary = s3.ObjectSummary('test-bucket', 'docs/_static/conf.py') object_summary.put(Body=b'test data') conf_path = S3Path('/test-bucket/docs/_templates') assert conf_path.is_dir() conf_path.rmdir() assert not conf_path.exists() path = S3Path('/test-bucket/docs/') path.rmdir() assert not path.exists() def test_rmdir_can_remove_bucket(s3_mock): s3 = boto3.resource('s3') bucket = S3Path('/test-bucket/') bucket.mkdir() assert bucket.exists() bucket.rmdir() assert not bucket.exists() def test_mkdir(s3_mock): s3 = boto3.resource('s3') S3Path('/test-bucket/').mkdir() assert s3.Bucket('test-bucket') in s3.buckets.all() S3Path('/test-bucket/').mkdir(exist_ok=True) with pytest.raises(FileExistsError): S3Path('/test-bucket/').mkdir(exist_ok=False) with pytest.raises(FileNotFoundError): S3Path('/test-second-bucket/test-directory/file.name').mkdir() S3Path('/test-second-bucket/test-directory/file.name').mkdir(parents=True) assert s3.Bucket('test-second-bucket') in s3.buckets.all() def test_write_text(s3_mock): s3 = boto3.resource('s3') s3.create_bucket(Bucket='test-bucket') object_summary = s3.ObjectSummary('test-bucket', 'temp_key') object_summary.put(Body=b'test data') path = 
S3Path('/test-bucket/temp_key') data = path.read_text() assert isinstance(data, str) path.write_text(data) assert path.read_text() == data def test_write_bytes(s3_mock): s3 = boto3.resource('s3') s3.create_bucket(Bucket='test-bucket') object_summary = s3.ObjectSummary('test-bucket', 'temp_key') object_summary.put(Body=b'test data') path = S3Path('/test-bucket/temp_key') data = path.read_bytes() assert isinstance(data, bytes) path.write_bytes(data) assert path.read_bytes() == data def test_unlink(s3_mock): s3 = boto3.resource('s3') s3.create_bucket(Bucket='test-bucket') object_summary = s3.ObjectSummary('test-bucket', 'temp_key') object_summary.put(Body=b'test data') path = S3Path('/test-bucket/temp_key') subdir_key = S3Path('/test-bucket/fake_folder/some_key') subdir_key.write_text("some text") assert path.exists() is True assert subdir_key.exists() is True path.unlink() assert path.exists() is False with pytest.raises(FileNotFoundError): S3Path("/test-bucket/fake_subfolder/fake_subkey").unlink() with pytest.raises(IsADirectoryError): S3Path("/test-bucket/fake_folder").unlink() with pytest.raises(IsADirectoryError): S3Path("/fake-bucket/").unlink() S3Path("/test-bucket/fake_subfolder/fake_subkey").unlink(missing_ok=True) S3Path("/test-bucket/fake_folder").unlink(missing_ok=True) S3Path("/fake-bucket/").unlink(missing_ok=True) def test_absolute(s3_mock): s3 = boto3.resource('s3') s3.create_bucket(Bucket='test-bucket') absolute_path = S3Path('/test-bucket/directory/Test.test') assert absolute_path.absolute() is absolute_path relative_path = S3Path('./Test.test') with pytest.raises(ValueError): relative_path.absolute() def test_versioned_bucket(s3_mock): bucket, key = 'test-versioned-bucket', 'versioned_file.txt' s3 = boto3.resource('s3') s3.create_bucket(Bucket=bucket) s3.BucketVersioning(bucket).enable() object_summary = s3.ObjectSummary(bucket, key) file_contents_by_version = (b'Test', b'Test updated', b'Test', b'Test final') version_id_to_file_content = {} for 
file_content in file_contents_by_version: version_id = object_summary.put(Body=file_content).get('VersionId') version_id_to_file_content[version_id] = file_content assert len(version_id_to_file_content) == len(file_contents_by_version) # Test that we can read specific versions of the file for version_id, expected_file_content in version_id_to_file_content.items(): versioned_paths = ( VersionedS3Path(f'/{bucket}/{key}', version_id=version_id), VersionedS3Path(f'/{bucket}', f'{key}', version_id=version_id), VersionedS3Path.from_uri(f's3://{bucket}/{key}', version_id=version_id), VersionedS3Path.from_bucket_key(bucket=bucket, key=key, version_id=version_id), ) for versioned_path in versioned_paths: assert versioned_path.exists() and versioned_path.is_file() assert versioned_path.stat().st_version_id == version_id assert versioned_path.read_bytes() == expected_file_content # Test that we receive the latest version of the file when S3Path is used or no version_id is specified paths = ( S3Path(f'/{bucket}/{key}'), S3Path(f'/{bucket}', f'{key}'), S3Path.from_uri(f's3://{bucket}/{key}'), S3Path.from_bucket_key(bucket=bucket, key=key), ) for path in paths: assert not isinstance(path, VersionedS3Path) assert path.read_bytes() == file_contents_by_version[-1] def test_buffered_copy(s3_mock): s3 = boto3.resource('s3') s3.create_bucket(Bucket='test-bucket') data = b'test data' * 10_000_000 source_path = S3Path('/test-bucket/source') source_path.write_bytes(data) target_path = S3Path('/test-bucket/target') with source_path.open('rb') as source, target_path.open('wb') as target: shutil.copyfileobj(source, target) assert target_path.read_bytes() == data @pytest.mark.skipif(sys.version_info < (3, 12), reason="requires python 3.12 or higher") def test_walk(s3_mock): s3 = boto3.resource('s3') s3.create_bucket(Bucket='test-bucket') walk_test_results = [ (PosixPath('.'), ['.pytest_cache', 'tests', 'docs', 's3path', '.github', '.git', 's3path.egg-info', '.idea'], ['LICENSE', 'Makefile', 
'MANIFEST.in', 'Pipfile', 'setup.py', '.gitignore', 'setup.cfg', 'README.rst', 'Pipfile.lock']), (PosixPath('.pytest_cache'), ['v'], ['CACHEDIR.TAG', 'README.md', '.gitignore']), (PosixPath('.pytest_cache/v'), ['cache'], []), (PosixPath('.pytest_cache/v/cache'), [], ['nodeids', 'lastfailed', 'stepwise']), (PosixPath('tests'), [], ['test_not_supported.py', 'conftest.py', 'test_path_operations.py', '__init__.py', 'test_s3path_configuration.py', 'test_pure_path_operations.py']), (PosixPath('docs'), [], ['advance.rst', 's3path_graph.jpg', 's3path_graph.svg', 'comparison.rst', 'interface.rst']), (PosixPath('s3path'), [], ['accessor.py', 'old_versions.py', '__init__.py', 'py.typed', 'current_version.py']), (PosixPath('.github'), ['workflows'], []), (PosixPath('.github/workflows'), [], ['deploying.yml', 'testing.yml']), (PosixPath('.git'), ['objects', 'info', 'logs', 'hooks', 'refs'], ['config', 'HEAD', 'description', 'index', 'packed-refs']), (PosixPath('.git/objects'), ['pack'], []), (PosixPath('.git/objects/pack'), [], ['pack-746373b9d83ac407488288f60747a6de8ac71439.idx', 'pack-746373b9d83ac407488288f60747a6de8ac71439.pack']), (PosixPath('.git/info'), [], ['exclude']), (PosixPath('.git/logs'), ['refs'], ['HEAD']), (PosixPath('.git/logs/refs'), ['heads', 'remotes'], []), (PosixPath('.git/logs/refs/heads'), [], ['master']), (PosixPath('.git/logs/refs/remotes'), ['origin'], []), (PosixPath('.git/logs/refs/remotes/origin'), [], ['HEAD']), (PosixPath('.git/hooks'), [], ['commit-msg.sample', 'pre-rebase.sample', 'pre-commit.sample', 'applypatch-msg.sample', 'fsmonitor-watchman.sample', 'pre-receive.sample', 'prepare-commit-msg.sample', 'post-update.sample', 'pre-merge-commit.sample', 'pre-applypatch.sample', 'pre-push.sample', 'update.sample', 'push-to-checkout.sample']), (PosixPath('.git/refs'), ['heads', 'remotes'], []), (PosixPath('.git/refs/heads'), [], ['master']), (PosixPath('.git/refs/remotes'), ['origin'], []), (PosixPath('.git/refs/remotes/origin'), [], ['HEAD']), 
(PosixPath('s3path.egg-info'), [], ['PKG-INFO', 'SOURCES.txt', 'requires.txt', 'top_level.txt', 'dependency_links.txt']), (PosixPath('.idea'), ['inspectionProfiles'], ['s3path.iml', 'vcs.xml', '.gitignore', 'workspace.xml', 'modules.xml', 'misc.xml']), (PosixPath('.idea/inspectionProfiles'), [], ['profiles_settings.xml']), ] for path, directories, files in walk_test_results: for file in files: key = str(path / file) object_summary = s3.ObjectSummary('test-bucket', key) object_summary.put(Body=b'test data') compare = {} for (local_path, local_directories, local_files), (s3_path, s3_directories, s3_files) in zip(walk_test_results, S3Path('/test-bucket').walk()): compare.setdefault(s3_path.key or '.', {})['s3'] = {'files': set(s3_files), 'directories': set(s3_directories)} compare.setdefault(str(local_path), {})['local'] = {'files': set(local_files), 'directories': set(local_directories)} for root, location in compare.items(): assert 's3' in location and 'local' in location assert location['s3']['files'] == location['local']['files'] assert location['s3']['directories'] == location['local']['directories'] @pytest.mark.skipif(sys.version_info < (3, 12), reason="requires python 3.12 or higher") def test_walk_order(s3_mock): s3 = boto3.resource('s3') s3.create_bucket(Bucket='test-bucket') walk_test_results = [ (PosixPath('.'), ['.pytest_cache'], ['LICENSE', 'Makefile', 'setup.cfg', 'README.rst']), (PosixPath('.pytest_cache'), ['v'], ['CACHEDIR.TAG', 'README.md', '.gitignore']), (PosixPath('.pytest_cache/v'), ['cache'], []), (PosixPath('.pytest_cache/v/cache'), [], ['nodeids', 'lastfailed', 'stepwise']), ] for path, directories, files in walk_test_results: for file in files: key = str(path / file) object_summary = s3.ObjectSummary('test-bucket', key) object_summary.put(Body=b'test data') for (local_path, local_directories, local_files), (s3_path, s3_directories, s3_files) in zip(walk_test_results, S3Path('/test-bucket').walk()): assert set(local_directories) == 
set(s3_directories) assert set(local_files) == set(s3_files) for (local_path, local_directories, local_files), (s3_path, s3_directories, s3_files) in zip(reversed(walk_test_results), S3Path('/test-bucket').walk(top_down=False)): assert set(local_directories) == set(s3_directories) assert set(local_files) == set(s3_files) assert list(p for p in S3Path('/test-bucket/fake/').walk()) == [] def on_error(exception): assert isinstance(exception, FileNotFoundError) print(exception, '0'*30) raise exception with pytest.raises(FileNotFoundError): for _ in S3Path('/test-bucket/fake/').walk(on_error=on_error): pass ================================================ FILE: tests/test_pure_path_operations.py ================================================ import os import pytest from pathlib import Path, PurePosixPath, PureWindowsPath from s3path import PureS3Path def test_paths_of_a_different_flavour(): with pytest.raises(TypeError): PureS3Path('/bucket/key') < PurePosixPath('/bucket/key') with pytest.raises(TypeError): PureWindowsPath('/bucket/key') > PureS3Path('/bucket/key') def test_repr(): assert repr(PureS3Path('setup.py')) == "PureS3Path('setup.py')" assert str(PureS3Path('setup.py')) == 'setup.py' assert bytes(PureS3Path('setup.py')) == b'setup.py' assert PureS3Path('/usr/bin').as_posix() == '/usr/bin' def test_fspath(): assert os.fspath(PureS3Path('/usr/bin')) == '/usr/bin' def test_from_uri_issue_150(): uri = 's3://bucket/test/2023-09-10T00%3A00%3A00.000Z.txt' string = '/bucket/test/2023-09-10T00:00:00.000Z.txt' path = PureS3Path.from_uri(uri) assert path.as_uri() == uri assert str(path) == string def test_join_strs(): assert PureS3Path('foo', 'some/path', 'bar') == PureS3Path('foo/some/path/bar') def test_join_paths(): assert PureS3Path(Path('foo'), Path('bar')) == PureS3Path('foo/bar') def test_empty(): assert PureS3Path() == PureS3Path('.') def test_absolute_paths(): assert PureS3Path('/etc', '/usr', 'lib64') == PureS3Path('/usr/lib64') def 
test_slashes_single_double_dots(): assert PureS3Path('foo//bar') == PureS3Path('foo/bar') assert PureS3Path('foo/./bar') == PureS3Path('foo/bar') assert PureS3Path('foo/../bar') == PureS3Path('bar') assert PureS3Path('../bar') == PureS3Path('../bar') assert PureS3Path('foo', '../bar') == PureS3Path('bar') def test_operators(): assert PureS3Path('/etc') / 'init.d' / 'apache2' == PureS3Path('/etc/init.d/apache2') assert '/usr' / PureS3Path('bin') == PureS3Path('/usr/bin') def test_parts(): assert PureS3Path('foo//bar').parts == ('foo', 'bar') assert PureS3Path('foo/./bar').parts == ('foo', 'bar') assert PureS3Path('foo/../bar').parts == ('bar',) assert PureS3Path('../bar').parts == ('..', 'bar') assert PureS3Path('foo', '../bar').parts == ('bar',) assert PureS3Path('/foo/bar').parts == ('/', 'foo', 'bar') @pytest.mark.parametrize("path", ["/foo", "/foo/"]) def test_is_bucket_with_valid_bucket_paths(path): assert PureS3Path(path).is_bucket @pytest.mark.parametrize("path", ["//foo", "foo/", "foo", "", "/foo/bar"]) def test_is_bucket_with_invalid_bucket_paths(path): assert not PureS3Path(path).is_bucket def test_drive(): assert PureS3Path('foo//bar').drive == '' assert PureS3Path('foo/./bar').drive == '' assert PureS3Path('foo/../bar').drive == '' assert PureS3Path('../bar').drive == '' assert PureS3Path('foo', '../bar').drive == '' assert PureS3Path('/foo/bar').drive == '' def test_root(): assert PureS3Path('foo//bar').root == '' assert PureS3Path('foo/./bar').root == '' assert PureS3Path('foo/../bar').root == '' assert PureS3Path('../bar').root == '' assert PureS3Path('foo', '../bar').root == '' assert PureS3Path('/foo/bar').root == '/' def test_anchor(): assert PureS3Path('foo//bar').anchor == '' assert PureS3Path('foo/./bar').anchor == '' assert PureS3Path('foo/../bar').anchor == '' assert PureS3Path('../bar').anchor == '' assert PureS3Path('foo', '../bar').anchor == '' assert PureS3Path('/foo/bar').anchor == '/' def test_parents(): assert 
tuple(PureS3Path('foo//bar').parents) == (PureS3Path('foo'), PureS3Path('.')) assert tuple(PureS3Path('foo/./bar').parents) == (PureS3Path('foo'), PureS3Path('.')) assert tuple(PureS3Path('foo/../bar').parents) == (PureS3Path('.'),) assert tuple(PureS3Path('../bar').parents) == (PureS3Path('..'), PureS3Path('.')) assert tuple(PureS3Path('foo', '../bar').parents) == (PureS3Path('.'),) assert tuple(PureS3Path('/foo/bar').parents) == (PureS3Path('/foo'), PureS3Path('/')) def test_parent(): assert PureS3Path('foo//bar').parent == PureS3Path('foo') assert PureS3Path('foo/./bar').parent == PureS3Path('foo') assert PureS3Path('foo/../bar').parent == PureS3Path('.') assert PureS3Path('../bar').parent == PureS3Path('..') assert PureS3Path('foo', '../bar').parent == PureS3Path('.') assert PureS3Path('/foo/bar').parent == PureS3Path('/foo') assert PureS3Path('.').parent == PureS3Path('.') assert PureS3Path('/').parent == PureS3Path('/') def test_name(): assert PureS3Path('my/library/setup.py').name == 'setup.py' def test_suffix(): assert PureS3Path('my/library/setup.py').suffix == '.py' assert PureS3Path('my/library.tar.gz').suffix == '.gz' assert PureS3Path('my/library').suffix == '' def test_suffixes(): assert PureS3Path('my/library.tar.gar').suffixes == ['.tar', '.gar'] assert PureS3Path('my/library.tar.gz').suffixes == ['.tar', '.gz'] assert PureS3Path('my/library').suffixes == [] def test_stem(): assert PureS3Path('my/library.tar.gar').stem == 'library.tar' assert PureS3Path('my/library.tar').stem == 'library' assert PureS3Path('my/library').stem == 'library' def test_uri(): assert PureS3Path('/etc/passwd').as_uri() == 's3://etc/passwd' assert PureS3Path('/etc/init.d/apache2').as_uri() == 's3://etc/init.d/apache2' assert PureS3Path('/bucket/key').as_uri() == 's3://bucket/key' def test_absolute(): assert PureS3Path('/a/b').is_absolute() assert not PureS3Path('a/b').is_absolute() def test_reserved(): assert not PureS3Path('/a/b').is_reserved() assert not 
PureS3Path('a/b').is_reserved() def test_joinpath(): assert PureS3Path('/etc').joinpath('passwd') == PureS3Path('/etc/passwd') assert PureS3Path('/etc').joinpath(PureS3Path('passwd')) == PureS3Path('/etc/passwd') assert PureS3Path('/etc').joinpath('init.d', 'apache2') == PureS3Path('/etc/init.d/apache2') def test_match(): assert PureS3Path('a/b.py').match('*.py') assert PureS3Path('/a/b/c.py').match('b/*.py') assert not PureS3Path('/a/b/c.py').match('a/*.py') assert PureS3Path('/a.py').match('/*.py') assert not PureS3Path('a/b.py').match('/*.py') assert not PureS3Path('a/b.py').match('*.Py') def test_relative_to(): s3_path = PureS3Path('/etc/passwd') assert s3_path.relative_to('/') == PureS3Path('etc/passwd') assert s3_path.relative_to('/etc') == PureS3Path('passwd') with pytest.raises(ValueError): s3_path.relative_to('/usr') def test_with_name(): s3_path = PureS3Path('/Downloads/pathlib.tar.gz') assert s3_path.with_name('setup.py') == PureS3Path('/Downloads/setup.py') s3_path = PureS3Path('/') with pytest.raises(ValueError): s3_path.with_name('setup.py') def test_with_suffix(): s3_path = PureS3Path('/Downloads/pathlib.tar.gz') assert s3_path.with_suffix('.bz2') == PureS3Path('/Downloads/pathlib.tar.bz2') s3_path = PureS3Path('README') assert s3_path.with_suffix('.txt') == PureS3Path('README.txt') s3_path = PureS3Path('README.txt') assert s3_path.with_suffix('') == PureS3Path('README') ================================================ FILE: tests/test_s3path_configuration.py ================================================ import sys import pytest import smart_open from pathlib import Path from packaging.version import Version import boto3 from botocore.client import Config from s3path import S3Path, PureS3Path, register_configuration_parameter if sys.version_info >= (3, 12): from s3path import accessor _config_key_parser = str else: accessor = S3Path._accessor _config_key_parser = lambda path: path def test_s3_configuration_map_repr(): assert 
repr(accessor.configuration_map)


def test_basic_configuration(reset_configuration_cache):
    """With nothing registered, a path resolves to the default resource and
    an empty argument mapping — identically for S3Path and PureS3Path."""
    path = S3Path('/foo/')
    accessor.configuration_map.arguments = accessor.configuration_map.resources = None
    assert _config_key_parser(path) not in (accessor.configuration_map.arguments or ())
    assert _config_key_parser(path) not in (accessor.configuration_map.resources or ())
    assert accessor.configuration_map.get_configuration(path) == (
        accessor.configuration_map.default_resource, {})
    assert (accessor.configuration_map.get_configuration(S3Path('/foo/'))
            == accessor.configuration_map.get_configuration(PureS3Path('/foo/')))


def test_register_configuration_exceptions(reset_configuration_cache):
    """Bad registration input: a non-S3 path or non-mapping parameters raise
    TypeError; registering nothing at all raises ValueError."""
    with pytest.raises(TypeError):
        register_configuration_parameter(Path('/'), parameters={'ContentType': 'text/html'})
    with pytest.raises(TypeError):
        register_configuration_parameter(S3Path('/foo/'), parameters=('ContentType', 'text/html'))
    with pytest.raises(ValueError):
        register_configuration_parameter(S3Path('/foo/'))


def test_hierarchical_configuration(reset_configuration_cache):
    """Parameters registered for a bucket land in `arguments` (not
    `resources`) and are returned alongside the default resource."""
    path = S3Path('/foo/')
    register_configuration_parameter(path, parameters={'ContentType': 'text/html'})
    assert _config_key_parser(path) in accessor.configuration_map.arguments
    assert _config_key_parser(path) not in accessor.configuration_map.resources
    assert accessor.configuration_map.get_configuration(path) == (
        accessor.configuration_map.default_resource, {'ContentType': 'text/html'})
    assert (accessor.configuration_map.get_configuration(S3Path('/foo/'))
            == accessor.configuration_map.get_configuration(PureS3Path('/foo/')))


def test_boto_methods_with_configuration(s3_mock, reset_configuration_cache):
    """A registered ContentType parameter must not break plain writes."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='test-bucket')
    bucket = S3Path('/test-bucket/')
    register_configuration_parameter(bucket, parameters={'ContentType': 'text/html'})
    key = bucket.joinpath('bar.html')
    key.write_text('hello')


def test_configuration_per_bucket(reset_configuration_cache):
    """Each bucket can carry its own resource and parameters; buckets with
    no explicit registration fall back to the configuration for '/'."""
    local_stack_bucket_path = PureS3Path('/LocalStackBucket/')
    minio_bucket_path = PureS3Path('/MinIOBucket/')
    default_aws_s3_path = PureS3Path('/')

    register_configuration_parameter(
        default_aws_s3_path, parameters={'ContentType': 'text/html'})
    register_configuration_parameter(
        local_stack_bucket_path,
        parameters={},
        resource=boto3.resource('s3', endpoint_url='http://localhost:4566'))
    register_configuration_parameter(
        minio_bucket_path,
        parameters={'OutputSerialization': {'CSV': {}}},
        resource=boto3.resource(
            's3',
            endpoint_url='http://localhost:9000',
            aws_access_key_id='minio',
            aws_secret_access_key='minio123',
            config=Config(signature_version='s3v4'),
            region_name='us-east-1'))

    # The root configuration applies to '/' itself and to any bucket without
    # an explicit registration.  (The original asserted the '/some_bucket'
    # case twice verbatim; the copy-paste duplicate is dropped.)
    assert accessor.configuration_map.get_configuration(PureS3Path('/')) == (
        accessor.configuration_map.default_resource, {'ContentType': 'text/html'})
    assert accessor.configuration_map.get_configuration(PureS3Path('/some_bucket')) == (
        accessor.configuration_map.default_resource, {'ContentType': 'text/html'})

    # Registered buckets — and keys beneath them — resolve to their own
    # resource and parameter set.
    for path in (minio_bucket_path, minio_bucket_path / 'some_key'):
        resources, arguments = accessor.configuration_map.get_configuration(path)
        assert arguments == {'OutputSerialization': {'CSV': {}}}
        assert resources.meta.client._endpoint.host == 'http://localhost:9000'
    for path in (local_stack_bucket_path, local_stack_bucket_path / 'some_key'):
        resources, arguments = accessor.configuration_map.get_configuration(path)
        assert arguments == {}
        assert resources.meta.client._endpoint.host == 'http://localhost:4566'


def test_open_method_with_custom_endpoint_url(s3_mock, reset_configuration_cache, monkeypatch):
    """open() must route through the resource registered for the path's
    bucket; the attribute holding the boto client moved between
    smart_open major versions, hence the version switch."""
    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket='my-bucket')
    monkeypatch.setattr(S3Path, 'exists', lambda self: True)
    local_path = PureS3Path('/local/')
    register_configuration_parameter(
        local_path,
        parameters={},
        resource=boto3.resource('s3', endpoint_url='http://localhost'))
    file_object = S3Path('/local/directory/Test.test').open('br')
    if Version(smart_open.__version__) <= Version('3.0.0'):
        assert file_object._object.meta.client._endpoint.host == 'http://localhost'
    else:
        assert file_object._client.client._endpoint.host == 'http://localhost'


def test_issue_123():
    """Re-registering a resource for a path must replace the cached one."""
    path = S3Path('/bucket')
    old_resource, _ = accessor.configuration_map.get_configuration(path)
    boto3.setup_default_session()
    s3 = boto3.resource('s3')
    register_configuration_parameter(path, resource=s3)
    new_resource, _ = accessor.configuration_map.get_configuration(path)
    assert new_resource is s3
    assert new_resource is not old_resource