[
  {
    "path": ".ci-coveragerc",
    "content": "[run]\nomit = *tests/*, */_version.py\n"
  },
  {
    "path": ".coveragerc",
    "content": "[run]\nomit =\n    */tests/*\n    */test_*.py\n    *_version.py\nsource =\n    intake\n[report]\nshow_missing = True\n"
  },
  {
    "path": ".gitattributes",
    "content": "intake/_version.py export-subst\n"
  },
  {
    "path": ".github/workflows/main.yaml",
    "content": "name: CI\n\non:\n  push:\n    branches: \"*\"\n  pull_request:\n    branches: master\n\njobs:\n  test:\n    name: ${{ matrix.OS }}-${{ matrix.CONDA_ENV }}-pytest\n    runs-on: ${{ matrix.OS }}\n    strategy:\n      fail-fast: false\n      matrix:\n        OS: [ubuntu-latest, windows-latest]\n        CONDA_ENV: [py310, py311, py312, py313, py314, pip]\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v4\n\n      - name: Setup conda\n        uses: conda-incubator/setup-miniconda@v3\n        with:\n          environment-file: scripts/ci/environment-${{ matrix.CONDA_ENV }}.yml\n\n      - name: pip-install\n        shell: bash -l {0}\n        run: |\n          pip install . --no-deps\n\n      - name: Run Tests\n        shell: bash -l {0}\n        run: |\n          pytest -v intake/readers\n"
  },
  {
    "path": ".github/workflows/pre-commit.yml",
    "content": "name: pre-commit\n\non:\n  pull_request:\n    branches:\n      - '*'\n  push:\n    branches: [master]\n  workflow_dispatch:\n\njobs:\n  pre-commit:\n    runs-on: ubuntu-latest\n    steps:\n    - uses: actions/checkout@v4\n    - uses: actions/setup-python@v4\n      with:\n          python-version: \"3.11\"\n    - uses: pre-commit/action@v3.0.0\n"
  },
  {
    "path": ".github/workflows/pypipublish.yaml",
    "content": "name: Upload Python Package\n\non:\n  release:\n    types: [created]\n\njobs:\n  deploy:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n      - name: Set up Python\n        uses: actions/setup-python@v4\n        with:\n          python-version: \"3.x\"\n      - name: Install dependencies\n        run: |\n          python -m pip install --upgrade pip\n          pip install setuptools setuptools-scm wheel twine\n      - name: Build and publish\n        env:\n          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}\n          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}\n        run: |\n          python setup.py sdist bdist_wheel\n          twine upload dist/*\n"
  },
  {
    "path": ".gitignore",
    "content": ".DS_Store\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n_version.py\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nenv/\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\n.pytest_cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\ndocs/source/plugin-list.html\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# dotenv\n.env\n\n# virtualenv\n.venv\nvenv/\nENV/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n\n# jetbrains/pycharm\n.idea/\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "content": "# This is the configuration for pre-commit, a local framework for managing pre-commit hooks\n#   Check out the docs at: https://pre-commit.com/\n\ndefault_stages: [pre-commit]\nrepos:\n-   repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v4.4.0\n    hooks:\n    -   id: check-builtin-literals\n    -   id: check-case-conflict\n    -   id: check-docstring-first\n    -   id: check-executables-have-shebangs\n    -   id: check-toml\n    -   id: detect-private-key\n    -   id: end-of-file-fixer\n    -   id: trailing-whitespace\n-   repo: https://github.com/ambv/black\n    rev: 23.1.0\n    hooks:\n    -   id: black\n-   repo: https://github.com/charliermarsh/ruff-pre-commit\n    rev: v0.0.249\n    hooks:\n    -   id: ruff  # See 'setup.cfg' for args\n        args: [intake]\n        files: intake/\n-   repo: https://github.com/hoxbro/clean_notebook\n    rev: 0.1.5\n    hooks:\n      - id: clean-notebook\nci:\n  autofix_prs: false\n  autoupdate_schedule: quarterly\n"
  },
  {
    "path": "LICENSE",
    "content": "Copyright (c) 2017, Anaconda, Inc.\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are\nmet:\n\nRedistributions of source code must retain the above copyright notice,\nthis list of conditions and the following disclaimer.\n\nRedistributions in binary form must reproduce the above copyright\nnotice, this list of conditions and the following disclaimer in the\ndocumentation and/or other materials provided with the distribution.\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\nLIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\nA PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\nHOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\nSPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\nLIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\nTHEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
  },
  {
    "path": "MANIFEST.in",
    "content": "prune .github\nprune docs\nprune examples\nprune scripts\nglobal-exclude test*.py *.yml *.yaml *.csv calvert* *.png *.json\n"
  },
  {
    "path": "README.md",
    "content": "# Intake: Take 2\n\n**A general python package for describing, loading and processing data**\n\n![Logo](https://github.com/intake/intake/raw/master/logo-small.png)\n\n[![Build Status](https://github.com/intake/intake/workflows/CI/badge.svg)](https://github.com/intake/intake/actions)\n[![Documentation Status](https://readthedocs.org/projects/intake/badge/?version=latest)](http://intake.readthedocs.io/en/latest/?badge=latest)\n\n\n*Taking the pain out of data access and distribution*\n\nIntake is an open-source package to:\n\n- describe your data declaratively\n- gather data sets into catalogs\n- search catalogs and services to find the right data you need\n- load, transform and output data in many formats\n- work with third party remote storage and compute platforms\n\nDocumentation is available at [Read the Docs](http://intake.readthedocs.io/en/latest).\n\nPlease report issues at https://github.com/intake/intake/issues\n\nInstall\n-------\n\nRecommended method using conda:\n```bash\nconda install -c conda-forge intake\n```\n\nYou can also install using `pip`, in which case you have a choice as to how many of the optional\ndependencies you install, with the simplest having least requirements\n\n```bash\npip install intake\n```\n\nNote that you may well need specific drivers and other plugins, which usually have additional\ndependencies of their own.\n\nDevelopment\n-----------\n * Create development Python environment with the required dependencies, ideally with `conda`.\n   The requirements can be found in the yml files in the `scripts/ci/` directory of this repo.\n   * e.g. 
`conda env create -f scripts/ci/environment-py311.yml` and then `conda activate test_env`\n * Install intake using `pip install -e .`\n * Use `pytest` to run tests.\n * Create a fork on GitHub to be able to submit PRs.\n * We respect, but do not enforce, PEP 8 standards; all new code should be covered by tests.\n\nSupport\n-------\n\nWork on this repository is supported in part by:\n\n\"Anaconda, Inc. - Advancing AI through open source.\"\n\n<a href=\"https://anaconda.com/\"><img src=\"https://camo.githubusercontent.com/b8555ef2222598ed37ce38ac86955febbd25de7619931bb7dd3c58432181d3b6/68747470733a2f2f626565776172652e6f72672f636f6d6d756e6974792f6d656d626572732f616e61636f6e64612f616e61636f6e64612d6c617267652e706e67\" alt=\"anaconda logo\" width=\"40%\"/></a>\n"
  },
  {
    "path": "README_refactor.md",
    "content": "## Intake Take2\n\nIntake has been extensively rewritten to produce Intake Take2,\nhttps://github.com/intake/intake/pull/737 .\nThis will now become the version of the ``main`` branch and be released as v2.0.0. The\nmain documentation will move to describing V2, and V1 will not be further developed.\nExisting users of the legacy version (\"v1\") may find their code breaks and will need\na version pin, although we aim to support most legacy workflows via backward compatibility.\n\nTo install, you would do the following\n\n```shell\n> pip install intake\nor\n> conda install intake\n```\n\nTo get v1:\n\n```shell\n> pip install \"intake<2\"\nor\n> conda install \"intake<2\"\n```\n\nThis README is being kept to describe why the rewrite was done and considerations that\nwent into it.\n\n### Motivation for the rewrite.\n\nThe main way to get the most out of Intake v1 has been by editing YAML files. This is\nhow the documentation is structured. Yes, you could use intake.open_* to seed them, but then\nyou will find a strong discontinuity between the documentation of the driver and the third\nparty library that actually does the reading.\n\nThis made is very unlikely to convert a novice data-oriented python user into someone\nthat can create even the simplest catalogs. They will certainly never use more advanced\nfeatures like parametrisation or derived datasets. The new model eases users in and lends\nitself to being overlaid with graphical/wizard interfaces (i.e., in jupyterlab or in\npreparation for use with\n[anaconda.cloud](https://docs.anaconda.com/free/anaconda-notebooks/notebook-data-catalog/)).\n\n### Main changes\n\nThis is a total rewrite. 
Backward compatibility is desired and some v1 sources have been\nrewritten to use the v2 readers.\n\n#### Simplification\n\nWe are dropping features that added complexity but were only rarely used.\n\n- the server; the Intake server was never production-ready, and most\n use-cases can be provided by [tiled](https://blueskyproject.io/tiled/)\n- the caching/persist stuff; files can be persisted by fsspec, and we maintain the ability to\n write to various formats\n- explicit dependence on dask; dask is just one of many possible compute engines and\n we should not be tied to one\n- less added functionality in the readers (like file pattern stuff)\n- explicit dependence on hvplot (but you can still choose to use it)\n- the CLI\n\n\n#### New structure\n\nMany new classes have appeared. From an intake-savvy point of view, the biggest change is\nthe splitting of \"drivers\" into \"data\" and \"reader\". I view them as the objective description\nof what the dataset is (e.g., \"this is CSV at this URL\") versus how you might load it\n(\"call pandas with these arguments\"). This strongly implies that you might want to read the\nsame data in different ways. Crucially, it makes the readers much easier to write.\n\nHere is the Awkward reader for parquet files. Particularly for files, often all you need to do\nis specify which function will do the read and what keyword accepts the URL.\n```python\nclass AwkwardParquet(Awkward):\n    implements = {datatypes.Parquet}\n    imports = {\"awkward\", \"pyarrow\"}\n    func = \"awkward:from_parquet\"\n    url_arg = \"path\"\n```\n\nThe imports are declared and deferred until needed, so there is no need to make all those intake-*\nrepos with their own dependencies. (Of course, you might still want to declare packages\nand requirements; considering whether catalogs should have requirements, but this is better\nsuited for something like conda-project). 
The arguments accepted are the same as for the\ntarget function, and the method `.doc()` will show this.\n\n\n### New features\n\n- recommendation system to try to guess the right data type from a URL or existing function call,\n and readers that can use that type (and for each, tells you the instance it makes and provides docs).\n Can be extended to \"I have this type but I want this other type, what\n set of steps get me there\"\n- embracing any compute engines as first-class (e.g., duck, dask, ray, spark) or none\n- no constraints on the types of data that can/should be returned\n- pipeline building tools, including explicit conversion, types operations, generalised getattr and\n getitem (like dask delayed) and apply. Most of these are available as \"transform\" attributes, including\n new namespaces like \"reader.np.max(..)\" will call numpy on whatever the reader makes, but lazily.\n- output functions, as a special type of \"conversion\", returning a new data description for further\n manipulation. This is effectively caching (would like to add conditions to the pipeline, only load and\n convert if converted version doesn't already exist).\n- generalised derived datasets, including functions of multiple intake inputs. A data or any reader\n output might be the input of any other reader, forming a graph. Picking a specific output from those\n possible gives you the pipeline, ready for execution. Any such pipelines could be encoded in a catalog.\n- user parameters are similar to before, but are also pluggable; a few types are provided.\n Some helper methods have been made\n to walk data/reader kwargs and extract default values as parameters, replacing their original value\n with a reference to the parameter. The parameters are hierarchical catalog->data->reader\n\nSome examples of each of these exist in the current state of the code. There are many, many more to\nwrite, but the functions themselves are really simple. 
This is aiming for composition and easy crowd\nsourcing, high bus factor.\n\n### Work to follow\n\n- thorough search capability, which will need some thoughts in this context\n- compatibility with remaining existing intake plugins\n- the catalog serialisation currently uses custom YAML tags, but this should not be necessary\n- add those magic methods that make pipelines work on descriptions on catalogs, not just\n materialised readers.\n- metadata conventions, to persist basic dataset properties (e.g., based on frictionlessdata spec)\n and validation as a pipeline operation you can do to any data entry using any available reader that\n can produce the info\n- probably much more - I will need help!\n\n### Unanswered questions\n\n- actual functions and classes are now embedded into any YAML serialised catalog as strings. These\n are imported/instantiated when the reader is instantiated from its description. So arbitrary\n code execution is possible, but not at catalog parse time. We only have a loose permissions config\n story around this\n- this implementation maintains the distinction between \"descriptions\" (which have templated values\n and user parameters) and readers (which only have concrete values and real instances). Is this a\n major confusion we somehow want to eliminate in V2?\n"
  },
  {
    "path": "docs/Makefile",
    "content": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line.\nSPHINXOPTS    =\nSPHINXBUILD   = sphinx-build\nSPHINXPROJ    = intake\nSOURCEDIR     = source\nBUILDDIR      = build\n\n# Put it first so that \"make\" without argument is like \"make help\".\nhelp:\n\t@$(SPHINXBUILD) -M help \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n\n.PHONY: help Makefile\n\n# Run custom script to build HTML table of plugins\nhtml: Makefile\n\tpython plugins.py\n\t@$(SPHINXBUILD) -M $@ \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n\n# Catch-all target: route all unknown targets to Sphinx using the new\n# \"make mode\" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).\n%: Makefile\n\t@$(SPHINXBUILD) -M $@ \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n"
  },
  {
    "path": "docs/README.md",
    "content": "# Building Documentation\n\nAn environment with several prerequisites is needed to build the\ndocumentation.  Create this with:\n\n## First option for environment\n\n```bash\nconda create -n intake-docs python=3.8 pandas dask python-snappy appdirs -c conda-forge -y\nconda activate intake-docs\n```\n\nAdditional pip packages are listed in `./requirements.txt` are required to\nbuild the docs:\n\n```bash\npip install -r requirements.txt\n```\n\n## Second option for environment\n\nA conda environment with pip packages included is in `environment.yml` of the current directory, and you may create it with:\n\n```bash\nconda env create\nconda activate intake-docs\n```\n\n## Build docs\n\nTo make HTML documentation:\n\n```bash\nmake html\n```\n\nOutputs to `build/html/index.html`\n"
  },
  {
    "path": "docs/environment.yml",
    "content": "name: intake-docs\nchannels:\n  - conda-forge\n\ndependencies:\n  - appdirs\n  - python=3.12\n  - dask\n  - numpy\n  - pandas\n  - msgpack-python\n  - msgpack-numpy\n  - requests\n  - tornado\n  - jinja2\n  - python-snappy\n  - pyyaml\n  - hvplot\n  - platformdirs\n  - panel\n  - bokeh\n  - docutils\n  - sphinx\n  - sphinx_rtd_theme\n  - numpydoc\n  - entrypoints\n  - aiohttp\n"
  },
  {
    "path": "docs/make.bat",
    "content": "@ECHO OFF\r\n\r\npushd %~dp0\r\n\r\nREM Command file for Sphinx documentation\r\n\r\nif \"%SPHINXBUILD%\" == \"\" (\r\n\tset SPHINXBUILD=sphinx-build\r\n)\r\nset SOURCEDIR=source\r\nset BUILDDIR=build\r\nset SPHINXPROJ=intake\r\n\r\nif \"%1\" == \"\" goto help\r\n\r\n%SPHINXBUILD% >NUL 2>NUL\r\nif errorlevel 9009 (\r\n\techo.\r\n\techo.The 'sphinx-build' command was not found. Make sure you have Sphinx\r\n\techo.installed, then set the SPHINXBUILD environment variable to point\r\n\techo.to the full path of the 'sphinx-build' executable. Alternatively you\r\n\techo.may add the Sphinx directory to PATH.\r\n\techo.\r\n\techo.If you don't have Sphinx installed, grab it from\r\n\techo.http://sphinx-doc.org/\r\n\texit /b 1\r\n)\r\n\r\n%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%\r\ngoto end\r\n\r\n:help\r\n%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%\r\n\r\n:end\r\npopd\r\n"
  },
  {
    "path": "docs/make_api.py",
    "content": "import os\nimport sys\nimport intake\n\n\ndef run(path):\n    fn = os.path.join(path, \"source\", \"api2.rst\")\n    with open(fn, \"w\") as f:\n        print(\n            f\"\"\"\nAPI Reference\n=============\n\nUser Functions\n--------------\n\n.. autosummary::\n    intake.config.Config\n    intake.readers.datatypes.recommend\n    intake.readers.convert.auto_pipeline\n    intake.readers.entry.Catalog\n    intake.readers.entry.DataDescription\n    intake.readers.entry.ReaderDescription\n    intake.readers.readers.recommend\n    intake.readers.readers.reader_from_call\n\n.. autoclass:: intake.config.Config\n    :members:\n\n.. autofunction:: intake.readers.datatypes.recommend\n\n.. autofunction:: intake.readers.convert.auto_pipeline\n\n.. autoclass:: intake.readers.entry.Catalog\n    :members:\n\n.. autoclass:: intake.readers.entry.DataDescription\n    :members:\n\n.. autoclass:: intake.readers.entry.ReaderDescription\n    :members:\n\n.. autofunction:: intake.readers.readers.recommend\n\n.. autofunction:: intake.readers.readers.reader_from_call\n\nBase Classes\n------------\n\nThese may be subclassed by developers\n\n.. autosummary::\"\"\",\n            file=f,\n        )\n        bases = (\n            \"intake.readers.datatypes.BaseData\",\n            \"intake.readers.readers.BaseReader\",\n            \"intake.readers.convert.BaseConverter\",\n            \"intake.readers.namespaces.Namespace\",\n            \"intake.readers.search.SearchBase\",\n            \"intake.readers.user_parameters.BaseUserParameter\",\n        )\n        for base in bases:\n            print(\"  \", base, file=f)\n        print(file=f)\n        for base in bases:\n            print(\n                f\"\"\".. autoclass:: {base}\n   :members:\n\"\"\",\n                file=f,\n            )\n\n        print(\n            \"\"\"\n\nData Classes\n------------\n\n.. 
autosummary::\"\"\",\n            file=f,\n        )\n        for cls in sorted(intake.readers.subclasses(intake.BaseData), key=lambda c: c.qname()):\n            print(\"  \", cls.qname().replace(\":\", \".\"), file=f)\n        print(\n            \"\"\"\n\nReader Classes\n--------------\n\nIncludes readers, transformers, converters and output classes.\n\n.. autosummary::\"\"\",\n            file=f,\n        )\n        for cls in sorted(intake.readers.subclasses(intake.BaseReader), key=lambda c: c.qname()):\n            print(\"  \", cls.qname().replace(\":\", \".\"), file=f)\n\n\nif __name__ == \"__main__\":\n    here = os.path.abspath(os.path.dirname(sys.argv[0]))\n    run(here)\nelse:\n    here = os.path.abspath(os.path.dirname(__file__))\n"
  },
  {
    "path": "docs/plugins.py",
    "content": "import asyncio\n\nimport aiohttp\nimport pandas as pd\nimport yaml\n\n\ndef format_package_links(package_name, repo_link):\n    return f'<a href=\"{repo_link}\">{package_name}</a>'\n\n\ndef format_repo_link(repo_link):\n    if \"http\" not in repo_link:\n        return f\"https://github.com/{repo_link}/\"\n    return repo_link\n\n\ndef format_badge_html(badge, link):\n    return f'<a href=\"{link}\"><img src=\"{badge}\"/></a>'\n\n\nasync def check_ok(client, url):\n    async with client.get(url) as r:\n        if \"anaconda.org\" in url:\n            body = await r.text()\n            if \"requires authentication\" in body:\n                return False\n        return r.ok\n\n\nasync def check_all_ok(urls):\n    async with aiohttp.client.ClientSession() as c:\n        coroutines = [check_ok(c, url) for url in urls]\n        return await asyncio.gather(*coroutines)\n\n\ndef generate_plugin_table():\n    plugins = yaml.safe_load(open(\"plugins.yaml\", \"rb\"))\n    plugin_df = pd.DataFrame(plugins)\n    plugin_df = plugin_df.rename(columns={\"description\": \"Description\", \"drivers\": \"Drivers\"})\n\n    plugin_df[\"short_name\"] = plugin_df[\"name\"].apply(lambda x: x.split(\"/\")[-1])\n    plugin_df[\"repo_links\"] = plugin_df[\"repo\"].apply(format_repo_link)\n    plugin_df[\"conda_package\"] = plugin_df[\"conda_package\"].fillna(plugin_df[\"short_name\"])\n    plugin_df[\"ci_yaml\"] = plugin_df[\"ci_yaml\"].fillna(\"main.yaml\")\n\n    # CI badges\n    plugin_df[\"ci_badges\"] = plugin_df[[\"repo_links\", \"ci_yaml\"]].apply(\n        lambda x: f\"{x[0]}/actions/workflows/{x[1]}/badge.svg\", axis=1\n    )\n    plugin_df[\"ci_links\"] = plugin_df[\"repo_links\"].apply(lambda x: f\"{x}/actions\")\n    ci_badges_ok = asyncio.run(check_all_ok(plugin_df[\"ci_badges\"]))\n\n    # Docs badges\n    plugin_df[\"docs_badges\"] = plugin_df[\"short_name\"].apply(\n        lambda x: f\"https://readthedocs.org/projects/{x}/badge/?version=latest\"\n    )\n   
 plugin_df[\"docs_links\"] = plugin_df[\"short_name\"].apply(\n        lambda x: f\"https://{x}.readthedocs.io/en/latest/?badge=latest\"\n    )\n    docs_badges_ok = asyncio.run(check_all_ok(plugin_df[\"docs_links\"]))\n\n    # PyPi badges\n    plugin_df[\"pypi_badges\"] = plugin_df[\"short_name\"].apply(\n        lambda x: f\"https://img.shields.io/pypi/v/{x}.svg?maxAge=3600\"\n    )\n    plugin_df[\"pypi_links\"] = plugin_df[\"short_name\"].apply(\n        lambda x: f\"https://pypi.org/project/{x}\"\n    )\n    pypi_badges_ok = asyncio.run(check_all_ok(plugin_df[\"pypi_links\"]))\n\n    # Conda badges\n    plugin_df[\"conda_badges\"] = plugin_df[[\"conda_channel\", \"conda_package\"]].apply(\n        lambda x: f\"https://img.shields.io/conda/vn/{x[0]}/{x[1]}.svg?colorB=4488ff&label={x[0]}&style=flat\",\n        axis=1,\n    )\n    plugin_df[\"conda_links\"] = plugin_df[[\"conda_channel\", \"conda_package\"]].apply(\n        lambda x: f\"https://anaconda.org/{x[0]}/{x[1]}\", axis=1\n    )\n    conda_badges_ok = asyncio.run(check_all_ok(plugin_df[\"conda_links\"]))\n\n    # Conda defaults badges\n    plugin_df[\"conda_defaults_links\"] = plugin_df[\"conda_package\"].apply(\n        lambda x: f\"https://anaconda.org/anaconda/{x}\"\n    )\n    plugin_df[\"conda_defaults_badges\"] = plugin_df[\"conda_package\"].apply(\n        lambda x: f\"https://img.shields.io/conda/vn/anaconda/{x}.svg?colorB=4488ff&label=defaults&style=flat\"\n    )\n    conda_defaults_badges_ok = asyncio.run(check_all_ok(plugin_df[\"conda_defaults_links\"]))\n\n    # Conda forge badges\n    plugin_df[\"conda_forge_links\"] = plugin_df[\"conda_package\"].apply(\n        lambda x: f\"https://anaconda.org/conda-forge/{x}\"\n    )\n    plugin_df[\"conda_forge_badges\"] = plugin_df[\"conda_package\"].apply(\n        lambda x: f\"https://img.shields.io/conda/vn/conda-forge/{x}.svg?colorB=4488ff&style=flat\"\n    )\n    conda_forge_badges_ok = 
asyncio.run(check_all_ok(plugin_df[\"conda_forge_links\"]))\n\n    plugin_df[\"Package Name\"] = plugin_df[[\"name\", \"repo_links\"]].apply(\n        lambda x: format_package_links(*x), axis=1\n    )\n    plugin_df[\"CI\"] = plugin_df[[\"ci_badges\", \"ci_links\"]][ci_badges_ok].apply(\n        lambda x: format_badge_html(*x), axis=1\n    )\n    plugin_df[\"Docs\"] = plugin_df[[\"docs_badges\", \"docs_links\"]][docs_badges_ok].apply(\n        lambda x: format_badge_html(*x), axis=1\n    )\n    plugin_df[\"PyPi\"] = plugin_df[[\"pypi_badges\", \"pypi_links\"]][pypi_badges_ok].apply(\n        lambda x: format_badge_html(*x), axis=1\n    )\n    plugin_df[\"conda\"] = plugin_df[[\"conda_badges\", \"conda_links\"]][conda_badges_ok].apply(\n        lambda x: format_badge_html(*x), axis=1\n    )\n    plugin_df[\"conda_forge\"] = plugin_df[[\"conda_forge_badges\", \"conda_forge_links\"]][\n        conda_forge_badges_ok\n    ].apply(lambda x: format_badge_html(*x), axis=1)\n    plugin_df[\"conda_defaults\"] = plugin_df[[\"conda_defaults_badges\", \"conda_defaults_links\"]][\n        conda_defaults_badges_ok\n    ].apply(lambda x: format_badge_html(*x), axis=1)\n\n    plugin_df = plugin_df.fillna(\"\")\n\n    # Concat conda badges\n    plugin_df[\"Conda\"] = plugin_df[\"conda\"] + plugin_df[\"conda_forge\"] + plugin_df[\"conda_defaults\"]\n\n    plugin_df.to_html(\n        \"source/plugin-list.html\",\n        escape=False,\n        justify=\"left\",\n        index=False,\n        border=0,\n        classes=\"table_wrapper\",\n        columns=[\"Package Name\", \"Description\", \"Drivers\", \"CI\", \"Docs\", \"PyPi\", \"Conda\"],\n        col_space=[\"auto\", \"auto\", \"auto\", \"90px\", \"90px\", \"90px\", \"90px\"],\n    )\n\n\nif __name__ == \"__main__\":\n    print(\"Generating custom plugin table... \", end=\"\")\n    generate_plugin_table()\n    print(\"done\")\n"
  },
  {
    "path": "docs/plugins.yaml",
    "content": "- name: intake\n  repo: intake/intake\n  description: Builtin to Intake\n  drivers: catalog, csv, intake_remote, ndzarr, numpy, textfiles, yaml_file_cat, yaml_files_cat, zarr_cat, json, jsonl\n\n- name: intake-astro\n  repo: intake/intake-astro\n  description: Table and array loading of FITS astronomical data\n  drivers: fits_array, fits_table\n  conda_channel: intake\n\n- name: intake-accumulo\n  repo: intake/intake-accumulo\n  description: Apache Accumulo clustered data storage\n  drivers: accumulo\n\n- name: intake-avro\n  repo: intake/intake-avro\n  description: Apache Avro data serialization format\n  drivers: avro_table, avro_sequence\n\n- name: intake-bluesky\n  repo: nsls-ii/intake-bluesky\n  description: Search and retrieve data in the <a href=\"https://nsls-ii.github.io/bluesky\">bluesky</a> data model\n\n- name: intake-dcat\n  repo: CityOfLosAngeles/intake-dcat\n  description: Browse and load data from <a href=\"https://www.w3.org/TR/vocab-dcat\">DCAT</a> catalogs\n  drivers: dcat\n\n- name: intake-dremio\n  repo: intake/intake-dremio\n  description: Scan tables and send SQL queries to a <a href=\"https://docs.dremio.com/\"> Dremio </a> server\n  drivers: dremio\n\n- name: intake-duckdb\n  repo: intake/intake-duckdb\n  description: Load DuckDB tables and build catalogs from DuckDB backends\n  drivers: duckdb, duckdb_cat\n\n- name: intake-dynamodb\n  repo: informatics-lab/intake-dynamodb>\n  description: Link to Amazon DynamoDB\n  drivers: dynamodb\n  conda_channel: informaticslab\n  conda_package: intake_dynamodb\n\n- name: intake-elasticsearch\n  repo: intake/intake-elasticsearch\n  description: Elasticsearch search and analytics engine\n  drivers: elasticsearch_seq, elasticsearch_table\n\n- name: intake-esm\n  repo: NCAR/intake-esm\n  description: Plugin for building and loading intake catalogs for earth system data sets holdings, such as <a href=\"https://cmip.llnl.gov/\">CMIP</a> (Coupled Model Intercomparison Project) and CESM Large 
Ensemble datasets\n\n- name: intake-geopandas\n  repo: informatics-lab/intake_geopandas\n  description: Load from ESRI Shape Files, GeoJSON, and geospatial databases with geopandas\n  drivers: geojson, postgis, shapefile, spatialite, regionmask\n  conda_channel: informaticslab\n\n- name: intake-google-analytics\n  repo: intake/intake-google-analytics\n  description: Run Google Analytics queries and load data as a DataFrame\n  drivers: google_analytics_query\n\n- name: intake-hbase\n  repo: intake/intake-hbase\n  description: Apache HBase database\n  drivers: hbase\n  conda_channel: intake\n\n- name: intake-iris\n  repo: informatics-lab/intake-iris\n  description: Load netCDF and GRIB files with IRIS\n  drivers: grib, netcdf\n  conda_channel: informaticslab\n  conda_package: intake_iris\n\n- name: intake-metabase\n  repo: continuumio/intake-metabase\n  description: Generate catalogs and load tables as DataFrames from Metabase\n  drivers: metabase_catalog, metabase_table\n\n- name: intake-mongo\n  repo: intake/intake-mongo\n  description: MongoDB noSQL query\n  drivers: mongo\n  conda_channel: intake\n\n- name: intake-nested-yaml-catalog\n  repo: zillow/intake-nested-yaml-catalog\n  description: Plugin supporting a single YAML hierarchical catalog to organize datasets and avoid a data swamp\n  drivers: nested_yaml_cat\n\n- name: intake-netflow\n  repo: intake/intake-netflow\n  description: Netflow packet format\n  drivers: netflow\n  conda_channel: intake\n\n- name: intake-notebook\n  repo: informatics-lab/intake-notebook\n  description: Experimental plugin to access parameterised notebooks through intake and executed via papermill\n  drivers: ipynb\n  conda_channel: informaticslab\n\n- name: intake-odbc\n  repo: intake/intake-odbc\n  description: ODBC database\n  drivers: odbc\n  conda_channel: intake\n\n- name: intake-parquet\n  repo: intake/intake-parquet\n  description: Apache Parquet file format\n  drivers: parquet\n\n- name: intake-pattern-catalog\n  repo: 
DTN-Public/intake-pattern-catalog\n  description: Plugin for specifying a file-path pattern which can represent a number of different entries\n  drivers: pattern_cat\n\n- name: intake-pcap\n  repo: intake/intake-pcap\n  description: PCAP network packet format\n  drivers: pcap\n\n- name: intake-postgres\n  repo: intake/intake-postgres\n  description: PostgreSQL database\n  drivers: postgres\n  conda_channel: intake\n\n- name: intake-s3-manifests\n  repo: informatics-lab/intake-s3-manifests\n  drivers: s3_manifest\n  conda_channel: informaticslab\n  conda_package: intake_s3_manifests\n\n- name: intake-salesforce\n  repo: sophiamyang/intake-salesforce\n  description: Generate catalogs and load tables as DataFrames from Salesforce\n  drivers: salesforce_catalog, salesforce_table\n\n- name: intake-sdmx\n  repo: dr-leo/intake_sdmx\n  description: Plugin for SDMX-compliant data sources such as BIS, ECB, ESTAT, INSEE, ILO, UN, UNICEF, World Bank and more\n  drivers: sdmx_dataset\n\n- name: intake-sklearn\n  repo: AlbertDeFusco/intake-sklearn\n  description: Load scikit-learn models from Pickle files\n  drivers: sklearn\n\n- name: intake-solr\n  repo: intake/intake-solr\n  description: Apache Solr search platform\n  drivers: solr\n  conda_channel: intake\n\n- name: intake-stac\n  repo: intake/intake-stac\n  description: Intake Driver for <a href=\"https://stacspec.org/\">SpatioTemporal Asset Catalogs (STAC)</a>\n\n- name: intake-stripe\n  repo: sophiamyang/intake-stripe\n  description: Generate catalogs and load tables as DataFrames from Stripe\n  drivers: stripe_catalog, stripe_table\n\n- name: intake-spark\n  repo: intake/intake-spark\n  description: Data processed by Apache Spark\n  drivers: spark_cat, spark_rdd, spark_dataframe\n\n- name: intake-sql\n  repo: intake/intake-sql\n  description: Generic SQL queries via SQLAlchemy\n  drivers: sql_cat, sql, sql_auto, sql_manual\n\n- name: intake-sqlite\n  repo: catalyst-cooperative/intake-sqlite\n  description: Local caching 
of remote SQLite DBs and queries via SQLAlchemy\n  drivers: sqlite_cat, sqlite, sqlite_auto, sqlite_manual\n\n- name: intake-splunk\n  repo: intake/intake-splunk\n  description: Splunk machine data query\n  drivers: splunk\n  conda_channel: intake\n\n- name: intake-streamz\n  repo: intake/intake-streamz\n  description: Real-time event processing using Streamz\n  drivers: streamz\n\n- name: intake-thredds\n  repo: NCAR/intake-thredds\n  ci_yaml: ci.yaml\n  description: Intake interface to THREDDS data catalogs\n  drivers: thredds_cat, thredds_merged_source\n\n- name: intake-xarray\n  repo: intake/intake-xarray\n  description: Load netCDF, Zarr and other multi-dimensional data\n  drivers: xarray_image, netcdf, grib, opendap, rasterio, remote-xarray, zarr\n\n- name: intake-dataframe-catalog\n  repo: ACCESS-NRI/intake-dataframe-catalog\n  ci_yaml: ci.yml\n  description: A searchable table of intake sources and associated metadata\n  drivers: df_catalog\n  conda_channel: accessnri\n"
  },
  {
    "path": "docs/requirements.txt",
    "content": "sphinx\nsphinx_rtd_theme\nnumpydoc\npanel\nhvplot\nentrypoints\n"
  },
  {
    "path": "docs/source/_static/.keep",
    "content": ""
  },
  {
    "path": "docs/source/_static/css/custom.css",
    "content": "div.prompt {\n  display: none\n}\n\ndiv.logo-block img {\n  display: none !important\n}\n\n.table_wrapper{\n  display: block;\n  overflow-x: auto;\n}\n\n.table_wrapper td, th {\n  padding: 2px;\n}\n\n.table_wrapper tr:nth-child(even) {\n  background: #E0E0E0;\n}\n"
  },
  {
    "path": "docs/source/_static/images/plotting_example.html",
    "content": "\n<!DOCTYPE html>\n<html lang=\"en\">\n    <head>\n        <meta charset=\"utf-8\">\n        <title>HoloPlot Plot</title>\n\n<link rel=\"stylesheet\" href=\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\" type=\"text/css\" />\n\n<script type=\"text/javascript\" src=\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.js\"></script>\n<script type=\"text/javascript\">\n    Bokeh.set_log_level(\"info\");\n</script>\n    </head>\n    <body>\n\n        <div class=\"bk-root\">\n            <div class=\"bk-plotdiv\" id=\"a54b0d41-2dbd-4cbd-abe8-b158862abe73\"></div>\n        </div>\n\n        <script type=\"application/json\" id=\"de6014ac-0cb8-4cb5-aff2-696c243a0f51\">\n          {\"cf966cb6-e460-4e42-870e-e1fe77bb6636\":{\"roots\":{\"references\":[{\"attributes\":{\"below\":[{\"id\":\"4c2875d5-77df-4591-9438-8d68a338a1d8\",\"type\":\"LinearAxis\"}],\"left\":[{\"id\":\"a9c6f8b1-9ea1-41ca-a889-60f708773bc7\",\"type\":\"LinearAxis\"}],\"min_border_bottom\":10,\"min_border_left\":10,\"min_border_right\":10,\"min_border_top\":10,\"plot_height\":300,\"plot_width\":700,\"renderers\":[{\"id\":\"4c2875d5-77df-4591-9438-8d68a338a1d8\",\"type\":\"LinearAxis\"},{\"id\":\"49b73a93-7160-40db-bf35-7a5671949656\",\"type\":\"Grid\"},{\"id\":\"a9c6f8b1-9ea1-41ca-a889-60f708773bc7\",\"type\":\"LinearAxis\"},{\"id\":\"c62b66c7-6be2-4af9-a12f-aafc8e32caff\",\"type\":\"Grid\"},{\"id\":\"8d9c612d-75cb-4181-a127-97ed515133b2\",\"type\":\"BoxAnnotation\"},{\"id\":\"2215446e-e645-4546-acea-13fc8fa81a4a\",\"type\":\"Legend\"},{\"id\":\"0d4d8d8c-cbc8-4eba-8c60-867a3ce11b30\",\"type\":\"GlyphRenderer\"},{\"id\":\"f5632049-54e4-4001-905f-2e37f0480b55\",\"type\":\"GlyphRenderer\"},{\"id\":\"125ade69-dfa3-4df1-90e0-238b813d7202\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"96b34104-9f23-4702-9783-868ea5114e49\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"aebf021b-befc-4d6b-9ffd-a4e1aadceb03\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"5059be37-ae81-463d-8db0-292d6b4
b5a54\",\"type\":\"Range1d\"},\"x_scale\":{\"id\":\"8223a783-c027-473c-9217-f60e3ac36aa0\",\"type\":\"LinearScale\"},\"y_range\":{\"id\":\"94497823-8347-4b0e-b26c-fe2aa2abbd31\",\"type\":\"Range1d\"},\"y_scale\":{\"id\":\"d0b1b2a0-7f3c-4115-be37-2b6913b3b801\",\"type\":\"LinearScale\"}},\"id\":\"9268281a-2417-46c5-9182-d5ebb5689dc2\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"label\":{\"value\":\"Violent Crime rate\"},\"renderers\":[{\"id\":\"0d4d8d8c-cbc8-4eba-8c60-867a3ce11b30\",\"type\":\"GlyphRenderer\"}]},\"id\":\"44735ebd-4ba2-41b2-a24b-6c318fc39393\",\"type\":\"LegendItem\"},{\"attributes\":{},\"id\":\"5277ef26-8da1-45ef-b505-c9855b23f975\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"0d192563-9260-41da-a32c-f5ccbc73bd4c\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"c05057cb-f16d-4349-b03d-c09cdc6ba559\",\"type\":\"Selection\"},{\"attributes\":{\"plot\":null,\"text\":\"\",\"text_color\":{\"value\":\"black\"},\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"96b34104-9f23-4702-9783-868ea5114e49\",\"type\":\"Title\"},{\"attributes\":{\"data_source\":{\"id\":\"2b2287e6-2706-4b63-aedd-a6bf32979687\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"c15dc676-9a32-46ac-acfd-f99f4eb8e731\",\"type\":\"Line\"},\"hover_glyph\":null,\"muted_glyph\":{\"id\":\"4f29323a-f6a0-4040-aaaa-19ff6880b09d\",\"type\":\"Line\"},\"nonselection_glyph\":{\"id\":\"fd981887-f190-42fe-8cee-dfb35ef077e1\",\"type\":\"Line\"},\"selection_glyph\":null,\"view\":{\"id\":\"ec9bc3f8-1164-478d-8a1e-ac8f0b55492d\",\"type\":\"CDSView\"}},\"id\":\"125ade69-dfa3-4df1-90e0-238b813d7202\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"grid_line_color\":{\"value\":null},\"plot\":{\"id\":\"9268281a-2417-46c5-9182-d5ebb5689dc2\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"132f30e9-c8c4-4c71-a5b2-98ab2f2d40ef\",\"type\":\"BasicTicker\"}},\"id\":\"49b73a93-7160-40db-bf35-7a5671949656\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"ffb0bc45-c827-4
f96-ae52-ad134c44d0b3\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"3bb836e5-3444-4da9-9a78-ff97ff6f536a\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"label\":{\"value\":\"Burglary rate\"},\"renderers\":[{\"id\":\"125ade69-dfa3-4df1-90e0-238b813d7202\",\"type\":\"GlyphRenderer\"}]},\"id\":\"ebda79be-5fa7-4bab-9636-7e7f956bcaa9\",\"type\":\"LegendItem\"},{\"attributes\":{\"callback\":null,\"end\":2014,\"start\":1960},\"id\":\"5059be37-ae81-463d-8db0-292d6b4b5a54\",\"type\":\"Range1d\"},{\"attributes\":{\"line_alpha\":0.2,\"line_color\":\"#1f77b4\",\"line_width\":2,\"x\":{\"field\":\"Year\"},\"y\":{\"field\":\"Rate (per 100k people)\"}},\"id\":\"087d0ef0-6a69-4746-ba34-828ff44f60c8\",\"type\":\"Line\"},{\"attributes\":{\"line_alpha\":0.2,\"line_color\":\"#2ca02c\",\"line_width\":2,\"x\":{\"field\":\"Year\"},\"y\":{\"field\":\"Rate (per 100k people)\"}},\"id\":\"4f29323a-f6a0-4040-aaaa-19ff6880b09d\",\"type\":\"Line\"},{\"attributes\":{\"line_alpha\":0.1,\"line_color\":\"#2ca02c\",\"line_width\":2,\"x\":{\"field\":\"Year\"},\"y\":{\"field\":\"Rate (per 100k people)\"}},\"id\":\"fd981887-f190-42fe-8cee-dfb35ef077e1\",\"type\":\"Line\"},{\"attributes\":{},\"id\":\"132f30e9-c8c4-4c71-a5b2-98ab2f2d40ef\",\"type\":\"BasicTicker\"},{\"attributes\":{\"axis_label\":\"Year\",\"bounds\":\"auto\",\"formatter\":{\"id\":\"3bb836e5-3444-4da9-9a78-ff97ff6f536a\",\"type\":\"BasicTickFormatter\"},\"major_label_orientation\":\"horizontal\",\"plot\":{\"id\":\"9268281a-2417-46c5-9182-d5ebb5689dc2\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"132f30e9-c8c4-4c71-a5b2-98ab2f2d40ef\",\"type\":\"BasicTicker\"}},\"id\":\"4c2875d5-77df-4591-9438-8d68a338a1d8\",\"type\":\"LinearAxis\"},{\"attributes\":{\"axis_label\":\"Rate (per 100k 
people)\",\"bounds\":\"auto\",\"formatter\":{\"id\":\"0d192563-9260-41da-a32c-f5ccbc73bd4c\",\"type\":\"BasicTickFormatter\"},\"major_label_orientation\":\"horizontal\",\"plot\":{\"id\":\"9268281a-2417-46c5-9182-d5ebb5689dc2\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"c685bdab-a075-4101-ae8a-b6968cb1d905\",\"type\":\"BasicTicker\"}},\"id\":\"a9c6f8b1-9ea1-41ca-a889-60f708773bc7\",\"type\":\"LinearAxis\"},{\"attributes\":{\"source\":{\"id\":\"2b2287e6-2706-4b63-aedd-a6bf32979687\",\"type\":\"ColumnDataSource\"}},\"id\":\"ec9bc3f8-1164-478d-8a1e-ac8f0b55492d\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"c685bdab-a075-4101-ae8a-b6968cb1d905\",\"type\":\"BasicTicker\"},{\"attributes\":{},\"id\":\"150786bd-42e2-4f3f-ad4d-35e7ce7581e4\",\"type\":\"ResetTool\"},{\"attributes\":{\"dimension\":1,\"grid_line_color\":{\"value\":null},\"plot\":{\"id\":\"9268281a-2417-46c5-9182-d5ebb5689dc2\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"c685bdab-a075-4101-ae8a-b6968cb1d905\",\"type\":\"BasicTicker\"}},\"id\":\"c62b66c7-6be2-4af9-a12f-aafc8e32caff\",\"type\":\"Grid\"},{\"attributes\":{\"callback\":null,\"data\":{\"Rate (per 100k 
people)\":{\"__ndarray__\":\"zczMzMwcZEAzMzMzM8NjQJqZmZmZSWRAZmZmZmYGZUAzMzMzM9NnQGZmZmZmBmlAAAAAAACAa0BmZmZmZqZvQGZmZmZmpnJAMzMzMzOLdEAAAAAAALh2QAAAAAAAwHhAAAAAAAAQeUBmZmZmZhZ6QJqZmZmZ0XxAzczMzMx8fkDNzMzMzDx9QGZmZmZmvn1AzczMzMwcf0AzMzMzMyeBQM3MzMzMpIJAAAAAAACMgkBmZmZmZtaBQM3MzMzM0IBAMzMzMzPfgEDNzMzMzHCBQM3MzMzMYINAAAAAAAAkg0DNzMzMzASEQDMzMzMz14RAzczMzMzMhkCamZmZmbGHQJqZmZmZrYdAzczMzMxYh0DNzMzMzEyGQAAAAAAAZIVAzczMzMzkg0AAAAAAABiDQM3MzMzMvIFAAAAAAABYgEAAAAAAAKh/QAAAAAAAiH9AZmZmZmbmfkDNzMzMzLx9QDMzMzMz83xAAAAAAABQfUDNzMzMzPR9QM3MzMzMfH1AmpmZmZmpfEBmZmZmZv56QAAAAAAASHlAmpmZmZkxeEDNzMzMzDx4QJqZmZmZsXdAMzMzMzN7d0A=\",\"dtype\":\"float64\",\"shape\":[55]},\"Rate_left_parenthesis_per_100k_people_right_parenthesis\":{\"__ndarray__\":\"zczMzMwcZEAzMzMzM8NjQJqZmZmZSWRAZmZmZmYGZUAzMzMzM9NnQGZmZmZmBmlAAAAAAACAa0BmZmZmZqZvQGZmZmZmpnJAMzMzMzOLdEAAAAAAALh2QAAAAAAAwHhAAAAAAAAQeUBmZmZmZhZ6QJqZmZmZ0XxAzczMzMx8fkDNzMzMzDx9QGZmZmZmvn1AzczMzMwcf0AzMzMzMyeBQM3MzMzMpIJAAAAAAACMgkBmZmZmZtaBQM3MzMzM0IBAMzMzMzPfgEDNzMzMzHCBQM3MzMzMYINAAAAAAAAkg0DNzMzMzASEQDMzMzMz14RAzczMzMzMhkCamZmZmbGHQJqZmZmZrYdAzczMzMxYh0DNzMzMzEyGQAAAAAAAZIVAzczMzMzkg0AAAAAAABiDQM3MzMzMvIFAAAAAAABYgEAAAAAAAKh/QAAAAAAAiH9AZmZmZmbmfkDNzMzMzLx9QDMzMzMz83xAAAAAAABQfUDNzMzMzPR9QM3MzMzMfH1AmpmZmZmpfEBmZmZmZv56QAAAAAAASHlAmpmZmZkxeEDNzMzMzDx4QJqZmZmZsXdAMzMzMzN7d0A=\",\"dtype\":\"float64\",\"shape\":[55]},\"Variable\":[\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime 
rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\",\"Violent Crime rate\"],\"Year\":[1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014]},\"selected\":{\"id\":\"82b27fb8-dcc0-4828-9fe8-cb11fa8c716d\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"7fa57198-d15f-4a50-bad8-d6730c3d1eac\",\"type\":\"UnionRenderers\"}},\"id\":\"c408e98a-be3e-4937-954c-9e712fa19f2c\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"line_color\":\"#1f77b4\",\"line_width\":2,\"x\":{\"field\":\"Year\"},\"y\":{\"field\":\"Rate (per 100k 
people)\"}},\"id\":\"f5cb752e-2308-4d2e-a511-0a1c60baf228\",\"type\":\"Line\"},{\"attributes\":{\"bottom_units\":\"screen\",\"fill_alpha\":{\"value\":0.5},\"fill_color\":{\"value\":\"lightgrey\"},\"left_units\":\"screen\",\"level\":\"overlay\",\"line_alpha\":{\"value\":1.0},\"line_color\":{\"value\":\"black\"},\"line_dash\":[4,4],\"line_width\":{\"value\":2},\"plot\":null,\"render_mode\":\"css\",\"right_units\":\"screen\",\"top_units\":\"screen\"},\"id\":\"8d9c612d-75cb-4181-a127-97ed515133b2\",\"type\":\"BoxAnnotation\"},{\"attributes\":{},\"id\":\"1c3e9b46-f773-44ef-9d3f-5b7734e5a267\",\"type\":\"SaveTool\"},{\"attributes\":{},\"id\":\"5bd2845d-0741-40fa-83fb-b04651091fb8\",\"type\":\"PanTool\"},{\"attributes\":{},\"id\":\"d0b1b2a0-7f3c-4115-be37-2b6913b3b801\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"e6b98c50-b4c7-4a21-8f0f-f1afcdb8eb23\",\"type\":\"WheelZoomTool\"},{\"attributes\":{\"overlay\":{\"id\":\"8d9c612d-75cb-4181-a127-97ed515133b2\",\"type\":\"BoxAnnotation\"}},\"id\":\"da5e614b-d585-4e75-95f5-6a59eea5e0bc\",\"type\":\"BoxZoomTool\"},{\"attributes\":{\"line_alpha\":0.1,\"line_color\":\"#1f77b4\",\"line_width\":2,\"x\":{\"field\":\"Year\"},\"y\":{\"field\":\"Rate (per 100k 
people)\"}},\"id\":\"4aff6e88-4363-4255-b78b-c71223315006\",\"type\":\"Line\"},{\"attributes\":{\"source\":{\"id\":\"c408e98a-be3e-4937-954c-9e712fa19f2c\",\"type\":\"ColumnDataSource\"}},\"id\":\"de890536-a3b0-4809-af7a-f51c8ae52099\",\"type\":\"CDSView\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"3481d497-d16e-4103-802f-c7f49222fe53\",\"type\":\"HoverTool\"},{\"id\":\"1c3e9b46-f773-44ef-9d3f-5b7734e5a267\",\"type\":\"SaveTool\"},{\"id\":\"5bd2845d-0741-40fa-83fb-b04651091fb8\",\"type\":\"PanTool\"},{\"id\":\"e6b98c50-b4c7-4a21-8f0f-f1afcdb8eb23\",\"type\":\"WheelZoomTool\"},{\"id\":\"da5e614b-d585-4e75-95f5-6a59eea5e0bc\",\"type\":\"BoxZoomTool\"},{\"id\":\"150786bd-42e2-4f3f-ad4d-35e7ce7581e4\",\"type\":\"ResetTool\"}]},\"id\":\"aebf021b-befc-4d6b-9ffd-a4e1aadceb03\",\"type\":\"Toolbar\"},{\"attributes\":{\"callback\":null,\"end\":1684.1,\"start\":58.3},\"id\":\"94497823-8347-4b0e-b26c-fe2aa2abbd31\",\"type\":\"Range1d\"},{\"attributes\":{\"line_color\":\"#ff7f0e\",\"line_width\":2,\"x\":{\"field\":\"Year\"},\"y\":{\"field\":\"Rate (per 100k 
people)\"}},\"id\":\"6d895348-af34-47fb-8db9-7aed49b78720\",\"type\":\"Line\"},{\"attributes\":{\"click_policy\":\"mute\",\"items\":[{\"id\":\"44735ebd-4ba2-41b2-a24b-6c318fc39393\",\"type\":\"LegendItem\"},{\"id\":\"f8870c4e-c6c9-46b6-ac8b-82c4df0ff009\",\"type\":\"LegendItem\"},{\"id\":\"ebda79be-5fa7-4bab-9636-7e7f956bcaa9\",\"type\":\"LegendItem\"}],\"plot\":{\"id\":\"9268281a-2417-46c5-9182-d5ebb5689dc2\",\"subtype\":\"Figure\",\"type\":\"Plot\"}},\"id\":\"2215446e-e645-4546-acea-13fc8fa81a4a\",\"type\":\"Legend\"},{\"attributes\":{\"data_source\":{\"id\":\"c408e98a-be3e-4937-954c-9e712fa19f2c\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"f5cb752e-2308-4d2e-a511-0a1c60baf228\",\"type\":\"Line\"},\"hover_glyph\":null,\"muted_glyph\":{\"id\":\"087d0ef0-6a69-4746-ba34-828ff44f60c8\",\"type\":\"Line\"},\"nonselection_glyph\":{\"id\":\"4aff6e88-4363-4255-b78b-c71223315006\",\"type\":\"Line\"},\"selection_glyph\":null,\"view\":{\"id\":\"de890536-a3b0-4809-af7a-f51c8ae52099\",\"type\":\"CDSView\"}},\"id\":\"0d4d8d8c-cbc8-4eba-8c60-867a3ce11b30\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"callback\":null,\"data\":{\"Rate (per 100k 
people)\":{\"__ndarray__\":\"zczMzMwMTkBmZmZmZiZNQJqZmZmZ2U1AZmZmZmbmTkDNzMzMzAxRQM3MzMzM7FFAMzMzMzMzVEAzMzMzM7NZQJqZmZmZeWBAzczMzMyMYkAzMzMzM4NlQAAAAAAAgGdAZmZmZmaWZkAzMzMzM+NmQJqZmZmZKWpAmpmZmZmZa0CamZmZmeloQGZmZmZm1mdAmpmZmZl5aEDNzMzMzExrQDMzMzMzY29AZmZmZmYmcECamZmZmdltQGZmZmZmFmtAZmZmZma2aUCamZmZmSlqQAAAAAAAQGxAZmZmZma2akAzMzMzM8NrQJqZmZmZSW1AzczMzMwEcEAzMzMzMwtxQDMzMzMze3BAAAAAAAAAcECamZmZmbltQM3MzMzMnGtAzczMzMw8aUBmZmZmZkZnQAAAAAAAsGRAMzMzMzPDYkAAAAAAACBiQAAAAAAAkGJAMzMzMzNDYkAAAAAAANBhQGZmZmZmFmFAmpmZmZmZYUAAAAAAAMBiQJqZmZmZiWJAzczMzMw8YkAzMzMzM6NgQDMzMzMz011AmpmZmZl5XEBmZmZmZkZcQAAAAAAAQFtAzczMzMyMWUA=\",\"dtype\":\"float64\",\"shape\":[55]},\"Rate_left_parenthesis_per_100k_people_right_parenthesis\":{\"__ndarray__\":\"zczMzMwMTkBmZmZmZiZNQJqZmZmZ2U1AZmZmZmbmTkDNzMzMzAxRQM3MzMzM7FFAMzMzMzMzVEAzMzMzM7NZQJqZmZmZeWBAzczMzMyMYkAzMzMzM4NlQAAAAAAAgGdAZmZmZmaWZkAzMzMzM+NmQJqZmZmZKWpAmpmZmZmZa0CamZmZmeloQGZmZmZm1mdAmpmZmZl5aEDNzMzMzExrQDMzMzMzY29AZmZmZmYmcECamZmZmdltQGZmZmZmFmtAZmZmZma2aUCamZmZmSlqQAAAAAAAQGxAZmZmZma2akAzMzMzM8NrQJqZmZmZSW1AzczMzMwEcEAzMzMzMwtxQDMzMzMze3BAAAAAAAAAcECamZmZmbltQM3MzMzMnGtAzczMzMw8aUBmZmZmZkZnQAAAAAAAsGRAMzMzMzPDYkAAAAAAACBiQAAAAAAAkGJAMzMzMzNDYkAAAAAAANBhQGZmZmZmFmFAmpmZmZmZYUAAAAAAAMBiQJqZmZmZiWJAzczMzMw8YkAzMzMzM6NgQDMzMzMz011AmpmZmZl5XEBmZmZmZkZcQAAAAAAAQFtAzczMzMyMWUA=\",\"dtype\":\"float64\",\"shape\":[55]},\"Variable\":[\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery 
rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\",\"Robbery rate\"],\"Year\":[1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014]},\"selected\":{\"id\":\"5277ef26-8da1-45ef-b505-c9855b23f975\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"080c73c2-3a77-45d1-83b4-7e294d83271c\",\"type\":\"UnionRenderers\"}},\"id\":\"26627c94-a925-4257-add4-d88fc980dcc4\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"callback\":null,\"renderers\":[{\"id\":\"0d4d8d8c-cbc8-4eba-8c60-867a3ce11b30\",\"type\":\"GlyphRenderer\"},{\"id\":\"f5632049-54e4-4001-905f-2e37f0480b55\",\"type\":\"GlyphRenderer\"},{\"id\":\"125ade69-dfa3-4df1-90e0-238b813d7202\",\"type\":\"GlyphRenderer\"}],\"tooltips\":[[\"Variable\",\"@{Variable}\"],[\"Year\",\"@{Year}\"],[\"Rate (per 100k people)\",\"@{Rate_left_parenthesis_per_100k_people_right_parenthesis}\"]]},\"id\":\"3481d497-d16e-4103-802f-c7f49222fe53\",\"type\":\"HoverTool\"},{\"attributes\":{\"line_alpha\":0.2,\"line_color\":\"#ff7f0e\",\"line_width\":2,\"x\":{\"field\":\"Year\"},\"y\":{\"field\":\"Rate (per 100k 
people)\"}},\"id\":\"b39f13cb-fc20-4a4f-aac9-2b702f99c6b1\",\"type\":\"Line\"},{\"attributes\":{},\"id\":\"8223a783-c027-473c-9217-f60e3ac36aa0\",\"type\":\"LinearScale\"},{\"attributes\":{\"source\":{\"id\":\"26627c94-a925-4257-add4-d88fc980dcc4\",\"type\":\"ColumnDataSource\"}},\"id\":\"342bcecb-3198-4223-a63d-02a2b07986eb\",\"type\":\"CDSView\"},{\"attributes\":{\"line_alpha\":0.1,\"line_color\":\"#ff7f0e\",\"line_width\":2,\"x\":{\"field\":\"Year\"},\"y\":{\"field\":\"Rate (per 100k people)\"}},\"id\":\"44bd8031-3f2c-48c0-8850-51598e239176\",\"type\":\"Line\"},{\"attributes\":{},\"id\":\"7fa57198-d15f-4a50-bad8-d6730c3d1eac\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"080c73c2-3a77-45d1-83b4-7e294d83271c\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"data_source\":{\"id\":\"26627c94-a925-4257-add4-d88fc980dcc4\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"6d895348-af34-47fb-8db9-7aed49b78720\",\"type\":\"Line\"},\"hover_glyph\":null,\"muted_glyph\":{\"id\":\"b39f13cb-fc20-4a4f-aac9-2b702f99c6b1\",\"type\":\"Line\"},\"nonselection_glyph\":{\"id\":\"44bd8031-3f2c-48c0-8850-51598e239176\",\"type\":\"Line\"},\"selection_glyph\":null,\"view\":{\"id\":\"342bcecb-3198-4223-a63d-02a2b07986eb\",\"type\":\"CDSView\"}},\"id\":\"f5632049-54e4-4001-905f-2e37f0480b55\",\"type\":\"GlyphRenderer\"},{\"attributes\":{},\"id\":\"82b27fb8-dcc0-4828-9fe8-cb11fa8c716d\",\"type\":\"Selection\"},{\"attributes\":{\"callback\":null,\"data\":{\"Rate (per 100k 
people)\":{\"__ndarray__\":\"mpmZmZnJf0AzMzMzMzeAQJqZmZmZuYBAMzMzMzMDgkCamZmZmdWDQJqZmZmZtYRAAAAAAACIhkDNzMzMzNSJQGZmZmZmIo1AzczMzMzAjkCamZmZmfOQQAAAAAAALpJAMzMzMzPTkUAAAAAAABqTQM3MzMzMdpZAZmZmZmbwl0DNzMzMzKCWQDMzMzMzL5ZAZmZmZmZqlkCamZmZmZ+XQGZmZmZmUJpAzczMzMy8mUAAAAAAAECXQM3MzMzM6pRAAAAAAADGk0DNzMzMzC6UQDMzMzMzF5VAzczMzMzelEDNzMzMzJCUQGZmZmZmDpRAzczMzMxAk0BmZmZmZpCTQJqZmZmZQZJAzczMzMwukUBmZmZmZkiQQAAAAAAA2I5AAAAAAACIjUBmZmZmZraMQJqZmZmZ+YpAMzMzMzMTiEBmZmZmZsaGQGZmZmZmLodAAAAAAABYh0AAAAAAACiHQGZmZmZm0oZAMzMzMzO3hkDNzMzMzOiGQM3MzMzMsIZAAAAAAADohkCamZmZmW2GQAAAAAAA6IVAZmZmZmbqhUCamZmZmQGFQDMzMzMzE4NAAAAAAAD0gEA=\",\"dtype\":\"float64\",\"shape\":[55]},\"Rate_left_parenthesis_per_100k_people_right_parenthesis\":{\"__ndarray__\":\"mpmZmZnJf0AzMzMzMzeAQJqZmZmZuYBAMzMzMzMDgkCamZmZmdWDQJqZmZmZtYRAAAAAAACIhkDNzMzMzNSJQGZmZmZmIo1AzczMzMzAjkCamZmZmfOQQAAAAAAALpJAMzMzMzPTkUAAAAAAABqTQM3MzMzMdpZAZmZmZmbwl0DNzMzMzKCWQDMzMzMzL5ZAZmZmZmZqlkCamZmZmZ+XQGZmZmZmUJpAzczMzMy8mUAAAAAAAECXQM3MzMzM6pRAAAAAAADGk0DNzMzMzC6UQDMzMzMzF5VAzczMzMzelEDNzMzMzJCUQGZmZmZmDpRAzczMzMxAk0BmZmZmZpCTQJqZmZmZQZJAzczMzMwukUBmZmZmZkiQQAAAAAAA2I5AAAAAAACIjUBmZmZmZraMQJqZmZmZ+YpAMzMzMzMTiEBmZmZmZsaGQGZmZmZmLodAAAAAAABYh0AAAAAAACiHQGZmZmZm0oZAMzMzMzO3hkDNzMzMzOiGQM3MzMzMsIZAAAAAAADohkCamZmZmW2GQAAAAAAA6IVAZmZmZmbqhUCamZmZmQGFQDMzMzMzE4NAAAAAAAD0gEA=\",\"dtype\":\"float64\",\"shape\":[55]},\"Variable\":[\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary 
rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\",\"Burglary rate\"],\"Year\":[1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014]},\"selected\":{\"id\":\"c05057cb-f16d-4349-b03d-c09cdc6ba559\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"ffb0bc45-c827-4f96-ae52-ad134c44d0b3\",\"type\":\"UnionRenderers\"}},\"id\":\"2b2287e6-2706-4b63-aedd-a6bf32979687\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"line_color\":\"#2ca02c\",\"line_width\":2,\"x\":{\"field\":\"Year\"},\"y\":{\"field\":\"Rate (per 100k people)\"}},\"id\":\"c15dc676-9a32-46ac-acfd-f99f4eb8e731\",\"type\":\"Line\"},{\"attributes\":{\"label\":{\"value\":\"Robbery rate\"},\"renderers\":[{\"id\":\"f5632049-54e4-4001-905f-2e37f0480b55\",\"type\":\"GlyphRenderer\"}]},\"id\":\"f8870c4e-c6c9-46b6-ac8b-82c4df0ff009\",\"type\":\"LegendItem\"}],\"root_ids\":[\"9268281a-2417-46c5-9182-d5ebb5689dc2\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.16\"}}\n        </script>\n        <script type=\"text/javascript\">\n          (function() {\n            var fn = function() {\n              Bokeh.safely(function() {\n                (function(root) {\n                  function embed_document(root) {\n\n                  var docs_json = document.getElementById('de6014ac-0cb8-4cb5-aff2-696c243a0f51').textContent;\n                  var render_items = 
[{\"docid\":\"cf966cb6-e460-4e42-870e-e1fe77bb6636\",\"elementid\":\"a54b0d41-2dbd-4cbd-abe8-b158862abe73\",\"modelid\":\"9268281a-2417-46c5-9182-d5ebb5689dc2\"}];\n                  root.Bokeh.embed.embed_items(docs_json, render_items);\n\n                  }\n                  if (root.Bokeh !== undefined) {\n                    embed_document(root);\n                  } else {\n                    var attempts = 0;\n                    var timer = setInterval(function(root) {\n                      if (root.Bokeh !== undefined) {\n                        embed_document(root);\n                        clearInterval(timer);\n                      }\n                      attempts++;\n                      if (attempts > 100) {\n                        console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\")\n                        clearInterval(timer);\n                      }\n                    }, 10, root)\n                  }\n                })(window);\n              });\n            };\n            if (document.readyState != \"loading\") fn();\n            else document.addEventListener(\"DOMContentLoaded\", fn);\n          })();\n        </script>\n    </body>\n</html>\n"
  },
  {
    "path": "docs/source/api.rst",
    "content": "API\n===\n\nAuto-generated reference\n\n.. toctree::\n   :maxdepth: 1\n\n   api_user.rst\n   api_base.rst\n   api_other.rst\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/api2.rst",
    "content": "\n.. _api2:\n\nAPI Reference\n=============\n\nUser Functions\n--------------\n\n.. autosummary::\n    intake.config.Config\n    intake.readers.datatypes.recommend\n    intake.readers.convert.auto_pipeline\n    intake.readers.convert.path\n    intake.readers.entry.Catalog\n    intake.readers.entry.DataDescription\n    intake.readers.entry.ReaderDescription\n    intake.readers.readers.recommend\n    intake.readers.readers.reader_from_call\n\n.. autoclass:: intake.config.Config\n    :members:\n\n.. autofunction:: intake.readers.datatypes.recommend\n\n.. autofunction:: intake.readers.convert.auto_pipeline\n\n.. autoclass:: intake.readers.entry.Catalog\n    :members:\n\n.. autoclass:: intake.readers.entry.DataDescription\n    :members:\n\n.. autoclass:: intake.readers.entry.ReaderDescription\n    :members:\n\n.. autofunction:: intake.readers.readers.recommend\n\n.. autofunction:: intake.readers.readers.reader_from_call\n\n.. _base:\n\nBase Classes\n------------\n\nThese may be subclassed by developers\n\n.. autosummary::\n   intake.readers.datatypes.BaseData\n   intake.readers.readers.BaseReader\n   intake.readers.convert.BaseConverter\n   intake.readers.namespaces.Namespace\n   intake.readers.search.SearchBase\n   intake.readers.user_parameters.BaseUserParameter\n\n.. autoclass:: intake.readers.datatypes.BaseData\n   :members:\n\n.. autoclass:: intake.readers.readers.BaseReader\n   :members:\n\n.. autoclass:: intake.readers.convert.BaseConverter\n   :members:\n\n.. autoclass:: intake.readers.namespaces.Namespace\n   :members:\n\n.. autoclass:: intake.readers.search.SearchBase\n   :members:\n\n.. autoclass:: intake.readers.user_parameters.BaseUserParameter\n   :members:\n\n.. _data:\n\nData Classes\n------------\n\n.. 
autosummary::\n   intake.readers.datatypes.ASDF\n   intake.readers.datatypes.AVRO\n   intake.readers.datatypes.CSV\n   intake.readers.datatypes.Catalog\n   intake.readers.datatypes.CatalogAPI\n   intake.readers.datatypes.CatalogFile\n   intake.readers.datatypes.DICOM\n   intake.readers.datatypes.DeltalakeTable\n   intake.readers.datatypes.Excel\n   intake.readers.datatypes.FITS\n   intake.readers.datatypes.Feather1\n   intake.readers.datatypes.Feather2\n   intake.readers.datatypes.FileData\n   intake.readers.datatypes.GDALRasterFile\n   intake.readers.datatypes.GDALVectorFile\n   intake.readers.datatypes.GRIB2\n   intake.readers.datatypes.GeoJSON\n   intake.readers.datatypes.GeoPackage\n   intake.readers.datatypes.HDF5\n   intake.readers.datatypes.Handle\n   intake.readers.datatypes.HuggingfaceDataset\n   intake.readers.datatypes.IcebergDataset\n   intake.readers.datatypes.JPEG\n   intake.readers.datatypes.JSONFile\n   intake.readers.datatypes.KerasModel\n   intake.readers.datatypes.Literal\n   intake.readers.datatypes.MatlabArray\n   intake.readers.datatypes.MatrixMarket\n   intake.readers.datatypes.NetCDF3\n   intake.readers.datatypes.Nifti\n   intake.readers.datatypes.NumpyFile\n   intake.readers.datatypes.ORC\n   intake.readers.datatypes.OpenDAP\n   intake.readers.datatypes.PNG\n   intake.readers.datatypes.Parquet\n   intake.readers.datatypes.PickleFile\n   intake.readers.datatypes.Prometheus\n   intake.readers.datatypes.PythonSourceCode\n   intake.readers.datatypes.RawBuffer\n   intake.readers.datatypes.SKLearnPickleModel\n   intake.readers.datatypes.SQLQuery\n   intake.readers.datatypes.SQLite\n   intake.readers.datatypes.STACJSON\n   intake.readers.datatypes.Service\n   intake.readers.datatypes.Shapefile\n   intake.readers.datatypes.TFRecord\n   intake.readers.datatypes.THREDDSCatalog\n   intake.readers.datatypes.TIFF\n   intake.readers.datatypes.Text\n   intake.readers.datatypes.TileDB\n   intake.readers.datatypes.TiledDataset\n   
intake.readers.datatypes.TiledService\n   intake.readers.datatypes.WAV\n   intake.readers.datatypes.XML\n   intake.readers.datatypes.YAMLFile\n   intake.readers.datatypes.Zarr\n\n.. _reader:\n\nReader Classes\n--------------\n\nIncludes readers, transformers, converters and output classes.\n\n.. autosummary::\n   intake.readers.catalogs.EarthdataCatalogReader\n   intake.readers.catalogs.EarthdataReader\n   intake.readers.catalogs.HuggingfaceHubCatalog\n   intake.readers.catalogs.SKLearnExamplesCatalog\n   intake.readers.catalogs.SQLAlchemyCatalog\n   intake.readers.catalogs.STACIndex\n   intake.readers.catalogs.StacCatalogReader\n   intake.readers.catalogs.StacSearch\n   intake.readers.catalogs.StackBands\n   intake.readers.catalogs.THREDDSCatalogReader\n   intake.readers.catalogs.TensorFlowDatasetsCatalog\n   intake.readers.catalogs.TiledCatalogReader\n   intake.readers.catalogs.TorchDatasetsCatalog\n   intake.readers.convert.ASDFToNumpy\n   intake.readers.convert.BaseConverter\n   intake.readers.convert.DaskArrayToTileDB\n   intake.readers.convert.DaskDFToPandas\n   intake.readers.convert.DaskToRay\n   intake.readers.convert.DeltaQueryToDask\n   intake.readers.convert.DeltaQueryToDaskGeopandas\n   intake.readers.convert.DicomToNumpy\n   intake.readers.convert.DuckToPandas\n   intake.readers.convert.FITSToNumpy\n   intake.readers.convert.GenericFunc\n   intake.readers.convert.HuggingfaceToRay\n   intake.readers.convert.NibabelToNumpy\n   intake.readers.convert.NumpyToTileDB\n   intake.readers.convert.PandasToGeopandas\n   intake.readers.convert.PandasToMetagraph\n   intake.readers.convert.PandasToPolars\n   intake.readers.convert.PandasToRay\n   intake.readers.convert.Pipeline\n   intake.readers.convert.PolarsEager\n   intake.readers.convert.PolarsLazy\n   intake.readers.convert.PolarsToPandas\n   intake.readers.convert.RayToDask\n   intake.readers.convert.RayToPandas\n   intake.readers.convert.RayToSpark\n   intake.readers.convert.SparkDFToRay\n   
intake.readers.convert.TileDBToNumpy\n   intake.readers.convert.TileDBToPandas\n   intake.readers.convert.TiledNodeToCatalog\n   intake.readers.convert.TiledSearch\n   intake.readers.convert.ToHvPlot\n   intake.readers.convert.TorchToRay\n   intake.readers.output.CatalogToJson\n   intake.readers.output.DaskArrayToZarr\n   intake.readers.output.GeopandasToFile\n   intake.readers.output.MatplotlibToPNG\n   intake.readers.output.NumpyToNumpyFile\n   intake.readers.output.PandasToCSV\n   intake.readers.output.PandasToFeather\n   intake.readers.output.PandasToHDF5\n   intake.readers.output.PandasToParquet\n   intake.readers.output.Repr\n   intake.readers.output.ToMatplotlib\n   intake.readers.output.XarrayToNetCDF\n   intake.readers.output.XarrayToZarr\n   intake.readers.readers.ASDFReader\n   intake.readers.readers.Awkward\n   intake.readers.readers.AwkwardAVRO\n   intake.readers.readers.AwkwardJSON\n   intake.readers.readers.AwkwardParquet\n   intake.readers.readers.Condition\n   intake.readers.readers.CupyNumpyReader\n   intake.readers.readers.CupyTextReader\n   intake.readers.readers.DaskAwkwardJSON\n   intake.readers.readers.DaskAwkwardParquet\n   intake.readers.readers.DaskCSV\n   intake.readers.readers.DaskDF\n   intake.readers.readers.DaskDeltaLake\n   intake.readers.readers.DaskHDF\n   intake.readers.readers.DaskJSON\n   intake.readers.readers.DaskNPYStack\n   intake.readers.readers.DaskParquet\n   intake.readers.readers.DaskSQL\n   intake.readers.readers.DaskZarr\n   intake.readers.readers.DeltaReader\n   intake.readers.readers.DicomReader\n   intake.readers.readers.DuckCSV\n   intake.readers.readers.DuckDB\n   intake.readers.readers.DuckJSON\n   intake.readers.readers.DuckParquet\n   intake.readers.readers.DuckSQL\n   intake.readers.readers.FITSReader\n   intake.readers.readers.FileByteReader\n   intake.readers.readers.FileExistsReader\n   intake.readers.readers.FileReader\n   intake.readers.readers.GeoPandasReader\n   
intake.readers.readers.GeoPandasTabular\n   intake.readers.readers.HandleToUrlReader\n   intake.readers.readers.HuggingfaceReader\n   intake.readers.readers.KerasAudio\n   intake.readers.readers.KerasImageReader\n   intake.readers.readers.KerasModelReader\n   intake.readers.readers.KerasText\n   intake.readers.readers.NibabelNiftiReader\n   intake.readers.readers.NumpyReader\n   intake.readers.readers.NumpyText\n   intake.readers.readers.NumpyZarr\n   intake.readers.readers.Pandas\n   intake.readers.readers.PandasCSV\n   intake.readers.readers.PandasExcel\n   intake.readers.readers.PandasFeather\n   intake.readers.readers.PandasHDF5\n   intake.readers.readers.PandasORC\n   intake.readers.readers.PandasParquet\n   intake.readers.readers.PandasSQLAlchemy\n   intake.readers.readers.Polars\n   intake.readers.readers.PolarsAvro\n   intake.readers.readers.PolarsCSV\n   intake.readers.readers.PolarsDeltaLake\n   intake.readers.readers.PolarsExcel\n   intake.readers.readers.PolarsFeather\n   intake.readers.readers.PolarsIceberg\n   intake.readers.readers.PolarsJSON\n   intake.readers.readers.PolarsParquet\n   intake.readers.readers.PrometheusMetricReader\n   intake.readers.readers.PythonModule\n   intake.readers.readers.RasterIOXarrayReader\n   intake.readers.readers.Ray\n   intake.readers.readers.RayBinary\n   intake.readers.readers.RayCSV\n   intake.readers.readers.RayDeltaLake\n   intake.readers.readers.RayJSON\n   intake.readers.readers.RayParquet\n   intake.readers.readers.RayText\n   intake.readers.readers.Retry\n   intake.readers.readers.SKImageReader\n   intake.readers.readers.SKLearnExampleReader\n   intake.readers.readers.SKLearnModelReader\n   intake.readers.readers.ScipyMatlabReader\n   intake.readers.readers.ScipyMatrixMarketReader\n   intake.readers.readers.SparkCSV\n   intake.readers.readers.SparkDataFrame\n   intake.readers.readers.SparkDeltaLake\n   intake.readers.readers.SparkParquet\n   intake.readers.readers.SparkText\n   intake.readers.readers.TFORC\n  
 intake.readers.readers.TFPublicDataset\n   intake.readers.readers.TFRecordReader\n   intake.readers.readers.TFSQL\n   intake.readers.readers.TFTextreader\n   intake.readers.readers.TileDBDaskReader\n   intake.readers.readers.TileDBReader\n   intake.readers.readers.TiledClient\n   intake.readers.readers.TiledNode\n   intake.readers.readers.TorchDataset\n   intake.readers.readers.XArrayDatasetReader\n   intake.readers.readers.YAMLCatalogReader\n   intake.readers.transform.DataFrameColumns\n   intake.readers.transform.GetItem\n   intake.readers.transform.Method\n   intake.readers.transform.PysparkColumns\n   intake.readers.transform.THREDDSCatToMergedDataset\n   intake.readers.transform.XarraySel\n"
  },
  {
    "path": "docs/source/api_base.rst",
    "content": "Base Classes\n------------\n\nThis is a reference API class listing, useful mainly for developers.\n\n.. autosummary::\n   intake.source.base.DataSourceBase\n   intake.source.base.DataSource\n   intake.catalog.Catalog\n   intake.catalog.entry.CatalogEntry\n   intake.catalog.local.UserParameter\n   intake.source.derived.AliasSource\n   intake.source.base.Schema\n\n.. autoclass:: intake.source.base.DataSource\n   :members:\n\n   .. attribute:: plot\n\n      Accessor for HVPlot methods.  See :doc:`plotting` for more details.\n\n.. autoclass:: intake.catalog.Catalog\n   :members:\n\n.. autoclass:: intake.catalog.entry.CatalogEntry\n   :members:\n\n.. autoclass:: intake.catalog.local.UserParameter\n   :members:\n\n.. autoclass:: intake.source.derived.AliasSource\n   :members:\n\n.. autoclass:: intake.source.base.Schema\n   :members:\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/api_other.rst",
    "content": "Other Classes\n=============\n\nGUI\n---\n\n.. autosummary::\n\n   intake.interface.base.Base\n   intake.interface.base.BaseSelector\n   intake.interface.base.BaseView\n   intake.interface.catalog.add.FileSelector\n   intake.interface.catalog.add.URLSelector\n   intake.interface.catalog.add.CatAdder\n   intake.interface.catalog.search.Search\n   intake.interface.source.defined_plots.Plots\n\n.. autoclass:: intake.interface.base.Base\n   :members:\n\n.. autoclass:: intake.interface.base.BaseSelector\n   :members:\n\n.. autoclass:: intake.interface.base.BaseView\n   :members:\n\n.. autoclass:: intake.interface.catalog.add.FileSelector\n   :members:\n\n.. autoclass:: intake.interface.catalog.add.URLSelector\n   :members:\n\n.. autoclass:: intake.interface.catalog.add.CatAdder\n   :members:\n\n.. autoclass:: intake.interface.catalog.search.Search\n   :members:\n\n.. autoclass:: intake.interface.source.defined_plots.Plots\n   :members:\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/api_user.rst",
    "content": "End User\n--------\n\nThese are reference class and function definitions likely to be useful to everyone.\n\n.. autosummary::\n   intake.open_catalog\n   intake.registry\n   intake.register_driver\n   intake.unregister_driver\n   intake.source.csv.CSVSource\n   intake.source.textfiles.TextFilesSource\n   intake.source.jsonfiles.JSONFileSource\n   intake.source.jsonfiles.JSONLinesFileSource\n   intake.source.npy.NPySource\n   intake.source.zarr.ZarrArraySource\n   intake.catalog.local.YAMLFileCatalog\n   intake.catalog.local.YAMLFilesCatalog\n   intake.catalog.zarr.ZarrGroupCatalog\n\n.. autofunction::\n   intake.open_catalog\n\n.. attribute:: intake.registry\n\n   Mapping from plugin names to the DataSource classes that implement them. These are the\n   names that should appear in the ``driver:`` key of each source definition in a\n   catalog. See :doc:`plugin-directory` for more details.\n\n.. attribute:: intake.open_\n\n   Set of functions, one for each plugin, for direct opening of a data source. The names are derived from the names of\n   the plugins in the registry at import time.\n\n\nSource classes\n''''''''''''''\n\n.. autoclass:: intake.source.csv.CSVSource\n   :members: __init__, discover, read_partition, read, to_dask\n\n.. autoclass:: intake.source.zarr.ZarrArraySource\n   :members: __init__, discover, read_partition, read, to_dask\n\n.. autoclass:: intake.source.textfiles.TextFilesSource\n   :members: __init__, discover, read_partition, read, to_dask\n\n.. autoclass:: intake.source.jsonfiles.JSONFileSource\n   :members: __init__, discover, read\n\n.. autoclass:: intake.source.jsonfiles.JSONLinesFileSource\n   :members: __init__, discover, read, head\n\n.. autoclass:: intake.source.npy.NPySource\n   :members: __init__, discover, read_partition, read, to_dask\n\n.. autoclass:: intake.catalog.local.YAMLFileCatalog\n   :members: __init__, reload, search, walk\n\n.. 
autoclass:: intake.catalog.local.YAMLFilesCatalog\n   :members: __init__, reload, search, walk\n\n.. autoclass:: intake.catalog.zarr.ZarrGroupCatalog\n   :members: __init__, reload, search, walk, to_zarr\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/catalog.rst",
    "content": "Catalogs\n========\n\nData catalogs provide an abstraction that allows you to externally define, and optionally share, descriptions of\ndatasets, called *catalog entries*.  A catalog entry for a dataset includes information like:\n\n* The name of the Intake driver that can load the data\n* Arguments to the ``__init__()`` method of the driver\n* Metadata provided by the catalog author (such as field descriptions and types, or data provenance)\n\nIn addition, Intake allows the arguments to data sources to be templated, with the variables explicitly\nexpressed as \"user parameters\". The given arguments are rendered using ``jinja2``, the\nvalues of named user parameters, and any overrides.\nThe parameters are also offer validation of the allowed types and values, for both the template\nvalues and the final arguments passed to the data source. The parameters are named and described, to\nindicate to the user what they are for. This kind of structure can be used to, for example,\nchoose between two parts of a given data source, like \"latest\" and \"stable\", see the `entry1_part` entry in\nthe example below.\n\nThe user of the catalog can always override any template or argument value at the time\nthat they access a give source.\n\nThe Catalog class\n-----------------\n\nIn Intake, a ``Catalog`` instance is an object with one or more named entries.\nThe entries might be read from a static file (e.g., YAML, see the next section), from\nan Intake server or from any other data service that has a driver. Drivers which\ncreate catalogs are ordinary DataSource classes, except that they have the container\ntype \"catalog\", and do not return data products via the ``read()`` method.\n\nFor example, you might choose to instantiate the base class and fill in some entries\nexplicitly in your code\n\n.. 
code-block:: python\n\n    from intake.catalog import Catalog\n    from intake.catalog.local import LocalCatalogEntry\n    mycat = Catalog.from_dict({\n        'source1': LocalCatalogEntry(name, description, driver, args=...),\n        ...\n        })\n\nAlternatively, subclasses of ``Catalog`` can define how entries are created from\nwhichever file format or service they interact with, examples including ``RemoteCatalog``\nand `SQLCatalog`_. These generate entries based on their respective targets; some\nprovide advanced search capabilities executed on the server.\n\n.. _SQLCatalog: https://intake-sql.readthedocs.io/en/latest/api.html#intake_sql.SQLCatalog\n\n\nYAML Format\n-----------\n\nIntake catalogs can most simply be described with YAML files. This is very common\nin the tutorials and this documentation, because it is simple to understand, but demonstrates\nthe many features of Intake. Note that YAML files are also the easiest way to share\na catalog, simply by copying to a publicly-available location such as a cloud storage\nbucket.\nHere is an example:\n\n.. 
code-block:: yaml\n\n    metadata:\n      version: 1\n      parameters:\n        file_name:\n          type: str\n          description: default file name for child entries\n          default: example_file_name\n    sources:\n      example:\n        description: test\n        driver: random\n        args: {}\n\n      entry1_full:\n        description: entry1 full\n        metadata:\n          foo: 'bar'\n          bar: [1, 2, 3]\n        driver: csv\n        args: # passed to the open() method\n          urlpath: '{{ CATALOG_DIR }}/entry1_*.csv'\n\n      entry1_part:\n        description: entry1 part\n        parameters: # User parameters\n          part:\n            description: section of the data\n            type: str\n            default: \"stable\"\n            allowed: [\"latest\", \"stable\"]\n        driver: csv\n        args:\n          urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv'\n\n      entry2:\n        description: entry2\n        driver: csv\n        args:\n          # file_name parameter will be inherited from file-level parameters, so will\n          # default to \"example_file_name\"\n          urlpath: '{{ CATALOG_DIR }}/entry2/{{ file_name }}.csv'\n\n\nMetadata\n''''''''\n\nArbitrary extra descriptive information can go into the metadata section. Some fields will be\nclaimed for internal use and some fields may be restricted to local reading; but for now the only\nfield that is expected is ``version``, which will be updated when a breaking change is made to the\nfile format. 
Any catalog will have ``.metadata`` and ``.version`` attributes available.\n\nNote that each source also has its own metadata.\n\nThe metadata section can also contain ``parameters`` which will be inherited by the sources in\nthe file (note that these sources can augment these parameters, or override them with their own\nparameters).\n\nExtra drivers\n'''''''''''''\n\nThe ``driver:`` entry of a data source specification can be a driver name, as has been shown in the examples so far.\nIt can also be an absolute class path to use for the data source, in which case there will be no ambiguity about how\nto load the data. That is the preferred way to be explicit, when the driver name alone is not enough\n(see `Driver Selection`_, below).\n\nModules providing additional drivers can also be loaded explicitly by listing them in a ``plugins`` block:\n\n.. code-block:: yaml\n\n    plugins:\n      source:\n        - module: intake.catalog.tests.example1_source\n    sources:\n      ...\n\nHowever, you do not, in general, need to do this, since the ``driver:`` field of\neach source can also explicitly refer to the plugin class.\n\nSources\n'''''''\n\nThe majority of a catalog file is composed of data sources, which are named data sets that can be loaded for the user.\nCatalog authors describe the contents of a data set, how to load it, and optionally offer some customization of the\nreturned data.  Each data source has several attributes:\n\n- ``name``: The canonical name of the source.  Best practice is to compose source names from valid Python identifiers.\n  This allows Intake to support things like tab completion of data source names on catalog objects.\n  For example, ``monthly_downloads`` is a good source\n  name.\n- ``description``: Human readable description of the source.  To help catalog browsing tools, the description should be\n  Markdown.\n\n- ``driver``: Name of the Intake :term:`Driver` to use with this source.  Must either already be installed in the current\n  Python environment (i.e. with conda or pip) or loaded in the ``plugins`` section of the file. 
Can be a simple\n  driver name like \"csv\" or the full path to the implementation class like \"package.module.Class\".\n\n- ``args``: Keyword arguments to the init method of the driver.  Arguments may use template expansion.\n\n- ``metadata``: Any metadata keys that should be attached to the data source when opened.  These will be supplemented\n  by additional metadata provided by the driver.  Catalog authors can use whatever key names they would like, with the\n  exception that keys starting with a leading underscore are reserved for future internal use by Intake.\n\n- ``direct_access``: Control whether the data is directly accessed by the client, or proxied through a catalog server.\n  See :ref:`remote-catalogs` for more details.\n\n- ``parameters``: A dictionary of data source parameters.  See below for more details.\n\nCaching Source Files Locally\n''''''''''''''''''''''''''''\n\n*This method of defining the cache  with a dedicated block is deprecated, see the Remote Access\nsection, below*\n\nTo enable caching on the first read of remote data source files, add the ``cache`` section with the\nfollowing attributes:\n\n- ``argkey``: The args section key which contains the URL(s) of the data to be cached.\n- ``type``: One of the keys in the cache registry [`intake.source.cache.registry`], referring to an implementation of caching behaviour. The default is \"file\" for the caching of one or more files.\n\nExample:\n\n.. 
code-block:: yaml\n\n  test_cache:\n    description: cache a csv file from the local filesystem\n    driver: csv\n    cache:\n      - argkey: urlpath\n        type: file\n    args:\n      urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv'\n\nThe ``cache_dir`` defaults to ``~/.intake/cache``, and can be specified in the intake configuration\nfile or ``INTAKE_CACHE_DIR``\nenvironment variable, or at runtime using the ``\"cache_dir\"`` key of the configuration.\nThe special value ``\"catdir\"`` implies that cached files will appear in the same directory as the\ncatalog file in which the data source is defined, within a directory named \"intake_cache\". These will\nnot appear in the cache usage reported by the CLI.\n\nOptionally, the cache section can have a ``regex`` attribute, that modifies the path of the cache on\nthe disk. By default, the cache path is made by concatenating ``cache_dir``, dataset name, hash of\nthe url, and the url itself (without the protocol). The ``regex`` attribute allows removing part of the\nurl (the matching part).\n\nCaching can be disabled at runtime for all sources regardless of the catalog specification::\n\n    from intake.config import conf\n\n    conf['cache_disabled'] = True\n\nBy default, progress bars are shown during downloads if the package ``tqdm`` is\navailable, but this can be disabled (e.g., for\nconsoles that don't support complex text) with::\n\n    conf['cache_download_progress'] = False\n\nor, equivalently, the environment parameter ``INTAKE_CACHE_PROGRESS``.\n\n\nThe \"types\" of caching that are supported are listed in ``intake.source.cache.registry``, see\nthe docstrings of each for specific parameters that should appear in the cache block.\n\n\nIt is possible to work with compressed source files by setting ``type: compressed`` in the cache specification.\nBy default the compression type is inferred from the file extension, otherwise it can be set by assigning the ``decomp``\nvariable to any of the options listed in 
``intake.source.decompress.decomp``.\nThis will extract all the file(s) in the compressed file referenced by urlpath and store them in the cache directory.\n\nIn cases where miscellaneous files are present in the compressed file, a ``regex_filter`` parameter can be used. Only\nthe extracted filenames that match the pattern will be loaded. The cache path is appended to the filename so it is\nnecessary to include a wildcard to the beginning of the pattern.\n\nExample:\n\n.. code-block:: yaml\n\n  test_compressed:\n    driver: csv\n    args:\n      urlpath: 'compressed_file.tar.gz'\n    cache:\n      - type: compressed\n        decomp: tgz\n        argkey: urlpath\n        regex_filter: '.*data.csv'\n\nTemplating\n----------\n\nIntake catalog files support Jinja2 templating for driver arguments. Any occurrence of\na substring like ``{{field}}`` will be replaced by the value of the user parameters with\nthat same name, or the value explicitly provided by the user. For how to specify these user parameters,\nsee the next section.\n\nSome additional values are available for templating. The following is always available:\n``CATALOG_DIR``, the full path to the directory containing the YAML catalog file.  This is especially useful\nfor constructing paths relative to the catalog directory to locate data files and custom drivers.\nFor example, the search for CSV files for the two \"entry1\" blocks, above, will happen in the same directory as\nwhere the catalog file was found.\n\nThe following functions `may` be available. Since these execute code, the user of a catalog may decide\nwhether they trust those functions or not.\n\n- ``env(\"USER\")``: look in the set environment variables for the named variable\n- ``client_env(\"USER\")``: exactly the same, except that when using a client-server topology, the\n  value will come from the environment of the client.\n- ``shell(\"get_login thisuser -t\")``: execute the command, and use the output as the value. 
The\n  output will be trimmed of any trailing whitespace.\n- ``client_shell(\"get_login thisuser -t\")``: exactly the same, except that when using a client-server\n  topology, the value will come from the system of the client.\n\nThe reason for the \"client\" versions of the functions is to prevent leakage of potentially sensitive\ninformation between client and server by controlling where lookups happen. When working without a server,\nonly the ones without \"client\" are used.\n\nAn example:\n\n.. code-block:: yaml\n\n    sources:\n      personal_source:\n        description: This source needs your username\n        args:\n          url: \"http://server:port/user/{{env(USER)}}\"\n\nHere, if the user is named \"blogs\", the ``url`` argument will resolve to\n``\"http://server:port/user/blogs\"``; if the environment variable is not defined, it will\nresolve to ``\"http://server:port/user/\"``\n\n.. _paramdefs:\n\nParameter Definition\n--------------------\n\nSource parameters\n'''''''''''''''''\n\nA source definition can contain a \"parameters\" block.\nExpressed in YAML, a parameter may look as follows:\n\n.. code-block:: yaml\n\n    parameters:\n      name:\n        description: name to use  # human-readable text for what this parameter means\n        type: str  # one of bool, str, int, float, list[str | int | float], datetime, mlist\n        default: normal  # optional, value to assume if user does not override\n        allowed: [\"normal\", \"strange\"]  # optional, list of values that are OK, for validation\n        min: \"n\"  # optional, minimum allowed, for validation\n        max: \"t\"  # optional, maximum allowed, for validation\n\nA parameter, not to be confused with an :term:`argument`,\ncan have one of two uses:\n\n- to provide values for variables to be used in templating the arguments. *If* the pattern \"{{name}}\" exists in\n  any of the source arguments, it will be replaced by the value of the parameter. 
If the user provides\n  a value (e.g., ``source = cat.entry(name='something')``), that will be used, otherwise the default value. If\n  there is no user input or default, the empty value appropriate for type is used. The ``default`` field allows\n  for the same function expansion as listed for arguments, above.\n\n- *If* an argument with the same name as the parameter exists, its value, after any templating, will be\n  coerced to the given type of the parameter and validated against the allowed/max/min. It is therefore possible\n  to use the string templating system (e.g., to get a value from the environment), but pass the final value as,\n  for example, an integer. It makes no sense to provide a default for this case (the argument already has a value),\n  but providing a default will not raise an exception.\n\n- the \"mlist\" type is special: it means that the input must be a list, whose values are chosen from the\n  allowed list. This is the only type where the parameter value is not the same type as the allowed list's\n  values, e.g., if a list of str is set for ``allowed``, a list of str must also be the final value.\n\nNote: the ``datetime`` type accepts multiple values:\nPython datetime, ISO8601 string,  Unix timestamp int, \"now\" and  \"today\".\n\nCatalog parameters\n''''''''''''''''''\n\nYou can also define user parameters at the catalog level. This applies the parameter to\nall entries within that catalog, without having to define it for each and every entry.\nFurthermore, catalogs nested within the catalog will also inherit the parameter(s).\n\nFor example, with the following spec\n\n.. 
code-block:: yaml\n\n    metadata:\n      version: 1\n      parameters:\n        bucket:\n          type: str\n          description: description\n          default: test_bucket\n    sources:\n      param_source:\n        driver: parquet\n        description: description\n        args:\n          urlpath: s3://{{bucket}}/file.parquet\n      subcat:\n        driver: yaml_file\n        path: \"{{CATALOG_DIR}}/other.yaml\"\n\nIf ``cat`` is the corresponding catalog instance,\nthe URL of source ``cat.param_source`` will evaluate to \"s3://test_bucket/file.parquet\" by default, but\nthe parameter can be overridden with ``cat.param_source(bucket=\"other_bucket\")``. Also, any\nentries of ``subcat``, another catalog referenced from here, would also have the \"bucket\"-named\nparameter attached to all sources. Of course, those sources do not need to make use of the\nparameter.\n\nTo change the default, we can generate a new instance\n\n.. code-block:: python\n\n    cat2 = cat(bucket=\"production\")  # sets default value of \"bucket\" for cat2\n    subcat = cat.subcat(bucket=\"production\")  # sets default only for the nested catalog\n\nOf course, in these situations you can still override the value of the parameter for any\nsource, or pass explicit values for the arguments of the source, as normal.\n\nFor cases where the catalog is not defined in a YAML spec, the argument ``user_parameters``\nto the constructor takes the same form as ``parameters`` above: a dict of user parameters,\neither as ``UserParameter`` instances or as a dictionary spec for each one.\n\nTemplating parameters\n'''''''''''''''''''''\n\nTemplate functions can also be used in parameters (see `Templating`_, above), but you can use the available functions directly without the extra `{{...}}`.\n\nFor example, this catalog entry uses the ``env(\"HOME\")`` functionality as described to set a default based on the user's home directory.\n\n.. 
code-block:: yaml\n\n    sources:\n      variabledefault:\n        description: \"This entry leads to an example csv file in the user's home directory by default, but the user can pass root='somepath' to override that.\"\n        driver: csv\n        args:\n          path: \"{{root}}/example.csv\"\n        parameters:\n          root:\n            description: \"root path\"\n            type: str\n            default: \"env(HOME)\"\n\n\nDriver Selection\n----------------\n\nIn some cases, it may be possible that multiple backends are capable of loading from the same data\nformat or service. Sometimes, this may mean two drivers with unique names, or a single driver\nwith a parameter to choose between the different backends.\n\nHowever, it is possible that multiple drivers for reading a particular type of data\nalso share the same driver name: for example, both the\nintake-iris and the intake-xarray packages contain drivers with the name ``\"netcdf\"``, which\nare capable of reading the same files, but with different backends. Here we will describe the\nvarious possibilities of coping with this situation. Intake's plugin system makes it easy to encode such choices.\n\nIt may be\nacceptable to use any driver which claims to handle that data type, or to give the option of\nwhich driver to use to the user, or it may be necessary to specify which precise driver(s) are\nappropriate for that particular data. Intake allows all of these possibilities, even if the\nbackend drivers require extra arguments.\n\nSpecifying a single driver explicitly, rather than using a generic name, would look like this:\n\n.. code-block:: yaml\n\n    sources:\n      example:\n        description: test\n        driver: package.module.PluginClass\n        args: {}\n\nIt is also possible to describe a list of drivers with the same syntax. The first one\nfound will be the one used. 
Note that the class imports will only happen at data source\ninstantiation, i.e., when the entry is selected from the catalog.\n\n.. code-block:: yaml\n\n    sources:\n      example:\n        description: test\n        driver:\n          - package.module.PluginClass\n          - another_package.PluginClass2\n        args: {}\n\nThese alternative plugins can also be given data-source specific names, allowing the\nuser to choose at load time with `driver=` as a parameter. Additional arguments may also\nbe required for each option (which, as usual, may include user parameters); however, the\nsame global arguments will be passed to all of the drivers listed.\n\n\n.. code-block:: yaml\n\n    sources:\n      example:\n        description: test\n        driver:\n          first:\n            class: package.module.PluginClass\n            args:\n              specific_thing: 9\n          second:\n            class: another_package.PluginClass2\n        args: {}\n\nRemote Access\n-------------\n\n(see also :ref:`remote_data` for the implementation details)\n\nMany drivers support reading directly from remote data sources such as HTTP, S3 or GCS. In these cases,\nthe path to read from is usually given with a protocol prefix such as ``gcs://``. Additional dependencies\nwill typically be required (``requests``, ``s3fs``, ``gcsfs``, etc.), any data package\nshould specify these.  Further parameters\nmay be necessary for communicating with the storage backend and, by convention, the driver should take\na parameter ``storage_options`` containing arguments to pass to the backend. Some\nremote backends may also make use of environment variables or config files to\ndetermine their default behaviour.\n\nThe special template variable \"CATALOG_DIR\" may be used to construct relative URLs in the arguments to\na source. 
In such cases, if the filesystem used to load that catalog contained arguments, then\nthe ``storage_options`` of that file system will be extracted and passed to the source. Therefore, all\nsources which can accept general URLs (beyond just local paths) must make sure to accept this\nargument.\n\nAs an example of using ``storage_options``, the following\ntwo sources would allow for reading CSV data from S3 and GCS backends without\nauthentication (anonymous access), respectively\n\n.. code-block:: yaml\n\n   sources:\n     s3_csv:\n       driver: csv\n       description: \"Publicly accessible CSV data on S3; requires s3fs\"\n       args:\n         urlpath: s3://bucket/path/*.csv\n         storage_options:\n           anon: true\n     gcs_csv:\n       driver: csv\n       description: \"Publicly accessible CSV data on GCS; requires gcsfs\"\n       args:\n         urlpath: gcs://bucket/path/*.csv\n         storage_options:\n           token: \"anon\"\n\n.. _caching:\n\n**Using S3 Profiles**\n\nAn AWS profile may be specified as an argument under ``storage_options`` via the following format:\n\n.. code-block:: yaml\n\n      args:\n        urlpath: s3://bucket/path/*.csv\n        storage_options:\n          profile: aws-profile-name\n\n\nCaching\n'''''''\n\nURLs interpreted by ``fsspec`` offer `automatic caching`_. For example, to enable\nfile-based caching for the first source above, you can do:\n\n.. code-block:: yaml\n\n   sources:\n     s3_csv:\n       driver: csv\n       description: \"Publicly accessible CSV data on S3; requires s3fs\"\n       args:\n         urlpath: simplecache::s3://bucket/path/*.csv\n         storage_options:\n           s3:\n             anon: true\n\nHere we have added the \"simplecache\" to the URL (this caching backend does not store any\nmetadata about the cached file) and specified that the \"anon\" parameter is\nmeant as an argument to s3, not to the caching mechanism. 
As each file in\ns3 is accessed, it will first be downloaded and then the local version\nused instead.\n\n.. _automatic caching: https://filesystem-spec.readthedocs.io/en/latest/features.html#caching-files-locally\n\nYou can tailor how the caching works. In particular the location of the local\nstorage can be set with the ``cache_storage`` parameter (under the \"simplecache\"\ngroup of storage_options, of course) - otherwise they are stored in a temporary\nlocation only for the duration of the current python session. The cache location\nis particularly useful in conjunction with an environment variable, or\nrelative to \"{{CATALOG_DIR}}\", wherever the catalog was loaded from.\n\nPlease see the ``fsspec`` documentation for the full set of cache types and their\nvarious options.\n\nLocal Catalogs\n--------------\n\nA Catalog can be loaded from a YAML file on the local filesystem by creating a Catalog object:\n\n.. code-block:: python\n\n    from intake import open_catalog\n    cat = open_catalog('catalog.yaml')\n\nThen sources can be listed:\n\n.. code-block:: python\n\n    list(cat)\n\nand data sources are loaded via their name:\n\n.. code-block:: python\n\n    data = cat.entry_part1\n\nand you can optionally configure new instances of the source to define user parameters\nor override arguments by calling either of:\n\n.. code-block:: python\n\n    data = cat.entry_part1.configure_new(part='1')\n    data = cat.entry_part1(part='1')  # this is a convenience shorthand\n\nIntake also supports loading a catalog from all of the files ending in ``.yml`` and ``.yaml`` in a directory, or by using an\nexplicit glob-string. Note that the URL provided may refer to a remote storage system by passing a protocol\nspecifier such as ``s3://``, ``gcs://``:\n\n.. 
code-block:: python\n\n    cat = open_catalog('/research/my_project/catalog.d/')\n\nIntake Catalog objects will automatically reload changes or new additions to catalog files and directories on disk.\nThese changes will not affect already-opened data sources.\n\n\nCatalog Nesting\n---------------\n\nA catalog is just another type of data source for Intake. For example, you can print a YAML\nspecification corresponding to a catalog as follows:\n\n.. code-block:: python\n\n    cat = intake.open_catalog('cat.yaml')\n    print(cat.yaml())\n\nresults in:\n\n.. code-block:: yaml\n\n    sources:\n      cat:\n        args:\n          path: cat.yaml\n        description: ''\n        driver: intake.catalog.local.YAMLFileCatalog\n        metadata: {}\n\nThe `point` here, is that this can be included in another catalog.\n(It would, of course, be better to include a description and the full path of the catalog\nfile here.)\nIf the entry above were saved to another file, \"root.yaml\", and the\noriginal catalog contained an entry, ``data``, you could access it as:\n\n.. code-block:: python\n\n    root = intake.open_catalog('root.yaml')\n    root.cat.data\n\n\n\nIt is, therefore, possible to build up a hierarchy of catalogs referencing each other.\nThese can, of course, include remote URLs and indeed catalog sources other than simple files (all the\ntables on a SQL server, for instance). Plus, since the argument and parameter system also\napplies to entries such as the example above, it would be possible to give the user a runtime\nchoice of multiple catalogs to pick between, or have this decision depend on an environment\nvariable.\n\n.. _remote-catalogs:\n\nServer Catalogs\n---------------\n\nIntake also includes a server which can share an Intake catalog over HTTP\n(or HTTPS with the help of a TLS-enabled reverse proxy).  From the user perspective, remote catalogs function\nidentically to local catalogs:\n\n.. 
code-block:: python\n\n    cat = open_catalog('intake://catalog1:5000')\n    list(cat)\n\nThe difference is that operations on the catalog translate to requests sent to the catalog server.  Catalog servers\nprovide access to data sources in one of two modes:\n\n* Direct access: In this mode, the catalog server tells the client how to load the data, but the client uses its\n  local drivers to make the connection.  This requires the client has the required driver already installed *and*\n  has direct access to the files or data servers that the driver will connect to.\n\n* Proxied access: In this mode, the catalog server uses its local drivers to open the data source and stream the data\n  over the network to the client.  The client does not need *any* special drivers to read the data, and can read data\n  from files and data servers that it cannot access, as long as the catalog server has the required access.\n\nWhether a particular catalog entry supports direct or proxied access is determined by the ``direct_access`` option:\n\n\n- ``forbid`` (default): Force all clients to proxy data through the catalog server\n\n- ``allow``: If the client has the required driver, access the source directly, otherwise proxy the data through the\n  catalog server.\n\n- ``force``: Force all clients to access the data directly.  If they do not have the required driver, an exception will\n  be raised.\n\nNote that when the client is loading a data source via direct access, the catalog server will need to send the driver\narguments to the client.  
Do not include sensitive credentials in a data source that allows direct access.\n\nClient Authorization Plugins\n''''''''''''''''''''''''''''\n\nIntake servers can check if clients are authorized to access the catalog as a whole, or individual catalog entries.\nTypically a matched pair of server-side plugin (called an \"auth plugin\") and a client-side plugin (called a \"client\nauth plugin\") needs to be enabled for authorization checks to work.  This feature is still in early development, but see\nmodule ``intake.auth.secret`` for a demonstration pair of server and client classes implementing auth via a shared\nsecret.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/changelog.rst",
    "content": "Changelog\n=========\n\n2.0.4\n-----\n\nReleased March 19, 2024\n\n- re-enable v1 entrypoint sources\n- expose recommend functions higher up (e.g,. intake.recommend)\n- add more geo types and readers, including pmtiles\n- add migration guide\n\n2.0.3\n-----\n\nReleased February 29, 2024\n\n- fix v1 caches\n- more docs\n\n2.0.0\n-----\n\nReleased Jan 31, 2024\n\n- complete rewrite of the package, see main docs page\n\n0.7.0\n-----\n\nReleased May 29, 2023\n\n- be able to override arguments when using a source defined in an entry-point\n- make sources usable without explicit dependence on dask: zarr, textfiles, csv\n- removed some explicit usage (but not all) of dask throughout the codebase\n- new dataframe pipeline transform source\n\n.. _v0.6.8:\n\n0.6.8\n-----\n\nReleased March 11, 2023\n\n- user parameter parsed as string before conversion to given type\n- numpy source becomes first to have read() path avoid dask\n- when registering drivers dynamically, corresponding open_* functions\n  will be created automatically (plus refactor/cleanup of the discovery code)\n- docs config and style updates; the list of plugins to automatically\n  pull in status badges\n- catalog .gui attribute will make top-level GUI instance instead of\n  cut down one-catalog version\n- pre-commit checks added and consistent code style applied\n\n\n.. _v0.6.7:\n\n0.6.7\n-----\n\nReleased February 13, 2023\n\n- server fix for upstream dask change giving newlined in report\n- editable plots, based on hvPlot's \"explorer\"\n- remove \"text\" input to YAMLFileCatalog\n- GUI bug fixes\n- allow catalog TTL as None\n\n.. _v0.6.6:\n\n0.6.6\n-----\n\nReleased on August 26, 2022.\n\n- Fixed bug in json and jsonl driver.\n- Ensure description is retained in the catalog.\n- Fix cache issue when running inside a notebook.\n- Add templating parameters.\n- Plotting api keeps hold of hvplot calls to allow other plots to be made.\n- docs updates\n- fix urljoin for server via proxy\n\n.. 
_v0.6.5:\n\n0.6.5\n-----\n\nReleased on January 9, 2022.\n\n- Added link to intake-google-analytics.\n- Add tiled driver.\n- Add json and jsonl drivers.\n- Allow parameters to be passed through catalog.\n- Add mlist type which allows inputs from a known list of values.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/code-of-conduct.rst",
    "content": "Code of Conduct\n===============\n\nAll participants in the fsspec community are expected to adhere to a Code of Conduct.\n\nAs contributors and maintainers of this project, and in the interest of\nfostering an open and welcoming community, we pledge to respect all people who\ncontribute through reporting issues, posting feature requests, updating\ndocumentation, submitting pull requests or patches, and other activities.\n\nWe are committed to making participation in this project a harassment-free\nexperience for everyone, treating everyone as unique humans deserving of\nrespect.\n\nExamples of unacceptable behaviour by participants include:\n\n- The use of sexualized language or imagery\n- Personal attacks\n- Trolling or insulting/derogatory comments\n- Public or private harassment\n- Publishing other's private information, such as physical or electronic\n  addresses, without explicit permission\n- Other unethical or unprofessional conduct\n\nProject maintainers have the right and responsibility to remove, edit, or\nreject comments, commits, code, wiki edits, issues, and other contributions\nthat are not aligned to this Code of Conduct, or to ban temporarily or\npermanently any contributor for other behaviours that they deem inappropriate,\nthreatening, offensive, or harmful.\n\nBy adopting this Code of Conduct, project maintainers commit themselves\nto fairly and consistently applying these principles to every aspect of\nmanaging this project. 
Project maintainers who do not follow or enforce\nthe Code of Conduct may be permanently removed from the project team.\n\nThis code of conduct applies both within project spaces and in public\nspaces when an individual is representing the project or its community.\n\nIf you feel the code of conduct has been violated, please report the\nincident to the fsspec core team.\n\nReporting\n---------\n\nIf you believe someone is violating the Code of Conduct, we ask that you report it\nto the Project by emailing community@anaconda.com. All reports will be kept\nconfidential. In some cases we may determine that a public statement will need\nto be made. If that's the case, the identities of all victims and reporters\nwill remain confidential unless those individuals instruct us otherwise.\nIf you believe anyone is in physical danger, please notify appropriate law\nenforcement first.\n\nIn your report please include:\n\n- Your contact info\n- Names (real, nicknames, or pseudonyms) of any individuals involved.\n  If there were other witnesses besides you, please try to include them as well.\n- When and where the incident occurred. Please be as specific as possible.\n- Your account of what occurred. 
If there is a publicly available record\n  please include a link.\n- Any extra context you believe existed for the incident.\n- If you believe this incident is ongoing.\n- If you believe any member of the core team has a conflict of interest\n  in adjudicating the incident.\n- What, if any, corrective response you believe would be appropriate.\n- Any other information you believe we should have.\n\nCore team members are obligated to maintain confidentiality with regard\nto the reporter and details of an incident.\n\nWhat happens next?\n~~~~~~~~~~~~~~~~~~\n\nYou will receive an email acknowledging receipt of your complaint.\nThe core team will immediately meet to review the incident and determine:\n\n- What happened.\n- Whether this event constitutes a code of conduct violation.\n- Who the bad actor was.\n- Whether this is an ongoing situation, or if there is a threat to anyone's\n  physical safety.\n- If this is determined to be an ongoing incident or a threat to physical safety,\n  the working groups' immediate priority will be to protect everyone involved.\n\nIf a member of the core team is one of the named parties, they will not be\nincluded in any discussions, and will not be provided with any confidential\ndetails from the reporter.\n\nIf anyone on the core team believes they have a conflict of interest in\nadjudicating on a reported issue, they will inform the other core team\nmembers, and exempt themselves from any discussion about the issue.\nFollowing this declaration, they will not be provided with any confidential\ndetails from the reporter.\n\nOnce the working group has a complete account of the events they will make a\ndecision as to how to respond. 
Responses may include:\n\n- Nothing (if we determine no violation occurred).\n- A private reprimand from the working group to the individual(s) involved.\n- A public reprimand.\n- An imposed vacation\n- A permanent or temporary ban from some or all spaces (GitHub repositories, etc.)\n- A request for a public or private apology.\n\nWe'll respond within one week to the person who filed the report with either a\nresolution or an explanation of why the situation is not yet resolved.\n\nOnce we've determined our final action, we'll contact the original reporter\nto let them know what action (if any) we'll be taking. We'll take into account\nfeedback from the reporter on the appropriateness of our response, but we\ndon't guarantee we'll act on it.\n\nAcknowledgement\n---------------\n\nThis CoC is modified from the one by `BeeWare`_, which in turn refers to\nthe `Contributor Covenant`_ and the `Django`_ project.\n\n.. _BeeWare: https://beeware.org/community/behavior/code-of-conduct/\n.. _Contributor Covenant: https://www.contributor-covenant.org/version/1/3/0/code-of-conduct/\n.. _Django: https://www.djangoproject.com/conduct/reporting/\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/community.rst",
    "content": "Community\n=========\n\nIntake is used and developed by individuals at a variety of institutions.  It\nis open source (`license <https://github.com/intake/intake/blob/master/LICENSE>`_)\nand sits within the broader Python numeric ecosystem commonly referred to as\nPyData or SciPy.\n\nDiscussion\n----------\n\nConversation happens in the following places:\n\n1.  **Usage questions** are directed to `Stack Overflow with the #intake tag`_.\n    Intake developers monitor this tag.\n2.  **Bug reports and feature requests** are managed on the `GitHub issue\n    tracker`_. Individual intake plugins are managed in separate repositories\n    each with its own issue tracker. Please consult the :doc:`plugin-directory`\n    for a list of available plugins.\n3.  **Chat** occurs at `gitter.im/ContinuumIO/intake\n    <https://gitter.im/ContinuumIO/intake>`_.  Note that\n    because gitter chat is not searchable by future users we discourage usage\n    questions and bug reports on gitter and instead ask people to use Stack\n    Overflow or GitHub.\n4.  **Monthly community meeting** happens the first Thursday of the month at\n    9:00 US Central Time. See `<https://github.com/intake/intake/issues/596>`_,\n    with a reminder sent out on the gitter channel. Strictly informal chatter.\n\n\n.. _`Stack Overflow with the #intake tag`: https://stackoverflow.com/questions/tagged/intake\n.. _`GitHub issue tracker`: https://github.com/intake/intake/issues/\n\n\nAsking for help\n---------------\n\nWe welcome usage questions and bug reports from all users, even those who are\nnew to using the project.  There are a few things you can do to improve the\nlikelihood of quickly getting a good answer.\n\n1.  **Ask questions in the right place**:  We strongly prefer the use\n    of Stack Overflow or GitHub issues over Gitter chat.  GitHub and\n    Stack Overflow are more easily searchable by future users, and therefore are more\n    efficient for everyone's time.  
Gitter chat is strictly reserved for\n    developer and community discussion.\n\n    If you have a general question about how something should work or\n    want best practices then use Stack Overflow.  If you think you have found a\n    bug then use GitHub\n\n2.  **Ask only in one place**: Please restrict yourself to posting your\n    question in only one place (likely Stack Overflow or GitHub) and don't post\n    in both\n\n3.  **Create a minimal example**:  It is ideal to create `minimal, complete,\n    verifiable examples <https://stackoverflow.com/help/mcve>`_.  This\n    significantly reduces the time that answerers spend understanding your\n    situation, resulting in higher quality answers more quickly.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/conf.py",
    "content": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n#\n# intake documentation build configuration file, created by\n# sphinx-quickstart on Jan 8 09:15:00 2018.\n#\n# This file is execfile()d with the current directory set to its\n# containing dir.\n#\n# Note that not all possible configuration values are present in this\n# autogenerated file.\n#\n# All configuration values have a default; values that are commented out\n# serve to show the default.\n\n# If extensions (or modules to document with autodoc) are in another directory,\n# add these directories to sys.path here. If the directory is relative to the\n# documentation root, use os.path.abspath to make it absolute, like shown here.\n#\nimport os\nimport sys\n\nsys.path.insert(0, os.path.abspath(\"../..\"))\nimport intake\n\n# -- General configuration ------------------------------------------------\n\n# If your documentation needs a minimal Sphinx version, state it here.\n#\n# needs_sphinx = '1.0'\n\n# Add any Sphinx extension module names here, as strings. 
They can be\n# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom\n# ones.\nextensions = [\n    \"sphinx.ext.intersphinx\",\n    \"sphinx.ext.autodoc\",\n    \"sphinx.ext.autosummary\",\n    \"numpydoc\",\n]\n\n# Add any paths that contain templates here, relative to this directory.\ntemplates_path = [\"_templates\"]\n\n# The suffix(es) of source filenames.\n# You can specify multiple suffix as a list of string:\n#\n# source_suffix = ['.rst', '.md']\nsource_suffix = \".rst\"\n\n# The master toctree document.\nmaster_doc = \"index\"\n\n# General information about the project.\nproject = \"intake\"\ncopyright = \"2018, Anaconda\"\nauthor = \"Anaconda\"\n\n# The version info for the project you're documenting, acts as replacement for\n# |version| and |release|, also used in various other places throughout the\n# built documents.\n#\n# The short X.Y version.\nversion = intake.__version__.split(\"+\")[0]\n# The full version, including alpha/beta/rc tags.\nrelease = intake.__version__\n\n# The language for content autogenerated by Sphinx. Refer to documentation\n# for a list of supported languages.\n#\n# This is also used if you do content translation via gettext catalogs.\n# Usually you set \"language\" from the command line for these cases.\nlanguage = \"en\"\n\n# List of patterns, relative to source directory, that match files and\n# directories to ignore when looking for source files.\n# This patterns also effect to html_static_path and html_extra_path\nexclude_patterns = [\"**.ipynb_checkpoints\"]\n\n# The name of the Pygments (syntax highlighting) style to use.\npygments_style = \"sphinx\"\n\n# If true, `todo` and `todoList` produce output, else they produce nothing.\ntodo_include_todos = False\n\n\n# -- Options for HTML output ----------------------------------------------\n\n# The theme to use for HTML and HTML Help pages.  
See the documentation for\n# a list of builtin themes.\n#\n\nhtml_theme = \"sphinx_rtd_theme\"\nhtml_favicon = \"_static/images/favicon.ico\"\n\n# on_rtd is whether we are on readthedocs.org\non_rtd = os.environ.get(\"READTHEDOCS\", None) == \"True\"\n\nif not on_rtd:\n    # only import and set the theme if we're building docs locally\n    # otherwise, readthedocs.org uses their theme by default, so no need to specify it\n    import sphinx_rtd_theme\n\n    html_theme = \"sphinx_rtd_theme\"\n\n\n# Theme options are theme-specific and customize the look and feel of a theme\n# further.  For a list of options available for each theme, see the\n# documentation.\n#\n# html_theme_options = {}\n\n# Default title is \"<project> v<release> documentation\"\nhtml_title = \"Intake documentation\"\n\n# Add any paths that contain custom static files (such as style sheets) here,\n# relative to this directory. They are copied after the builtin static files,\n# so a file named \"default.css\" will overwrite the builtin \"default.css\".\nhtml_static_path = [\"_static\"]\nhtml_css_files = [\n    \"css/custom.css\",\n]\n\n\n# -- Options for HTMLHelp output ------------------------------------------\n\n# Output file base name for HTML help builder.\nhtmlhelp_basename = \"intakedoc\"\n\n\n# -- Options for LaTeX output ---------------------------------------------\n\nlatex_elements = {\n    # The paper size ('letterpaper' or 'a4paper').\n    #\n    # 'papersize': 'letterpaper',\n    # The font size ('10pt', '11pt' or '12pt').\n    #\n    # 'pointsize': '10pt',\n    # Additional stuff for the LaTeX preamble.\n    #\n    # 'preamble': '',\n    # Latex figure (float) alignment\n    #\n    # 'figure_align': 'htbp',\n}\n\n# Grouping the document tree into LaTeX files. 
List of tuples\n# (source start file, target name, title,\n#  author, documentclass [howto, manual, or own class]).\nlatex_documents = [\n    (master_doc, \"intake.tex\", \"Intake Documentation\", \"Anaconda\", \"manual\"),\n]\n\n\n# -- Options for manual page output ---------------------------------------\n\n# One entry per manual page. List of tuples\n# (source start file, name, description, authors, manual section).\nman_pages = [(master_doc, \"intake\", \"Intake Documentation\", [author], 1)]\n\n\n# -- Options for Texinfo output -------------------------------------------\n\n# Grouping the document tree into Texinfo files. List of tuples\n# (source start file, target name, title, author,\n#  dir menu entry, description, category)\ntexinfo_documents = [\n    (\n        master_doc,\n        \"intake\",\n        \"Intake Documentation\",\n        author,\n        \"intake\",\n        \"Fast data ingestion for Python.\",\n        \"Miscellaneous\",\n    ),\n]\n\n\n# Example configuration for intersphinx: refer to the Python standard library.\nintersphinx_mapping = {\"python\": (\"https://docs.python.org/\", None)}\n\n\n# Config numpydoc\nnumpydoc_show_class_members = False\nnumpydoc_show_inherited_class_members = False\nnumpydoc_class_members_toctree = False\n"
  },
  {
    "path": "docs/source/contributing.rst",
    "content": "Contributor guide\n=================\n\n``intake`` is an open-source project (see the LICENSE). We welcome contributions from\nthe public, for code, including fixes and new features, documentation and anything else\nthat will help make this repo better. Even posting issues can be very useful, as they\nwill help others to make the necessary changes to alleviate the issue.\n\nDevelopment process\n-------------------\n\nDevelopment of ``intake`` happens on `github`_. There you will find options for\ncreating issues and commenting on existing issues and pull-requests (PRs). You may\nwish to \"watch\" the repo, to be notified of changes as they occur. You must have an\naccount on github to be able to interact here, but this is free. By default, you\nwill be notified of changes (e.g., new comments) on any issue or PR you have interacted\nwith.\n\nIn order to propose changes to the repo itself, you will need to create a PR. This is\ndone by following these steps:\n\n1. clone the repo. There are many ways to do this, but most common is the following command,\n   which will create a local directory ``intake/`` containing the code, metadata, docs and\n   version control information.\n\n.. code-block:: shell\n\n   $ git clone https://github.com/intake/intake\n\n\n2. create a fork of the repo using the github web interface. Your fork will probably live in\n   your private github namespace. Set this as a remote inside your local copy of the repo\n\n.. code-block:: shell\n\n   $ git remote add fork https://github.com/<username>/intake\n\n3. make changes locally in a new branch. First you create the branch, and then add commits\n   to that branch. Here are suggested ways to do this. Note that git is *very* flexible and\n   there are many ways to achieve each step.\n\n.. code-block:: shell\n\n   $ git checkout -b <new branch name>\n   $ git commit -a\n\n4. When your branch is in a suitable state, `push` your work to your branch. 
github will prompt\n   you with a URL to create the PR, or navigate to your fork and branch in the web interface to\n   create the PR there.\n\n.. code-block:: shell\n\n   $ git push fork\n\n5. After review from a maintainer, you may wish to push more commits to your branch as required,\n   and your PR may be accepted (\"merged\") or rejected (\"closed\").\n\n.. _github: https://github.com/intake/intake\n\nGuidelines\n----------\n\nTo make contributing as smooth as possible, we recommend the following.\n\n1. Always follow the project's Code of Conduct when interacting with other humans.\n\n2. Please describe as clearly as possible what your intent is. In the case of issues, this\n   might include pasting the whole traceback you have seen following an error, listing the\n   versions of ``intake`` and its dependencies that you have installed, describing the\n   circumstances when you saw a problem or would like better behaviour. Ideally, you would\n   include code that allows maintainers to fully reproduce your steps.\n\n3. When submitting changes, make sure that you describe what the changes achieve and how.\n   Ideally, all code should be covered by tests included in the same PR, and that run to\n   completion as part of CI (see below).\n\n4. New functions and classes should include reasonable\n   `style`, e.g., appropriate labels and hierarchy, indentation and other code formatting\n   matching the rest of the docs, and docstrings and comments as appropriate. A \"precommit\"\n   set of linters is available to run against your code, and runs as part of CI to enforce\n   a minimal set of style rules. To run these locally on every commit, you can run this in the\n   repo root:\n\n.. code-block:: shell\n\n   $ pre-commit install\n\n5. Additions to the prose documentation (under docs/source/) should be included for new\n   or altered features. 
After the initial full release, we will be maintaining a changelog.\n\nTesting\n-------\n\nThis repo uses ``pytest`` for testing. You can install test dependencies, for example with\nthis command run in the repo root. There are many optional dependencies for specific tests,\nand we recommend that you use ``pytest.importorskip`` in tests that need these or additional\npackages, so that they will not fail for developers without those dependencies. **Do**, however,\nedit one or more files in scripts/ci/, to ensure that added tests will execute in at least one\nof the CI runs.\n\nThe easiest way to bootstrap a development environment in order to run tests as they will in\nCI is to use conda-env, e.g.:\n\n.. code-block:: shell\n\n   $ conda env create -y -f scripts/ci/environment-py313.yml\n   $ conda activate test_env\n\nTo run the tests:\n\n.. code-block:: shell\n\n   $ pytest -v\n\nNote that ensuring coverage is optional, but recommended.\n\nAdding docs\n-----------\n\nDocstrings, prose text and examples/tutorials are eagerly accepted! We, as coders, often\nare late to fully document our work, and all contributions are welcome. Separate instructions\ncan be found in the docs/README.md file.\n\nIn addition, full notebook examples may be added in the examples/ directory, but you\nshould be sure to add instructions on the appropriate environment or other preparation\nrequired to run them.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/data-packages.rst",
    "content": "Making Data Packages\n====================\n\nIntake can be used to create :term:`Data packages`, so that you can easily distribute\nyour catalogs - others can just \"install data\". Since you may also want to distribute\ncustom catalogues, perhaps with visualisations, and driver code, packaging these things\ntogether is a great convenience. Indeed, packaging gives you the opportunity to\nversion-tag your distribution and to declare the requirements needed to be able to\nuse the data. This is a common pattern for distributing code for python and other\nlanguages, but not commonly seen for data artifacts.\n\nThe current version of Intake allows making data packages using standard python\ntools (to be installed, for example, using ``pip``).\nThe previous, now deprecated, technique is still described below, under\n:ref:`condapack` and is specific to the `conda` packaging system.\n\nPython packaging solution\n-------------------------\n\nIntake allows you to register data artifacts (catalogs and data sources) in the\nmetadata of a python package. This means that when you install that package, intake\nwill automatically know of the registered items, and they will appear within the\n\"builtin\" catalog ``intake.cat``.\n\nHere we assume that you understand what is meant by a python package (i.e., a\nfolder containing ``__init__.py`` and other code, config and data files).\nFurthermore, you should familiarise yourself with what is required for\nbundling such a package into a *distributable* package (one with a ``setup.py``)\nby reading the `official packaging documentation`_.\n\n.. _official packaging documentation: https://packaging.python.org/tutorials/packaging-projects/\n\nThe `intake examples`_ contains a full tutorial for packaging and distributing\nintake data and/or catalogs for ``pip`` and ``conda``, see the directory\n\"data_package/\".\n\n.. 
_intake examples: https://github.com/intake/intake-examples\n\nEntry points definition\n'''''''''''''''''''''''\n\nIntake uses the concept of `entry points` to define the entries that are defined\nby a given package. Entry points provide a mechanism to register metadata about a\npackage at install time, so that it can easily be found by other packages such as Intake.\nEntry points was originally a `separate package`_, but is included in the standard\nlibrary as of python 3.8 (you will not need to install it, as Intake requires it).\n\nAll you need to do to register an entry in ``intake.cat`` is:\n\n- define a data source somewhere in your package. This object can\n  be of any type that makes sense to Intake, including Catalogs, and sources\n  that have drivers defined in the very same package. Obviously, if you can have\n  catalogs, you can populate these however you wish, including with more catalogs.\n  You need not be restricted to simply loading in YAML files.\n- include a block in your call to ``setup`` in ``setup.py`` with code something like\n\n.. code-block:: python\n\n    entry_points={\n        'intake.catalogs': [\n            'sea_cat = intake_example_package:cat',\n            'sea_data = intake_example_package:data'\n        ]\n    }\n\n  Here only the lines with \"sea_cat\" and \"sea_data\" are specific to the example\n  package, the rest is required boilerplate. Each of those two lines defines a name\n  for the data entry (before the \"=\" sign) and the location to load from, in\n  module:object format.\n\n- install the package using ``pip``, ``python setup.py``, or package it for ``conda``\n\n.. _separate package: https://github.com/takluyver/entrypoints\n\nIntake's process\n''''''''''''''''\n\nWhen Intake is imported, it investigates all registered entry points with the\n``\"intake.catalogs\"`` group. It will go through and assign each name to the\ngiven location of the final object. 
In the above example, ``intake.cat.sea_cat``\nwould be associated with the ``cat`` object in the ``intake_example_package``\npackage, and so on.\n\nNote that Intake does **not** immediately import the given package or module, because imports\ncan sometimes be expensive, and if you have a lot of data packages, it might cause\na slow-down every time that Intake is imported. Instead, a placeholder entry is\ncreated, and whenever the entry is accessed, that's when the particular package\nwill be imported.\n\n.. code-block:: python\n\n    In [1]: import intake\n\n    In [2]: intake.cat.sea_cat  # does not import yet\n    Out[2]: <Entry containing Catalog named sea_cat>\n\n    In [3]: cat = intake.cat.sea_cat()  # imports now\n\n    In [4]: cat   # this data source happens to be a catalog\n    Out[4]: <Intake catalog: sea>\n\n(note here the parentheses - this explicitly initialises the source, and normally\nyou don't have to do this)\n\n.. _condapack:\n\nPure conda solution\n-------------------\n\nThis packaging method is deprecated, but still available.\n\nCombined with the `Conda Package Manager <https://conda.io/docs/>`_, Intake\nmakes it possible to create :term:`Data packages` which can be installed and upgraded just like\nsoftware packages.  
This offers several advantages:\n\n  * Distributing Catalogs and Drivers becomes as easy as ``conda install``\n  * Data packages can be versioned, improving reproducibility in some cases\n  * Data packages can depend on the libraries required for reading\n  * Data packages can be self-describing using Intake catalog files\n  * Applications that need certain Catalogs can include data packages in their dependency list\n\nIn this tutorial, we give a walk-through to enable you to distribute any\nCatalogs to others, so that they can access the data using Intake without worrying about where it\nresides or how it should be loaded.\n\nImplementation\n''''''''''''''\n\nThe function ``intake.catalog.default.load_combo_catalog`` searches for YAML catalog files in a number\nof places at import. All entries in these catalogs are flattened and placed in the \"builtin\"\n``intake.cat``.\n\nThe places searched are:\n\n  * a platform-specific user directory as given by the `appdirs`_ package\n  * in the environment's \"/share/intake\" data directory, where the location of the current environment\n    is found from virtualenv or conda environment variables\n  * in directories listed in the \"INTAKE_PATH\" environment variable or \"catalog_path\" config parameter\n\n.. _appdirs: https://github.com/ActiveState/appdirs\n\nDefining a Package\n''''''''''''''''''\n\nThe steps involved in creating a data package are:\n\n1. Identifying a dataset, which can be accessed via a URL or included directly as one or more files in the package.\n\n2. Creating a package containing:\n\n   * an intake catalog file\n   * a ``meta.yaml`` file (description of the data, version, requirements, etc.)\n   * a script to copy the data\n\n3. Building the package using the command ``conda build``.\n\n4. 
Uploading the package to a package repository such as `Anaconda Cloud <https://anaconda.org>`_ or your own private\n   repository.\n\nData packages are standard conda packages that install an Intake catalog file into the user's conda environment\n(``$CONDA_PREFIX/share/intake``).  A data package does not necessarily imply there are data files inside the package.\nA data package could describe remote data sources (such as files in S3) and take up very little space on disk.\n\nThese packages are considered ``noarch`` packages, so that one package can be installed on any platform, with any\nversion of Python (or no Python at all).  The easiest way to create such a package is using a\n`conda build <https://conda.io/docs/commands/build/conda-build.html>`_ recipe.\n\nConda-build recipes are stored in a directory that contains a files like:\n\n  * ``meta.yaml`` - description of package metadata\n  * ``build.sh`` - script for building/installing package contents (on Linux/macOS)\n  * other files needed by the package (catalog files and data files for data packages)\n\nAn example that packages up data from a Github repository would look like this:\n\n.. code-block:: yaml\n\n    # meta.yaml\n    package:\n      version: '1.0.0'\n      name: 'data-us-states'\n\n    source:\n      git_rev: v1.0.0\n      git_url: https://github.com/CivilServiceUSA/us-states\n\n    build:\n      number: 0\n      noarch: generic\n\n    requirements:\n      run:\n        - intake\n      build: []\n\n    about:\n      description: Data about US states from CivilServices (https://civil.services/)\n      license: MIT\n      license_family: MIT\n      summary: Data about US states from CivilServices\n\nThe key parts of a data package recipe (different from typical conda recipes) is the ``build`` section:\n\n.. 
code-block:: yaml\n\n    build:\n      number: 0\n      noarch: generic\n\nThis will create a package that can be installed on any platform, regardless of the platform where the package is\nbuilt.  If you need to rebuild a package, the build number can be incremented to ensure users get the latest version when they run ``conda update``.\n\nThe corresponding ``build.sh`` file in the recipe looks like this:\n\n.. code-block:: bash\n\n    #!/bin/bash\n\n    mkdir -p $PREFIX/share/intake/civilservices\n    cp $SRC_DIR/data/states.csv $PREFIX/share/intake/civilservices\n    cp $RECIPE_DIR/us_states.yaml $PREFIX/share/intake/\n\nThe ``$SRC_DIR`` variable refers to any source tree checked out (from Github or other service), and the\n``$RECIPE_DIR`` refers to the directory where the ``meta.yaml`` is located.\n\nFinishing out this example, the catalog file for this data source looks like this:\n\n.. code-block:: yaml\n\n    sources:\n      states:\n        description: US state information from [CivilServices](https://civil.services/)\n        driver: csv\n        args:\n          urlpath: '{{ CATALOG_DIR }}/civilservices/states.csv'\n        metadata:\n          origin_url: 'https://github.com/CivilServiceUSA/us-states/blob/v1.0.0/data/states.csv'\n\nThe ``{{ CATALOG_DIR }}`` Jinja2 variable is used to construct a path relative to where the catalog file was installed.\n\nTo build the package, you must have conda-build installed:\n\n.. code-block:: bash\n\n    conda install conda-build\n\nBuilding the package requires no special arguments:\n\n.. code-block:: bash\n\n    conda build my_recipe_dir\n\nConda-build will display the path of the built package, which you will need in order to upload it.\n\nIf you want your data package to be publicly available on `Anaconda Cloud <https://anaconda.org>`_, you can install\nthe anaconda-client utility:\n\n.. 
code-block:: bash\n\n    conda install anaconda-client\n\nThen you can register your Anaconda Cloud credentials and upload the package:\n\n.. code-block:: bash\n\n    anaconda login\n    anaconda upload /Users/intake_user/anaconda/conda-bld/noarch/data-us-states-1.0.0-0.tar.bz2\n\nBest Practices\n--------------\n\nVersioning\n''''''''''\n\n* Versions for data packages should be used to indicate changes in the data values or schema.  This allows applications\n  to easily pin to the specific data version they depend on.\n\n* Putting data files into a package ensures reproducibility by allowing a version number to be associated with files\n  on disk.  This can consume quite a bit of disk space for the user, however. Large data files are not generally\n  included in pip or conda packages so, if possible, you should reference the data assets in an external place where they\n  can be loaded.\n\nPackaging\n'''''''''\n\n* Packages that refer to remote data sources (such as databases and REST APIs) need to think about authentication.\n  Do not include authentication credentials inside a data package.  They should be obtained from the environment.\n\n* Data packages should depend on the Intake plugins required to read the data, or Intake itself.\n\n* You may well want to break any driver code out into a separate package so that it can be updated\n  independently of the data. The data package would then depend on the driver package.\n\nNested catalogs\n'''''''''''''''\n\nAs noted above, entries will appear in the users' builtin\ncatalog as ``intake.cat.*``. In the case that the catalog has multiple entries, it may be desirable\nto put the entries below a namespace as ``intake.cat.data_package.*``. This can be achieved by having\none catalog containing the (several) data sources, with only a single top-level entry pointing to\nit. This catalog could be defined in a YAML file, created using any other catalog driver, or constructed\nin the code, e.g.:\n\n.. 
code-block:: python\n\n    from intake.catalog import Catalog\n    from intake.catalog.local import LocalCatalogEntry as Entry\n    cat = Catalog()\n    cat._entries = {name: Entry(name, descr, driver='package.module.driver',\n                                  args={\"urlpath\": url})\n                              for name, url in my_input_list}\n\nIf your package contains many sources of different types, you may even nest the catalogs, i.e.,\nhave a top-level catalog whose contents are also catalogs.\n\n.. code-block:: python\n\n    e = Entry('first_cat', 'sample', driver='catalog')\n    e._default_source = cat\n    top_level = Catalog()\n    top_level._entries = {'first_cat': e, ...}\n\nwhere your entry point might look something like: ``\"my_cat = my_package:top_level\"``. You could achieve the same\nwith multiple YAML files.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/deployments.rst",
    "content": "Deployment Scenarios\n--------------------\n\nIn the following sections, we will describe some of the ways in which Intake is used in real\nproduction systems. These go well beyond the typical YAML files presented in the quickstart\nand examples sections, which are necessarily short and simple, and do not demonstrate the\nfull power of Intake.\n\nSharing YAML files\n~~~~~~~~~~~~~~~~~~\n\nThis is the simplest scenario, and amply described in these documents. The primary\nadvantage is simplicity: it is enough to put a file in an accessible place (even\na gist or repo), in order\nfor someone else to be able to discover and load that data. Furthermore, such\nfiles can easily refer to one-another, to build up a full tree of data assets with\nminimum pain Since YAML files are\ntext, this also lends itself to working well with version control systems.\nFurthermore, all sources can describe themselves as YAML, and the\n``export`` and ``upload`` commands can produce an efficient format (possibly remote) together\nwith YAML definition in a single step.\n\nPangeo\n~~~~~~\n\nThe `Pangeo`_ collaboration uses Intake to catalog their data holdings, which are generally\nin various forms of netCDF-compliant formats, massive multi-dimensional arrays with data\nrelating to earth and climate science and meteorology. On their cloud-based platform,\ncontainers start up jupyter-lab sessions which have Intake installed, and therefore can\nsimply pick and load the data that each researcher needs - often requiring large Dask\nclusters to actually do the processing.\n\nA `static <https://pangeo-data.github.io/pangeo-datastore/>`__ rendering of the catalog\ncontents is available, so that users can browse the holdings\nwithout even starting a python session. 
This rendering is produced by CI on the\n`repo <https://github.com/pangeo-data/pangeo-datastore>`__ whenever new definitions are\nadded, and it also checks (using Intake) that each definition is indeed loadable.\n\nPangeo also developed intake-stac, which can talk to STAC servers to make real-time\nqueries and parse the results into Intake data sources. This is a standard for\nspatio-temporal data assets, and indexes massive amounts of cloud-stored data.\n\n.. _Pangeo: http://pangeo.io/\n\nAnaconda Enterprise\n~~~~~~~~~~~~~~~~~~~\n\nIntake will be the basis of the data access and cataloging service within\n`Anaconda Enterprise`_, running as a micro-service in a container, and offering data\nsource definitions to users. The access control, who gets to see which data-set,\nand serving of credentials to be able to read from the various data storage services,\nwill all be handled by the platform and be fully configurable by admins.\n\n.. _Anaconda Enterprise: https://www.anaconda.com/enterprise/\n\nNational Center for Atmospheric Research\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nNCAR has developed `intake-esm`_, a mechanism for creating file-based Intake catalogs\nfor climate data from project efforts such as the `Coupled Model Intercomparison Project (CMIP)`_\nand the `Community Earth System Model (CESM) Large Ensemble Project`_.\nThese projects produce a huge amount of climate data persisted on tape, disk storage components\nacross multiple (of the order ~300,000) netCDF files. Finding, investigating, loading these files into data array containers\nsuch as `xarray` can be a daunting task due to the large number of files a user may be interested in.\n``Intake-esm`` addresses this issue in three steps:\n\n- `Datasets Catalog Curation`_ in the form of YAML files. These YAML files provide information about data locations,\n  access pattern,  directory structure, etc. 
``intake-esm`` uses these YAML files in conjunction with file name templates\n  to construct a local database. Each row in this database consists of a set of metadata such as ``experiment``,\n  ``modeling realm``, ``frequency`` corresponding to data contained in one netCDF file.\n\n.. code-block:: python\n\n   cat = intake.open_esm_metadatastore(catalog_input_definition=\"GLADE-CMIP5\")\n\n\n- Search and Discovery: once the database is built, ``intake-esm`` can be used for searching and discovering\n  of climate datasets by eliminating the need for the user to know specific locations (file path) of\n  their data set of interest:\n\n.. code-block:: python\n\n   sub_cat = cat.search(variable=['hfls'], frequency='mon', modeling_realm='atmos', institute=['CCCma', 'CNRM-CERFACS'])\n\n- Access: when the user is satisfied with the results of their query, they can ask ``intake-esm``\n  to load the actual netCDF files into xarray datasets:\n\n.. code-block:: python\n\n   dsets = cat.to_xarray(decode_times=True, chunks={'time': 50})\n\n.. _intake-esm: https://github.com/NCAR/intake-esm\n.. _Datasets Catalog Curation: https://github.com/NCAR/intake-esm-datastore\n.. _Coupled Model Intercomparison Project (CMIP): https://www.wcrp-climate.org/wgcm-cmip\n.. _Community Earth System Model (CESM) Large Ensemble Project: http://www.cesm.ucar.edu/projects/community-projects/LENS/\n\nBrookhaven Archive\n~~~~~~~~~~~~~~~~~~\n\nThe `Bluesky`_ project uses Intake to dynamically query a MongoDB instance, which\nholds the details of experimental and simulation data catalogs, to return a\ncustom Catalog for every query. Data-sets can then be loaded into python, or the original\nraw data can be accessed ...\n\n.. _Bluesky: https://github.com/bluesky/intake-bluesky\n\nZillow\n~~~~~~\n\nZillow is developing Intake to meet the needs of their datalake access layer (DAL),\nto encapsulate the highly hierarchical nature of their data. 
Of particular importance\nis the ability to provide different versions (testing/production, and different\nstorage formats) of the same logical dataset, depending on\nwhether it is being read on a laptop versus the production infrastructure ...\n\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/examples.rst",
    "content": "Examples\n========\n\nHere we list links to notebooks and other code demonstrating the use of Intake in various\nscenarios. The first section is of general interest to various users, and the sections that\nfollow tend to be more specific about particular features and workflows.\n\nMany of the entries here include a link to Binder, which a service that lest you execute\ncode live in a notebook environment. This is a great way to experience using Intake.\nIt can take a while, sometimes, for Binder to come up; please have patience.\n\nSee also the `examples`_ repository, containing data-sets which can be built and installed\nas conda packages.\n\n.. _examples: https://github.com/intake/intake-examples/\n\nGeneral\n-------\n\n- Basic Data scientist workflow: using Intake\n  [`Static <https://github.com/intake/intake-examples/blob/master/tutorial/data_scientist.ipynb>`__]\n  [`Executable <https://mybinder.org/v2/gh/intake/intake-examples/master?filepath=tutorial%2Fdata_scientist.ipynb>`__].\n\n- Workflow for creating catalogs: a Data Engineer's approach to Intake\n  [`Static <https://github.com/intake/intake-examples/blob/master/tutorial/data_engineer.ipynb>`__]\n  [`Executable <https://mybinder.org/v2/gh/intake/intake-examples/master?filepath=tutorial%2Fdata_engineer.ipynb>`__]\n\nDeveloper\n---------\n\nTutorials delving deeper into the Internals of Intake, for those who wish to contribute\n\n- How you would go about writing a new plugin\n  [`Static <https://github.com/intake/intake-examples/blob/master/tutorial/dev.ipynb>`__]\n  [`Executable <https://mybinder.org/v2/gh/intake/intake-examples/master?filepath=tutorial%2Fdev.ipynb>`__]\n\nFeatures\n--------\n\nMore specific examples of Intake functionality\n\n- Caching:\n\n    - New-style data package creation [`Static <https://github.com/intake/intake-examples/tree/master/data_package>`__]\n\n    - Using automatically cached data-files\n      [`Static 
<https://github.com/mmccarty/intake-blog/blob/master/examples/caching.ipynb>`__]\n      [`Executable <https://mybinder.org/v2/gh/mmccarty/intake-blog/master?filepath=examples%2Fcaching.ipynb>`__]\n\n    - Earth science demonstration of cached dataset\n      [`Static <https://github.com/mmccarty/intake-blog/blob/master/examples/Walker_Lake.ipynb>`__]\n      [`Executable <https://mybinder.org/v2/gh/mmccarty/intake-blog/master?filepath=examples%2FWalker_Lake.ipynb>`__]\n\n- File-name pattern parsing:\n\n    - Satellite imagery, science workflow\n      [`Static <https://github.com/jsignell/intake-blog/blob/master/path-as-pattern/landsat.ipynb>`__]\n      [`Executable <https://mybinder.org/v2/gh/jsignell/intake-blog/master?filepath=path-as-pattern%2Flandsat.ipynb>`__]\n\n    - How to set up pattern parsing\n      [`Static <https://github.com/jsignell/intake-blog/blob/master/path-as-pattern/csv.ipynb>`__]\n      [`Executable <https://mybinder.org/v2/gh/jsignell/intake-blog/master?filepath=path-as-pattern%2Fcsv.ipynb>`__]\n\n- Custom catalogs:\n\n    - A custom intake plugin that adapts DCAT catalogs\n      [`Static <https://github.com/CityOfLosAngeles/intake-dcat/blob/master/examples/demo.ipynb>`__]\n      [`Executable <https://mybinder.org/v2/gh/CityOfLosAngeles/intake-dcat/master?urlpath=lab%2Ftree%2Fexamples%2Fdemo.ipynb>`__]\n\n\nData\n----\n\n- `Anaconda package data`_, originally announced in `this blog`_\n- `Planet Four Catalog`_, originally from https://www.planetfour.org/results\n- The official Intake `examples`_\n\n.. _Anaconda package data: https://github.com/ContinuumIO/anaconda-package-data\n.. _this blog: https://www.anaconda.com/announcing-public-anaconda-package-download-data/\n.. 
_Planet Four Catalog: https://github.com/michaelaye/p4catalog\n\nBlogs\n-----\n\nThese are Intake-related articles that may be of interest.\n\n- `Discovering and Exploring Data in a Graphical Interface`_\n- `Taking the Pain out of Data Access`_\n- `Caching Data on First Read Makes Future Analysis Faster`_\n- `Parsing Data from Filenames and Paths`_\n- `Intake for cataloguing Spark`_\n- `Intake released on Conda-Forge`_\n\n.. _Discovering and Exploring Data in a Graphical Interface: https://www.anaconda.com/intake-discovering-and-exploring-data-in-a-graphical-interface/\n.. _Intake for cataloguing Spark: https://www.anaconda.com/intake-for-cataloging-spark/\n.. _Taking the Pain out of Data Access: https://www.anaconda.com/intake-taking-the-pain-out-of-data-access/\n.. _Caching Data on First Read Makes Future Analysis Faster: https://www.anaconda.com/intake-caching-data-on-first-read-makes-future-analysis-faster/\n.. _Parsing Data from Filenames and Paths: https://www.anaconda.com/intake-parsing-data-from-filenames-and-paths/\n.. _Intake released on Conda-Forge: https://www.anaconda.com/intake-released-on-conda-forge/\n\nTalks\n-----\n\n- `__init__ podcast interview (May 2019)`_\n- `AnacondaCon (March 2019)`_\n- `PyData DC (November 2018)`_\n- `PyData NYC (October 2018)`_\n- `ESIP tech dive (November 2018)`_\n\n.. _\\__init__ podcast interview (May 2019): https://www.pythonpodcast.com/intake-data-catalog-episode-213/\n.. _ESIP tech dive (November 2018): https://www.youtube.com/watch?v=PSD7r3JFml0&feature=youtu.be\n.. _PyData DC (November 2018): https://www.youtube.com/watch?v=OvZFtePHKXw\n.. _PyData NYC (October 2018): https://www.youtube.com/watch?v=pjkMmJQfTb8\n.. _AnacondaCon (March 2019): https://www.youtube.com/watch?v=oyZJrROQzUs\n\nNews\n----\n\n- See our `Wiki`_ page\n\n.. _Wiki: https://github.com/intake/intake/wiki/Community-News\n\n.. 
raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/glossary.rst",
    "content": "Glossary\n========\n\n.. glossary::\n\n    Argument\n        One of a set of values passed to a function or class. In the Intake sense, this usually is the\n        set of key-value pairs defined in the \"args\" section of a source definition; unless the user\n        overrides, these will be used for instantiating the source.\n\n    Cache\n        Local copies of remote files. Intake allows for download-on-first-use for data-sources,\n        so that subsequent access is much faster. The\n        format of the files is unchanged in this case, but may be decompressed.\n\n    Catalog\n        An inventory of entries, each of which corresponds to a specific :term:`Data-set`. Within these docs, a catalog is\n        most commonly defined in a :term:`YAML` file, for simplicity, but there are other possibilities, such as connecting to an Intake\n        server or another third-party data service, like a SQL database. Thus, catalogs form a hierarchy: any\n        catalog can contain other, nested catalogs.\n\n    Catalog file\n        A :term:`YAML` specification file which contains a list of named entries describing how to load data\n        sources. :doc:`catalog`.\n\n    Conda\n        A package and environment management package for the python ecosystem, see the `conda website`_. Conda ensures\n        dependencies and correct versions are installed for you, provides precompiled, binary-compatible software,\n        and extends to many languages beyond python, such as R, javascript and C.\n\n    Conda package\n        A single installable item which the :term:`Conda` application can install. A package may include\n        a :term:`Catalog`, data-files and maybe some additional code. It will also include a specification of the\n        dependencies that it requires (e.g., Intake and any additional :term:`Driver`), so that Conda can install those\n        automatically. 
Packages can be created locally, or can be found on `anaconda.org`_ or other package\n        repositories.\n\n    Container\n        One of the supported data formats. Each :term:`Driver` outputs its data in one of these. The\n        containers correspond to familiar data structures for end-analysis, such as list-of-dicts, Numpy nd-array or\n        Pandas data-frame.\n\n    Data-set\n        A specific assemblage of data. The type of data (tabular, multi-dimensional or something else) and the format\n        (file type, data service type) are all attributes of the data-set. In addition, in the context of Intake,\n        data-sets are usually entries within a :term:`Catalog` with additional descriptive text and metadata and\n        a specification of *how* to load the data.\n\n    Data Source\n        An Intake specification for a specific :term:`Data-set`. In most cases, the two terms are\n        synonymous.\n\n    Data User\n        A person who uses data to produce models and other inferences/conclusions. This\n        person generally uses standard python analysis packages like Numpy, Pandas, SKLearn and may produce\n        graphical output. They will want to be able to find the right data for a given job, and for\n        the data to be available in a standard format as quickly and\n        easily as possible. In many organisations, the appropriate job title may be Data Scientist, but\n        research scientists and BI/analysts also fit this description.\n\n    Data packages\n        Data packages are standard conda packages that install an Intake catalog file into the user’s conda\n        environment ($CONDA_PREFIX/share/intake). A data package does not necessarily imply there are data files\n        inside the package. 
A data package could describe remote data sources (such as files in S3) and take up\n        very little space on disk.\n\n    Data Provider\n        A person whose main objective is to curate data sources, get them into appropriate\n        formats, describe the contents, and disseminate the data to those that need to use them. Such a person\n        may care about the specifics of the storage format and backing store, the right number of fields\n        to keep and removing bad data. They may have a good idea of the best way to visualise any give\n        data-set. In an organisation, this job may be known as Data Engineer, but it could as easily be\n        done by a member of the IT team. These people are the most likely to author :term:`Catalogs<Catalog>`.\n\n    Developer\n        A person who writes or fixes code. In the context of Intake, a developer may make new format\n        :term:`Drivers<Driver>`, create authentication systems or add functionality to Intake itself. They can\n        take existing code for loading data in other projects, and use Intake to add extra functionality to it,\n        for instance, remote data access, parallel processing, or file-name parsing.\n\n    Driver\n        The thing that does the work of reading the data for a catalog entry is known as a driver, often referred\n        to using a simple name such as \"csv\". Intake\n        has a plugin architecture, and new drivers can be created or installed, and specific catalogs/data-sets may\n        require particular drivers for their contained data-sets. If installed as :term:`Conda` packages, then\n        these requirements will be automatically installed for you. The driver's output will be a :term:`Container`,\n        and often the code is a simpler layer over existing functionality in a third-party package.\n\n    GUI\n        A Graphical User Interface. 
Intake comes with a GUI for finding and selecting data-sets, see :doc:`gui`.\n\n    IT\n        The Information Technology team for an organisation. Such a team may have\n        control of the computing infrastructure and security (sys-ops), and may well act as gate-keepers when\n        exposing data for use by other colleagues. Commonly, IT has stronger policy enforcement requirements\n        than other groups, for instance requiring all data-set copy actions to be logged centrally.\n\n    Persist\n        A process of making a local version of a data-source. One canonical format is used for each\n        of the container types, optimised for quick and parallel access. This is particularly useful\n        if the data takes a long time to acquire, perhaps because it is the result of a complex\n        query on a remote service. The resultant output can be set to expire and be automatically\n        refreshed, see :doc:`persisting`. Not to be confused with the :term:`cache`.\n\n    Plugin\n        Modular extra functionality for Intake, provided by a package that is installed separately. The most common type of\n        plugin will be for a :term:`Driver` to load some particular data format; but other parts of Intake are\n        pluggable, such as authentication mechanisms for the server.\n\n    Server\n        A remote source for Intake catalogs. The server will\n        provide data source specifications (i.e., a remote :term:`Catalog`), and may also provide the raw data, in situations\n        where the client is not able or not allowed to access it directly. As such, the server can act as a gatekeeper of\n        the data for security and monitoring purposes. The implementation of the server in Intake is accessible as the\n        ``intake-server`` command, and acts as a reference: other implementations can easily be created for\n        specific circumstances.\n\n    TTL\n        Time-to-live, how long before the given entity is considered to have expired. 
Usually in seconds.\n\n    User Parameter\n        A data source definition can contain a \"parameters\" section, which can act as explicit decision indicators\n        for the user, or as validation and type coercion for the definition's :term:`Arguments<Argument>`. See\n        :ref:`paramdefs`.\n\n    YAML\n        A text-based format for expressing data with a dictionary (key-value) and list structure, with a limited\n        number of data-types, such as strings and numbers. YAML uses indentation to nest objects, making it easy\n        to read and write for humans, compared to JSON. Intake's catalogs and config are usually expressed in YAML\n        files.\n\n\n.. _conda website: https://conda.io/docs/\n.. _anaconda.org: http://anaconda.org\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/gui.rst",
    "content": "GUI\n===\n\nUsing the GUI\n-------------\n\n**Note**: the GUI requires ``panel`` and ``bokeh`` to\nbe available in the current environment.\n\nThe Intake top-level singleton ``intake.gui`` gives access to a graphical data browser\nwithin the Jupyter notebook. To expose it, simply enter it into a code cell (Jupyter\nautomatically display the last object in a code cell).\n\n.. image:: _static/images/gui_builtin.png\n\nNew instances of the GUI are also available by instantiating ``intake.interface.gui.GUI``,\nwhere you can specify a list of catalogs to initially include.\n\nThe GUI contains three main areas:\n\n- a **list of catalogs**. The \"builtin\" catalog, displayed by default, includes data-sets installed\n  in the system, the same as ``intake.cat``.\n\n- a **list of sources** within the currently selected catalog.\n\n- a **description** of the currently selected source.\n\n\nCatalogs\n--------\nSelecting a catalog from the list will display nested catalogs below the parent and display\nsource entries from the catalog in the **list of sources**.\n\nBelow the **lists of catalogs** is a row of buttons that are used for adding, removing and\nsearching-within catalogs:\n\n-  **Add**: opens a sub-panel for adding catalogs to the interface, by either browsing for a local\n   YAML file or by entering a URL for a catalog, which can be a remote file or Intake server\n\n-  **Remove**: deletes the currently selected catalog from the list\n\n-  **Search**: opens a sub-panel for finding entries in the currently selected catalog (and its\n   sub-catalogs)\n\nAdd Catalogs\n~~~~~~~~~~~~\n\nThe Add button (+) exposes a sub-panel with two main ways to add catalogs to the interface:\n\n.. image:: _static/images/gui_add.png\n\nThis panel has a tab to load files from **local**; from that you can navigate around the filesystem\nusing the arrow or by editing the path directly. Use the home button to get back to the starting\nplace. Select the catalog file you need. 
Use the \"Add Catalog\" button to add the catalog to the list\nabove.\n\n.. image:: _static/images/gui_add_local.png\n\nAnother tab loads a catalog from **remote**. Any URL is valid here, including cloud locations,\n``\"gcs://bucket/...\"``, and intake servers, ``\"intake://server:port\"``. Without a protocol\nspecifier, this can be a local path. Again, use the \"Add Catalog\" button to add\nthe catalog to the list above.\n\n.. image:: _static/images/gui_add_remote.png\n\nFinally, you can add catalogs to the interface in code, using the ``.add()`` method,\nwhich can take filenames, remote URLs or existing ``Catalog`` instances.\n\nRemove Catalogs\n~~~~~~~~~~~~~~~\n\nThe Remove button (-) deletes the currently selected catalog from the list. It is important to\nnote that this action does not have any impact on files; it only affects what shows up in the list.\n\n.. image:: _static/images/gui_remove.png\n\nSearch\n~~~~~~\n\nThe sub-panel opened by the Search button (🔍) allows the user to search within the selected catalog.\n\n.. image:: _static/images/gui_search.png\n\nIn the Search sub-panel the user enters free-form text. Since some catalogs contain nested sub-catalogs,\nthe Depth selector allows the search to be limited to the stated number of nesting levels.\nThis may be necessary, since, in theory, catalogs can contain circular references,\nand therefore allow for infinite recursion.\n\n.. image:: _static/images/gui_search_inputs.png\n\nUpon execution of the search, the currently selected catalog will be searched. Entries will\nbe considered to match if any of the entered words is found in the description of the entry (this\nis case-insensitive). If any matches are found, a new entry will be made in the catalog list,\nwith the suffix \"_search\".\n\n.. 
image:: _static/images/gui_search_cat.png\n\nSources\n-------\nSelecting a source from the list updates the description text on the left-side of the gui.\n\nBelow the **list of sources** is a row of buttons for inspecting the selected data source:\n\n-  **Plot**: opens a sub-panel for viewing the pre-defined (specified in the yaml) plots\n   for the selected source.\n\nPlot\n~~~~\n\nThe Plot button (📊) opens a sub-panel with an area for viewing pre-defined plots.\n\n.. image:: _static/images/gui_plot.png\n\nThese plots are specified in the catalog yaml and that yaml can be displayed by\nchecking the box next to \"show yaml\".\n\n.. image:: _static/images/gui_plot_yaml.png\n\nThe holoviews object can be retrieved from the gui using ``intake.interface.source.plot.pane.object``,\nand you can then use it in Python or export it to a file.\n\nInteractive Visualization\n'''''''''''''''''''''''''\n\nIf you have installed the optional extra packages `dfviz`_ and `xrviz`_, you can\ninteractively plot your dataframe or array data, respectively.\n\n.. image:: _static/images/custom_button.png\n\n.. _dfviz: https://dfviz.readthedocs.io/\n.. _xrviz: https://xrviz.readthedocs.io/\n\nThe button \"customize\" will be available for data sources of the appropriate type.\nClick this to open the interactive interface. If you have not selected a predefined\nplot (or there are none), then the interface will start without any prefilled\nvalues, but if you do first select a plot, then the interface will have its options\npre-filled from the options of that plot.\n\nFor specific instructions on how to use the interfaces (which can also be used\nindependently of the Intake GUI), please navigate to the linked documentation.\n\nNote that the final parameters that are sent to ``hvPlot`` to produce the output\neach time a plot is updated are explicitly available in YAML format, so that\nyou can save the state as a \"predefined plot\" in the catalog. 
The same set of\nparameters can also be used in code, with ``datasource.plot(...)``.\n\n.. image:: _static/images/YAMLtab.png\n\nUsing the Selection\n-------------------\n\nOnce catalogs are loaded and the desired sources have been identified and selected,\nthe selected sources will be available at the ``.sources`` attribute (``intake.gui.sources``).\nEach source entry has informational methods available and can be opened as a data source,\nas with any catalog entry:\n\n.. code-block:: python\n\n   In [ ]: source_entry = intake.gui.sources[0]\n           source_entry\n   Out   :\n   name: sea_ice_origin\n   container: dataframe\n   plugin: ['csv']\n   description: Arctic/Antarctic Sea Ice\n   direct_access: forbid\n   user_parameters: []\n   metadata:\n   args:\n     urlpath: https://timeseries.weebly.com/uploads/2/1/0/8/21086414/sea_ice.csv\n\n   In [ ]: data_source = source_entry()  # may specify parameters here\n           data_source.read()\n   Out   : < some data >\n\n   In [ ]: source_entry.plot()  # or skip data source step\n   Out   : < graphics>\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/guide.rst",
    "content": "User Guide\n----------\n\nMore detailed information about specific parts of Intake, such as how to author catalogs,\nhow to use the graphical interface, plotting, etc.\n\n.. toctree::\n    :maxdepth: 1\n\n    gui.rst\n    catalog.rst\n    tools.rst\n    persisting.rst\n    plotting.rst\n    plugin-directory.rst\n    transforms.rst\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/index.rst",
    "content": ".. raw:: html\n\n   <img src=\"_static/images/logo.png\" alt=\"Intake Logo\" style=\"float:right;width:94px;height:60px;\">\n\n.. _take2:\n\nIntake Take2\n============\n\n*Taking the pain out of data access and distribution*\n\nIntake is an open-source package to:\n\n- describe your data declaratively\n- gather data sets into catalogs\n- search catalogs and services to find the right data you need\n- load, transform and output data in many formats\n- work with third party remote storage and compute platforms\n\nThis is the start of the documentation for the alpha version of Intake: Take2, a\nrewrite of Intake (henceforth referred to as legacy or V1). We will give an\nintroduction to the ideas of Intake in general and specifically how to use this\nrelease. Go directly to the walkthrough and examples, or read the following motivation\nand declarations of scope.\n\n.. note::\n\n    We are making Take2 as a full release. It is still \"beta\" in the sense that we will be adding\n    many data types, readers and transformers, and are prepared to revisit the API in general. The\n    reason not to use a pre-release or RC, is that users never see these.\n\n\n.. warning::\n\n    Looking for :ref:`v1` documentation? You may have just installed Intake and found that\n    Take2 broke things for you, so you might wish to pin to an older version. Or stick around\n    and find out why you might wish to update your code. All old \"sources\", whether still working\n    or not, should be considered deprecated.\n\n\n.. toctree::\n    :maxdepth: 2\n\n    scope2.rst\n    user2.rst\n    walkthrough2.rst\n    tour2.rst\n    api2.rst\n\n    code-of-conduct.rst\n    contributing.rst\n\n    index_v1.rst\n\nInstall\n-------\n\nTo install Intake Take2:\n\n.. code-block:: bash\n\n    conda install -c conda-forge intake\n    # or\n    pip install intake\n\nPlease leave issues and discussions on our `repo page`_.\n\n.. 
_repo page: https://github.com/intake/intake\n\nIndices and tables\n==================\n\n* :ref:`genindex`\n* :ref:`modindex`\n* :ref:`search`\n\nThese docs pages collect anonymous tracking data using goatcounter, and the\ndashboard is available to the public: https://intake.goatcounter.com/ .\n\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/index_v1.rst",
    "content": ".. raw:: html\n\n   <img src=\"_static/images/logo.png\" alt=\"Intake Logo\" style=\"float:right;width:94px;height:60px;\">\n\n.. _v1:\n\nIntake Legacy\n=============\n\n*Taking the pain out of data access and distribution*\n\n\nIntake is a lightweight package for finding, investigating, loading and disseminating data. It will appeal to different\ngroups for some of the reasons below, but is useful for all and acts as a common platform that everyone can use to\nsmooth the progression of data from developers and providers to users.\n\n.. warning::\n\n    This is the Legacy documentation for Intake pre-v2. To install, please pin your versions to\n    \"<2\". You should expect old catalogs and sources to continue working, but a lot has changed,\n    so we encourage all comers to read the new documentation and adapt their catalogs and code\n    if possible.\n\n    Looking for :ref:`take2` ?\n\nIntake contains the following main components. You *do not* need to use them all! The\nlibrary is modular, only use the parts you need:\n\n* A set of **data loaders** (:term:`Drivers<Driver>`) with a common interface, so that you can\n  investigate or load anything, from local or remote, with the exact same call, and turning into data structures\n  that you already know how to manipulate, such as arrays and data-frames.\n* A **Cataloging system** (:term:`Catalogs<Catalog>`) for listing data sources, their metadata and parameters,\n  and referencing which of the Drivers should load each. 
The catalogs form a hierarchical,\n  searchable structure, which can be backed by files, Intake servers or third-party\n  data services.\n* Sets of **convenience functions** to apply to various data sources, such as data-set\n  persistence, automatic concatenation and metadata inference and the ability to\n  distribute catalogs and data sources using simple packaging abstractions.\n* A **GUI layer** accessible in the Jupyter notebook or as a standalone webserver, which\n  allows you to find and navigate catalogs, investigate data sources, and plot either\n  predefined visualisations or interactively find the right view yourself.\n* A **client-server protocol** to allow for arbitrary data cataloging services or to\n  serve the data itself, with a pluggable auth model.\n\n\n:term:`Data User`\n-----------------\n\n.. raw:: html\n\n   <img src=\"_static/images/line.png\" alt=\"Line graph\" style=\"float:left;width:160px;height:120px;padding-right:25px\">\n\n* Intake loads the data for a range of formats and types (see :ref:`plugin-directory`) into containers you already use,\n  like Pandas dataframes, Python lists, NumPy arrays, and more\n* Intake loads, then gets out of your way\n* GUI search and introspect data-sets in :term:`Catalogs<Catalog>`: quickly find what you need to do your work\n* Install data-sets and automatically get requirements\n* Leverage cloud resources and distributed computing.\n\nSee the executable tutorial:\n\n.. image:: https://mybinder.org/badge_logo.svg\n   :target: https://mybinder.org/v2/gh/intake/intake-examples/master?filepath=tutorial%2Fdata_scientist.ipynb\n\n:term:`Data Provider`\n---------------------\n\n.. 
raw:: html\n\n   <img src=\"_static/images/grid.png\" alt=\"Grid\" style=\"float:right;width:160px;height:120px;\">\n\n* Simple spec to define data sources\n* Single point of truth, no more copy&paste\n* Distribute data using packages, shared files or a server\n* Update definitions in-place\n* Parametrise user options\n* Make use of additional functionality like filename parsing and caching.\n\nSee the executable tutorial:\n\n.. image:: https://mybinder.org/badge_logo.svg\n   :target: https://mybinder.org/v2/gh/intake/intake-examples/master?filepath=tutorial%2Fdata_engineer.ipynb\n\n:term:`IT`\n----------\n\n.. raw:: html\n\n   <img src=\"_static/images/terminal.png\" alt=\"FA-terminal\" style=\"float:right;width:80px;height:80px\">\n\n* Create catalogs out of established departmental practices\n* Provide data access credentials via Intake parameters\n* Use server-client architecture as gatekeeper:\n\n   * add authentication methods\n   * add monitoring point; track the data-sets being accessed.\n\n* Hook Intake into proprietary data access systems.\n\n:term:`Developer`\n-----------------\n\n.. raw:: html\n\n   <img src=\"_static/images/code.png\" alt=\"Python code\" style=\"float:left;width:200px;height:90px;padding-right:25px\">\n\n* Turn boilerplate code into a reusable :term:`Driver`\n* Pluggable architecture of Intake allows for many points to add and improve\n* Open, simple code-base -- come and get involved on `github`_!\n\n.. _github: https://github.com/intake/intake\n\n\nSee the executable tutorial:\n\n.. image:: https://mybinder.org/badge_logo.svg\n   :target: https://mybinder.org/v2/gh/intake/intake-examples/master?filepath=tutorial%2Fdev.ipynb\n\nFirst steps\n===========\n\nThe :doc:`start` document contains the sections that all users new to Intake should\nread through. 
:ref:`usecases` shows specific problems that Intake solves.\nFor a brief demonstration, which you can execute locally, go to :doc:`quickstart`.\nFor a general description of all of the components of Intake and how they fit together, go\nto :doc:`overview`. Finally, for some notebooks using Intake and articles about Intake, go\nto :doc:`examples` and `intake-examples`_.\nThese and other documentation pages will make reference to concepts that\nare defined in the :doc:`glossary`.\n\n.. _intake-examples: https://github.com/intake/intake-examples\n\n|\n\n|\n\n.. toctree::\n    :maxdepth: 1\n    :hidden:\n\n    start.rst\n    guide.rst\n    reference.rst\n    roadmap.rst\n    glossary.rst\n    community.rst\n\n\nIndices and tables\n==================\n\n* :ref:`genindex`\n* :ref:`modindex`\n* :ref:`search`\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/making-plugins.rst",
    "content": "Making Drivers\n==============\n\nThe goal of the Intake plugin system is to make it very simple to implement a :term:`Driver` for a new data source, without\nany special knowledge of Dask or the Intake catalog system.\n\nAssumptions\n-----------\n\nAlthough Intake is very flexible about data, there are some basic assumptions that a driver must satisfy.\n\nData Model\n''''''''''\n\nIntake currently supports 3 kinds of containers, represented the most common data models used in Python:\n\n* dataframe\n* ndarray\n* python (list of Python objects, usually dictionaries)\n\nAlthough a driver can load *any* type of data into any container, and new container types can be added to the list\nabove, it is reasonable to expect that the number of container types remains small. Declaring a container type is\nonly informational for the user when read locally, but streaming of data from a server requires that the container type\nbe known to both server and client.\n\nA given driver must only return one kind of container.  If a file format (such as HDF5) could reasonably be\ninterpreted as two different data models depending on usage (such as a dataframe or an ndarray), then two different\ndrivers need to be created with different names.  If a driver returns the ``python`` container, it should document\nwhat Python objects will appear in the list.\n\nThe source of data should be essentially permanent and immutable.  That is, loading the data should not destroy or\nmodify the data, nor should closing the data source destroy the data either.  When a data source is serialized and\nsent to another host, it will need to be reopened at the destination, which may cause queries to be re-executed and\nfiles to be reopened.  
Data sources that treat readers as \"consumers\" and remove data once read will cause erratic\nbehavior, so Intake is not suitable for accessing things like FIFO message queues.\n\nSchema\n''''''\n\nThe schema of a data source is a detailed description of the data, which can be known by loading only metadata or by\nloading only some small representative portion of the data. It is information to present to the user about the data\nthat they are considering loading, and may be important in the case of server-client communication. In the latter\ncontext, the contents of the schema must be serializable by ``msgpack`` (i.e., numbers, strings, lists and\ndictionaries only).\n\nThere may be unknown parts of\nthe schema before the whole data is read.  drivers may require this unknown information in the\n`__init__()` method (or the catalog spec), or do some kind of partial data inspection to determine the schema; or\nmore simply, may be given as unknown ``None`` values.\nRegardless of method used, the\ntime spent figuring out the schema ahead of time should be short and not scale with the size of the data.\n\nTypical fields in a schema dictionary are ``npartitions``, ``dtype``, ``shape``, etc., which will be more appropriate\nfor some drivers/data-types than others.\n\nPartitioning\n''''''''''''\n\nData sources are assumed to be *partitionable*.  A data partition is a randomly accessible fragment of the data.\nIn the case of sequential and data-frame sources, partitions are numbered, starting from zero, and correspond to\ncontiguous chunks of data divided along the first\ndimension of the data structure. In general, any partitioning scheme is conceivable, such as a tuple-of-ints to\nindex the chunks of a large numerical array.\n\nNot all data sources can be partitioned.  For example, file\nformats without sufficient indexing often can only be read from beginning to end.  In these cases, the DataSource\nobject should report that there is only 1 partition.  
However, it often makes sense for a data source to be able to\nrepresent a directory of files, in which case each file will correspond to one partition.\n\nMetadata\n''''''''\n\nOnce opened, a DataSource object can have arbitrary metadata associated with it.  The metadata for a data source\nshould be a dictionary that can be serialized as JSON.  This metadata comes from the following sources:\n\n1. A data catalog entry can associate fixed metadata with the data source.  This is helpful for data formats that do\n   not have any support for metadata within the file format.\n\n2. The driver handling the data source may have some general metadata associated with the state of the system at the\n   time of access, available even before loading any data-specific information.\n\n3. A driver can add additional metadata when the schema is loaded for the data source.  This allows metadata embedded\n   in the data source to be exported.\n\nFrom the user perspective, all of the metadata should be loaded once the data source has loaded the rest of the\nschema (after ``discover()``, ``read()``, ``to_dask()``, etc have been called).\n\n\nSubclassing ``intake.source.base.DataSourceBase``\n-------------------------------------------------\n\nEvery Intake driver class should be a subclass of ``intake.source.base.DataSource``.\nThe class should have the following attributes to identify itself:\n\n- ``name``: The short name of the driver.  This should be a valid python identifier.\n  You should not include the\n  word ``intake`` in the driver name.\n\n- ``version``: A version string for the driver.  This may be reported to the user by tools\n  based on Intake, but has\n  no semantic importance.\n\n- ``container``: The container type of data sources created by this object, e.g.,\n  ``dataframe``, ``ndarray``, or\n  ``python``, one of the keys of ``intake.container.container_map``.\n  For simplicity, a driver may only return one type of container.  
If a particular\n  source of data could\n  be used in multiple ways (such as HDF5 files interpreted as dataframes or as ndarrays),\n  two drivers must be created.\n  These two drivers can be part of the same Python package.\n\n- ``partition_access``: Do the data sources returned by this driver have multiple\n  partitions?  This may help tools in\n  the future make more optimal decisions about how to present data.  If in doubt\n  (or the answer depends on init\n  arguments), ``True`` will always result in correct behavior, even if the data\n  source has only one partition.\n\nThe ``__init__()`` method should always accept a keyword argument ``metadata``, a\ndictionary of metadata from the\ncatalog to associate with the source.  This dictionary must be serializable as JSON.\n\nThe ``DataSourceBase`` class has a small number of methods which should be overridden.\nHere is an example producing a\ndata-frame::\n\n    class FooSource(intake.source.base.DataSource):\n        container = 'dataframe'\n        name = 'foo'\n        version = '0.0.1'\n        partition_access = True\n\n        def __init__(self, a, b, metadata=None):\n            # Do init here with a and b\n            super(FooSource, self).__init__(\n                metadata=metadata\n            )\n\n        def _get_schema(self):\n            return intake.source.base.Schema(\n                datashape=None,\n                dtype={'x': \"int64\", 'y': \"int64\"},\n                shape=(None, 2),\n                npartitions=2,\n                extra_metadata=dict(c=3, d=4)\n            )\n\n        def _get_partition(self, i):\n            # Return the appropriate container of data here\n            return pd.DataFrame({'x': [1, 2, 3], 'y': [10, 20, 30]})\n\n        def read(self):\n            self._load_metadata()\n            return pd.concat([self.read_partition(i) for i in range(self.npartitions)])\n\n        def _close(self):\n            # close any files, sockets, etc\n            pass\n\nMost of 
the work typically happens in the following methods:\n\n- ``__init__()``: Should be very lightweight and fast.  No files or network resources should be opened, and no\n  significant memory should be allocated yet.  Data sources might be serialized immediately.  The default implementation\n  of the pickle protocol in the base class will record all the arguments to ``__init__()`` and recreate the object with\n  those arguments when unpickled, assuming the class has no side effects.\n\n- ``_get_schema()``: May open files and network resources and return as much of the schema as possible in a small\n  amount of *approximately* constant time. Typically, imports of packages needed by the source only happen here.\n  The ``npartitions`` and ``extra_metadata`` attributes must be correct\n  when ``_get_schema`` returns.  Further keys such as ``dtype``, ``shape``, etc., should reflect the container type of\n  the data-source, and can be ``None`` if not easily knowable, or include ``None`` for some elements. File-based\n  sources should use fsspec to open a local or remote URL, and pass ``storage_options`` to it. This ensures\n  compatibility and extra features such as caching. If the backend can only deal with local files, you may\n  still want to use ``fsspec.open_local`` to allow for caching.\n\n- ``_get_partition(self, i)``: Should return all of the data from partition id ``i``, where ``i`` is typically an\n  integer, but may be something more complex.\n  The base class will automatically verify that ``i`` is in the range ``[0, npartitions)``, so no range checking is\n  required in the typical case.\n\n- ``_close(self)``: Close any network or file handles and deallocate any significant memory.  Note that these\n  resources may need to be reopened/reallocated if a read is called again later.\n\nThe full set of user methods of interest is as follows:\n\n- ``discover(self)``: Read the source attributes, like ``npartitions``, etc.  
As with ``_get_schema()`` above, this\n  method is assumed to be fast, and make a best effort to set attributes. The output should be serializable, if the\n  source is to be used on a server; the details contained will be used for creating a remote-source on the client.\n\n- ``read(self)``: Return all the data in memory in one in-memory container.\n\n- ``read_chunked(self)``: Return an iterator that returns contiguous chunks of the data.  The chunking is generally\n  assumed to be at the partition level, but could be finer grained if desired.\n\n- ``read_partition(self, i)``: Returns the data for a given partition id.  It is assumed that reading a given\n  partition does not require reading the data that precedes it.  If ``i`` is out of range, an ``IndexError`` should\n  be raised.\n\n- ``to_dask(self)``: Return a (lazy) Dask data structure corresponding to this data source.  It should be assumed\n  that the data can be read from the Dask workers, so the loads can be done in future tasks.  For further information,\n  see the `Dask documentation <https://dask.pydata.org/en/latest/>`_.\n\n- ``close(self)``: Close network or file handles and deallocate memory.  If other methods are called after ``close()``,\n  the source is automatically reopened.\n\n- ``to_*``: for some sources, it makes sense to provide alternative outputs aside from the base container\n  (dataframe, array, ...) and Dask variants.\n\nNote that all of these methods typically call ``_get_schema``, to make sure that the source has been\ninitialised.\n\nSubclassing ``intake.source.base.DataSource``\n---------------------------------------------\n\n``DataSource`` provides the same functionality as ``DataSourceBase``, but has some additional mixin\nclasses to provide some extras. 
A developer may choose to derive from ``DataSource`` to get all of\nthese, or from ``DataSourceBase`` and make their own choice of mixins to support.\n\n- ``HoloviewsMixin``: provides plotting and GUI capabilities via the `holoviz`_ stack\n\n- ``PersistMixin``: allows for storing a local copy in a default format for the given\n  container type\n\n- ``CacheMixin``: allows for local storage of data files for a source. Deprecated,\n  you should use one of the caching mechanisms in ``fsspec``.\n\n.. _holoviz: https://holoviz.org/index.html\n\n.. _driver-discovery:\n\nDriver Discovery\n----------------\n\nIntake discovers available drivers in three different ways, described below.\nAfter the discovery phase, Intake will automatically create\n``open_[driver_name]`` convenience functions under the ``intake`` module\nnamespace.  Calling a function like ``open_csv()`` is equivalent to\ninstantiating the corresponding data-source class.\n\nEntrypoints\n'''''''''''\n\nIf you are packaging your driver into an installable package to be shared, you\nshould add the following to the package's ``setup.py``:\n\n.. code-block:: python\n\n   setup(\n       ...\n       entry_points={\n           'intake.drivers': [\n               'some_format_name = some_package.and_maybe_a_submodule:YourDriverClass',\n               ...\n           ]\n       },\n   )\n\n.. important::\n\n   Some critical details of Python's entrypoints feature:\n\n   * Note the unusual syntax of the entrypoints. Each item is given as one long\n     string, with the ``=`` as part of the string. Modules are separated by\n     ``.``, and the final object name is preceded by ``:``.\n   * The right hand side of the equals sign must point to where the object is\n     *actually defined*. If ``YourDriverClass`` is defined in\n     ``foo/bar.py`` and imported into ``foo/__init__.py`` you might expect\n     ``foo:YourDriverClass`` to work, but it does not. 
You must spell out\n     ``foo.bar:YourDriverClass``.\n\nEntry points are a way for Python packages to advertise objects with some\ncommon interface. When Intake is imported, it discovers all packages installed\nin the current environment that advertise ``'intake.drivers'`` in this way.\n\nMost packages that define intake drivers have a dependency on ``intake``\nitself, for example in order to use intake's base classes. This can create a\ncircular dependency: importing the package imports intake, which tries\nto discover and import packages that define drivers. To avoid this pitfall,\njust ensure that ``intake`` is imported first thing in your package's\n``__init__.py``. This ensures that the driver-discovery code runs first. Note\nthat you are *not* required to make your package depend on intake. The rule is\nthat *if* you import ``intake`` you must import it first thing. If you do not\nimport intake, there is no circularity.\n\nConfiguration\n'''''''''''''\n\nThe intake configuration file can be used to:\n\n* Specify precedence in the event of name collisions---for example, if two different\n  ``csv`` drivers are installed.\n* Disable a troublesome driver.\n* Manually make intake aware of a driver, which can be useful for\n  experimentation and early development until a ``setup.py`` with an\n  entrypoint is prepared.\n* Assign a driver to a name other than the one assigned by the driver's\n  author.\n\nThe commandline invocation\n\n.. code-block:: bash\n\n   intake drivers enable some_format_name some_package.and_maybe_a_submodule.YourDriverClass\n\nis equivalent to adding this to your intake configuration file:\n\n.. code-block:: yaml\n\n   drivers:\n     some_format_name: some_package.and_maybe_a_submodule.YourDriverClass\n\nYou can also disable a troublesome driver\n\n.. code-block:: bash\n\n   intake drivers disable some_format_name\n\nwhich is equivalent to\n\n.. 
code-block:: yaml\n\n   drivers:\n     some_format_name: false\n\nDeprecated: Package Scan\n''''''''''''''''''''''''\n\nWhen Intake is imported, it will search the Python module path (by default includes ``site-packages`` and other\ndirectories in your ``$PYTHONPATH``) for packages starting with ``intake\\_`` and discover DataSource subclasses inside\nthose packages to register.  Drivers will be registered based on the ``name`` attribute of the object.\nBy convention, drivers should have names that are lowercase, valid Python identifiers that do not contain the word\n``intake``.\n\nThis approach is deprecated because it is limiting (requires the package to\nbegin with \"intake\\_\") and because the package scan can be slow. Using\nentrypoints is strongly encouraged. The package scan *may* be disabled by\ndefault in some future release of intake. During the transition period, if a\npackage named ``intake_*`` provides an entrypoint for a given name, that will\ntake precedence over any drivers gleaned from the package scan having that\nname. If intake discovers any names from the package scan for which there are\nno entrypoints, it will issue a ``FutureWarning``.\n\nPython API to Driver Discovery\n''''''''''''''''''''''''''''''\n\n.. autofunction:: intake.source.discovery.drivers.register_driver\n.. autofunction:: intake.source.discovery.drivers.enable\n.. autofunction:: intake.source.discovery.drivers.disable\n\n.. _remote_data:\n\nRemote Data\n-----------\n\nFor drivers loading from files, the author should be aware that it is easy to implement loading\nfrom files stored in remote services. A simplistic case is demonstrated by the included CSV driver,\nwhich simply passes a URL to Dask, which in turn can interpret the URL as a remote data service,\nand use the ``storage_options`` as required (see the Dask documentation on `remote data`_).\n\n.. 
_remote data: http://dask.pydata.org/en/latest/remote-data-services.html\n\nMore advanced usage, where a Dask loader does not already exist, will likely rely on\n`fsspec.open_files`_ . Use this function to produce lazy ``OpenFile`` objects for local\nor remote data, based on a URL, which will have a protocol designation and possibly contain\nglob \"*\" characters. Additional parameters may be passed to ``open_files``, which should,\nby convention, be supplied by a driver argument named ``storage_options`` (a dictionary).\n\n.. _fsspec.open_files: https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.open_files\n\nTo use an ``OpenFile`` object, make it concrete by using a context:\n\n\n.. code-block:: python\n\n    # at setup, to discover the number of files/partitions\n    set_of_open_files = fsspec.open_files(urlpath, mode='rb', **storage_options)\n\n    # when actually loading data; here we loop over all files, but maybe we just do one partition\n    for an_open_file in set_of_open_files:\n        # `with` causes the object to become concrete until the end of the block\n        with an_open_file as f:\n            # do things with f, which is a file-like object\n            f.seek(); f.read()\n\nThe ``textfiles`` builtin driver implements this mechanism, as an example.\n\n\nStructured File Paths\n---------------------\n\nThe CSV driver sets up an example of how to gather data which is encoded in file paths\nlike (``'data_{site}_.csv'``) and return that data in the output.\nOther drivers could also follow the same structure where data is being loaded from a\nset of filenames. Typically this would apply to data-frame output.\nThis is possible as long as the driver has access to each of the file paths at some\npoint in ``_get_schema``. Once the file paths are known, the driver developer can use the helper\nfunctions defined in ``intake.source.utils`` to get the values for each field in the pattern\nfor each file in the list. 
These values should then be added to the data, a process which\nnormally would happen within the _get_schema method.\n\nThe PatternMixin defines driver properties such as urlpath, path_as_pattern, and pattern.\nThe implementation might look something like this::\n\n    from intake.source.utils import reverse_formats\n\n    class FooSource(intake.source.base.DataSource, intake.source.base.PatternMixin):\n        def __init__(self, a, b, path_as_pattern, urlpath, metadata=None):\n            # Do init here with a and b\n            self.path_as_pattern = path_as_pattern\n            self.urlpath = urlpath\n\n            super(FooSource, self).__init__(\n                container='dataframe',\n                metadata=metadata\n            )\n        def _get_schema(self):\n            # read in the data\n            values_by_field = reverse_formats(self.pattern, file_paths)\n            # add these fields and map values to the data\n            return data\n\n\nSince dask already has a specific method for including the file paths in the output dataframe,\nin the CSV driver we set ``include_path_column=True``, to get a dataframe where one of the\ncolumns contains all the file paths. 
In this case, `add these fields and values to data`\nis a mapping between the categorical file paths column and the ``values_by_field``.\n\nIn other drivers where each file is read in independently the driver developer\ncan set the new fields on the data from each file before concatenating.\nThis pattern looks more like::\n\n    from intake.source.utils import reverse_format\n\n    class FooSource(intake.source.base.DataSource):\n        ...\n\n        def _get_schema(self):\n            # get list of file paths\n            for path in file_paths:\n                # read in the file\n                values_by_field = reverse_format(self.pattern, path)\n                # add these fields and values to the data\n            # concatenate the datasets\n            return data\n\n\nTo toggle on and off this path as pattern behavior, the CSV and intake-xarray drivers\nuse the bool ``path_as_pattern`` keyword argument.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/overview.rst",
    "content": "Overview\n========\n\nIntroduction\n------------\n\nThis page describes the technical design of Intake, with brief details of the aims of the project and\ncomponents of the library\n\nWhy Intake?\n-----------\n\nIntake solves a related set of problems:\n\n* Python API standards for loading data (such as DB-API 2.0) are optimized for transactional databases and query results\n  that are processed one row at a time.\n\n* Libraries that do load data in bulk tend to each have their own API for doing so, which adds friction when switching\n  data formats.\n\n* Loading data into a distributed data structure (like those found in Dask and Spark) often requires writing a separate\n  loader.\n\n* Abstractions often focus on just one data model (tabular, n-dimensional array, or semi-structured), when many projects\n  need to work with multiple kinds of data.\n\nIntake has the explicit goal of **not** defining a computational expression\nsystem.  Intake plugins load the data into containers (e.g., arrays or data-frames) that\nprovide their data processing features.  As a result, it is\nvery easy to make a new Intake plugin with a relatively small amount of Python.\n\nStructure\n---------\n\nIntake is a Python library for accessing data in a simple and uniform way.  It consists of three parts:\n\n1. A lightweight plugin system for adding data loader :term:`drivers<Driver>` for new file formats and servers\n(like databases, REST endpoints or other cataloging services)\n\n2. A cataloging system for specifying these sources in simple :term:`YAML` syntax, or with plugins that read source specs\nfrom some external data service\n\n3. A server-client architecture that can share data catalog metadata over the network, or even stream the data directly\nto clients if needed\n\nIntake supports loading data into standard Python containers. 
The list can be easily extended,\nbut the currently supported list is:\n\n* Pandas Dataframes - tabular data\n\n* NumPy Arrays - tensor data\n\n* Python lists of dictionaries - semi-structured data\n\nAdditionally, Intake can load data into distributed data structures.  Currently it supports Dask, a flexible parallel\ncomputing library with distributed containers like `dask.dataframe <https://dask.pydata.org/en/latest/dataframe.html>`_,\n`dask.array <https://dask.pydata.org/en/latest/array.html>`_,\nand `dask.bag <https://dask.pydata.org/en/latest/bag.html>`_.\nIn the future, other distributed computing systems could use Intake to create similar data structures.\n\nConcepts\n--------\n\nIntake is built out of four core concepts:\n\n* Data Source classes: the \"driver\" plugins that each implement loading of some specific type of data into python, with\n  plugin-specific arguments.\n\n* Data Source: An object that represents a reference to a data source.  Data source instances have methods for loading the\n  data into standard containers, like Pandas DataFrames, but do not load any data until specifically requested.\n\n* Catalog: An inventory of catalog entries, each of which defines a Data Source. Catalog objects can be created from\n  local YAML definitions, by connecting\n  to remote servers, or by some driver that knows how to query an external data service.\n\n* Catalog Entry: A named data source held internally by catalog objects, which generate\n  data source instances when accessed.\n  The catalog entry includes metadata about the source, as well as the name of the\n  driver and arguments. 
Arguments can be parameterized, allowing one entry to return\n  different subsets of data depending on the user request.\n\nThe business of a plugin is to go from some data format (bunch of files or some remote service)\nto a \":term:`Container`\" of the data (e.g., data-frame), a thing on which you can perform further analysis.\nDrivers can be used directly by the user, or indirectly through data catalogs.  Data sources can be pickled, sent over\nthe network to other hosts, and reopened (assuming the remote system has access to the required files or servers).\n\nSee also the :doc:`glossary`.\n\nFuture Directions\n-----------------\n\nOngoing work for enhancements, as well as requests for plugins, etc., can be found at the\n`issue tracker <https://github.com/intake/intake/issues>`_. See the :ref:`roadmap`\nfor general mid- and long-term goals.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/persisting.rst",
    "content": ".. _persisting:\n\nPersisting Data\n===============\n\n(this is an experimental new feature, expect enhancements and changes)\n\nIntroduction\n------------\n\nAs defined in the glossary, to :term:`Persist` is to convert data into the storage format\nmost appropriate for the container type, and save a copy of this for rapid lookup in the future.\nThis is of great potential benefit where the creation or transfer of the original data source\ntakes some time.\n\nThis is not to be confused with the file :term:`Cache`.\n\nUsage\n-----\n\nAny :term:`Data Source` has a method ``.persist()``. The only option that you will need to\npick is a :term:`TTL`, the number of seconds that the persisted version lasts before\nexpiry (leave as ``None`` for no expiry). This creates a local copy in the persist\ndirectory, which may be in ``\"~/.intake/persist``, but can be configured.\n\nEach container type (dataframe, array, ...) will have its own implementation of persistence,\nand a particular file storage format associated. 
The call to ``.persist()`` may take\narguments to tune how the local files are created, and in some cases may require additional\noptional packages to be installed.\n\nExample::\n\n    cat = intake.open_catalog('mycat.yaml')  # load a remote cat\n    source = cat.csvsource()  # source pointing to remote data\n    source.persist()\n\n    source = cat.csvsource()  # future use now gives local intake_parquet.ParquetSource\n\nTo control whether a catalog will automatically give you the persisted version of a\nsource in this way using the argument ``persist_mode``, e.g., to ignore locally\npersisted versions, you could have done::\n\n    cat = intake.open_catalog('mycat.yaml', persist_mode='never')\n    or\n    source = cat.csvsource(persist_mode='never')\n\nNote that if you give a TTL (in seconds), then the original source will be accessed\nand a new persisted version written transparently when the old persisted version has expired.\n\nNote that after persisting, the original source will have ``source.has_been_persisted == True``\nand the persisted source (i.e., the one loaded from local files) will have\n``source.is_persisted == True``.\n\nExport\n------\n\nA similar concept to Persist, Export allows you to make a copy of some data source, in the\nformat appropriate for its container, and place this data-set in whichever location suits you,\nincluding remote locations. This functionality (``source.export()``) does *not* touch the persist\nstore; instead, it returns a YAML text representation of the output, so that you can put it into\na catalog of your own. 
It would be this catalog that you share with other people.\n\nNote that \"exported\" data-sources like this do contain the information of the original source they\nwere made from in their metadata, so you can recreate the original source, if you want to, and\nread from there.\n\nPersisting to Remote\n--------------------\n\nIf you are typically running your code inside of ephemeral containers, then persisting data-sets may\nbe something that you want to do (because the original source is slow, or parsing is CPU/memory intensive),\nbut the local storage is not useful. In some cases you may have access to some shared network storage\nmounted on the instance, but in other cases you will want to persist to a remote store.\n\nThe config value ``'persist_path'``, which can also be set by the environment variable\n``INTAKE_PERSIST_PATH`` can be a remote location such as ``s3://mybucket/intake-persist``. You will\nneed to install the appropriate package to talk to the external storage (e.g., ``s3fs``, ``gcsfs``,\n``pyarrow``), but otherwise everything should work as before, and you can access the persisted data\nfrom any container.\n\n\nThe Persist Store\n-----------------\n\nYou can interact directly with the class implementing persistence::\n\n    from intake.container.persist import store\n\nThis singleton instance, which acts like a catalog, allows you to query the contents of the\ninstance store and to add and remove entries. It also allows you to find the original\nsource for any given persisted source, and refresh the persisted version on demand.\n\nFor details on the methods of the persist store, see the API documentation:\n:func:`intake.container.persist.PersistStore`. Sources in the store carry a lot of\ninformation about the sources they were made from, so that they can be remade\nsuccessfully. 
This all appears in the source metadata.\nThe sources use the \"token\" of the original\ndata source as their key in the store, a value which can be found by ``dask.base.tokenize(source)``\nfor the original source, or can be taken from the metadata of a persisted source.\n\nNote that all of the information about persisted sources is held in a single YAML file in\nthe persist directory (typically ``/persisted/cat.yaml`` within the config directory, but\nsee ``intake.config.conf['persist_path']``). This file can be edited by hand if you wanted to,\nfor example, set some persisted source not to expire. This is only recommended for experts.\n\nFuture Enhancements\n-------------------\n\n- CLI functionality to investigate and alter the state of the persist store.\n\n- Time check-pointing of persisted data, such that you can not only get the \"most recent\" but\n  any version in the time-series.\n\n- (eventually) pipeline functionality, whereby a persisted data source depends on another\n  persisted data source, and the whole train can be refreshed on a schedule or on demand.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/plotting.rst",
    "content": "Plotting\n========\n\nIntake provides a plotting API based on the `hvPlot <https://hvplot.holoviz.org/index.html>`_ library, which\nclosely mirrors the pandas plotting API but generates interactive plots using `HoloViews <http://holoviews.org/>`_\nand `Bokeh <http://bokeh.pydata.org/>`_.\n\nThe `hvPlot website <https://hvplot.holoviz.org/index.html>`_ provides comprehensive documentation on using the\nplotting API to quickly visualize and explore small and large datasets. The main features offered by the plotting API\ninclude:\n\n  * Support for tabular data stored in pandas and dask dataframes\n  * Support for gridded data stored in xarray backed nD-arrays\n  * Support for plotting large datasets with `datashader <http://datashader.org/>`_\n\nUsing Intake alongside hvPlot allows declaratively persisting plot declarations and default options in the regular\ncatalog.yaml files.\n\nSetup\n'''''\n\nFor detailed installation instructions see the\n`getting started section <https://hvplot.holoviz.org/getting_started/index.html>`_ in the hvPlot documentation.\nTo start with install hvplot using conda:\n\n.. code-block:: bash\n\n    conda install -c conda-forge hvplot\n\nor using pip:\n\n.. code-block:: bash\n\n    pip install hvplot\n\n\nUsage\n'''''\n\nThe plotting API is designed to work well in and outside the Jupyter notebook, however when using it in JupyterLab\nthe PyViz lab extension must be installed first:\n\n.. 
code-block:: bash\n\n    jupyter labextension install @pyviz/jupyterlab_pyviz\n\nFor detailed instructions on displaying plots in the notebook and from the Python command prompt see the\n`hvPlot user guide <https://hvplot.holoviz.org/user_guide/Viewing.html>`_.\n\nPython Command Prompt & Scripts\n--------------------------------\n\nAssuming the US Crime dataset has been installed (in the\n`intake-examples repo <https://github.com/intake/intake-examples>`_, or from\nconda with `conda install -c intake us_crime`):\n\nOnce installed the plot API can be used, by using the ``.plot`` method on an intake ``DataSource``:\n\n.. code-block:: python\n\n    import intake\n    import hvplot as hp\n\n    crime = intake.cat.us_crime\n    columns = ['Burglary rate', 'Larceny-theft rate', 'Robbery rate', 'Violent Crime rate']\n\n    violin = crime.plot.violin(y=columns, group_label='Type of crime',\n                               value_label='Rate per 100k', invert=True)\n    hp.show(violin)\n\n.. image:: _static/images/plotting_violin.png\n\nNotebook\n--------\n\nInside the notebook plots will display themselves, however the notebook extension must be loaded first. The\nextension may be loaded by importing ``hvplot.intake`` module or explicitly loading the holoviews extension,\nor by calling ``intake.output_notebook()``:\n\n.. code-block:: python\n\n    # To load the extension run this import\n    import hvplot.intake\n\n    # Or load the holoviews extension directly\n    import holoviews as hv\n    hv.extension('bokeh')\n\n    # convenience function\n    import intake\n    intake.output_notebook()\n\n    crime = intake.cat.us_crime\n    columns = ['Violent Crime rate', 'Robbery rate', 'Burglary rate']\n    crime.plot(x='Year', y=columns, value_label='Rate (per 100k people)')\n\n.. raw:: html\n   :file: _static/images/plotting_example.html\n\nPredefined Plots\n----------------\n\nSome catalogs will define plots appropriate to a specific data source. 
These will be specified\nsuch that the user gets the right view with the right columns and labels, without having to investigate\nthe data in detail -- this is ideal for quick-look plotting when browsing sources.\n\n.. code-block:: python\n\n    import intake\n    intake.us_crime.plots\n\nReturns `['example']`. This works whether accessing the entry object or the source instance. To visualise\n\n.. code-block:: python\n\n    intake.us_crime.plot.example()\n\n\nPersisting metadata\n'''''''''''''''''''\n\nIntake allows catalog yaml files to declare metadata fields for each data source which are made available alongside\nthe actual dataset. The plotting API reserves certain fields to define default plot options, to label and annotate\nthe data fields in a dataset and to declare pre-defined plots.\n\nDeclaring defaults\n------------------\n\nThe first set of metadata used by the plotting API is the `plot` field in the metadata section. Any options found in\nthe metadata field will apply to all plots generated from that data source, allowing the definition of plotting\ndefaults. For example when plotting a fairly large dataset such as the NYC Taxi data, it might be desirable to enable\ndatashader by default ensuring that any plot that supports it is datashaded. The syntax to declare default plot options\nis as follows:\n\n.. code-block:: yaml\n\n    sources:\n      nyc_taxi:\n        description: NYC Taxi dataset\n        driver: parquet\n        args:\n          urlpath: 's3://datashader-data/nyc_taxi_wide.parq'\n        metadata:\n          plot:\n            datashade: true\n\n\nDeclaring data fields\n---------------------\n\nThe columns of a CSV or parquet file or the coordinates and data variables in a NetCDF file often have shortened, or\ncryptic names with underscores. 
They also do not provide additional information about the units of the data or the\nrange of values, therefore the catalog yaml specification also provides the ability to define additional information\nabout the `fields` in a dataset.\n\nValid attributes that may be defined for the data `fields` include:\n\n- `label`: A readable label for the field which will be used to label axes and widgets\n- `unit`: A unit associated with the values inside a data field\n- `range`: A range associated with a field declaring limits which will override those computed from the data\n\nJust like the default plot options the `fields` may be declared under the metadata section of a data source:\n\n.. code-block:: yaml\n\n    sources:\n      nyc_taxi:\n        description: NYC Taxi dataset\n        driver: parquet\n        args:\n          urlpath: 's3://datashader-data/nyc_taxi_wide.parq'\n        metadata:\n          fields:\n            dropoff_x:\n              label: Longitude\n            dropoff_y:\n              label: Latitude\n            total_fare:\n              label: Fare\n              unit: $\n\nDeclaring custom plots\n----------------------\n\nAs shown in the `hvPlot user guide <https://hvplot.holoviz.org/user_guide/Plotting.html>`__, the plotting API\nprovides a variety of plot types, which can be declared using the `kind` argument or via convenience methods on the\nplotting API, e.g. `cat.source.plot.scatter()`. In addition to declaring default plot options and field metadata data\nsources may also declare custom plot, which will be made available as methods on the plotting API. In this way a\ncatalogue may declare any number of custom plots alongside a datasource.\n\nTo make this more concrete consider the following custom plot declaration on the `plots` field in the metadata section:\n\n.. 
code-block:: yaml\n\n    sources:\n      nyc_taxi:\n        description: NYC Taxi dataset\n        driver: parquet\n        args:\n          urlpath: 's3://datashader-data/nyc_taxi_wide.parq'\n        metadata:\n          plots:\n            dropoff_scatter:\n              kind: scatter\n              x: dropoff_x\n              y: dropoff_y\n              datashade: True\n              width: 800\n              height: 600\n\nThis declarative specification creates a new custom plot called `dropoff_scatter`, which will be available on the\ncatalog under `cat.nyc_taxi.plot.dropoff_scatter()`. Calling this method on the plot API will automatically generate a\ndatashaded scatter plot of the dropoff locations in the NYC taxi dataset.\n\nOf course the three metadata fields may also be used together, declaring global defaults under the `plot` field,\nannotations for the data `fields` under the `fields` key and custom plots via the `plots` field.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/plugin-directory.rst",
    "content": ".. _plugin-directory:\n\nPlugin Directory\n================\n\nThis is a list of known projects which install driver plugins for Intake, and the named drivers each\ncontains:\n\n.. raw:: html\n   :file: plugin-list.html\n\n\n\nDon't see your favorite format?  See :doc:`making-plugins` for how to create new plugins.\n\nNote that if you want your plugin listed here, open an issue in the `Intake\nissue repository <https://github.com/intake/intake>`_ and add an entry to the\n`status dashboard repository <https://github.com/intake/intake-dashboard>`_. We also have a\n`plugin wishlist Github issue <https://github.com/intake/intake/issues/58>`_\nthat shows the breadth of plugins we hope to see for Intake.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/quickstart.rst",
    "content": "Quickstart\n==========\n\nThis guide will show you how to get started using Intake to read data, and give you a flavour\nof how Intake feels to the :term:`Data User`.\nIt assumes you are working in either a conda or a virtualenv/pip environment. For notebooks with\nexecutable code, see the :doc:`examples`. This walk-through can be run from a notebook or interactive\npython session.\n\nInstallation\n------------\n\nIf you are using `Anaconda`_ or Miniconda, install Intake with the following commands::\n\n    conda install -c conda-forge intake\n\nIf you are using virtualenv/pip, run the following command::\n\n    pip install intake\n\nNote that this will install with the minimum of optional requirements. If you want a more complete\ninstall, use `intake[complete]` instead.\n\n.. _Anaconda: https://www.anaconda.com/download/\n\nCreating Sample Data\n--------------------\n\nLet's begin by creating a sample data set and catalog.  At the command line, run the ``intake example`` command.\nThis will create an example data :term:`Catalog` and two CSV data files.  These files contains some basic facts about the 50\nUS states, and the catalog includes a specification of how to load them.\n\nLoading a Data Source\n---------------------\n\n:term:`Data sources<Data-set>` can be created directly with the ``open_*()`` functions in the ``intake``\nmodule.  
To read our example data::\n\n    >>> import intake\n    >>> ds = intake.open_csv('states_*.csv')\n    >>> print(ds)\n    <intake.source.csv.CSVSource object at 0x1163882e8>\n\nEach open function has different arguments, specific for the data format or service being used.\n\nReading Data\n------------\n\nIntake reads data into memory using :term:`containers<Container>` you are already familiar with:\n\n  * Tables: Pandas DataFrames\n  * Multidimensional arrays: NumPy arrays\n  * Semistructured data: Python lists of objects (usually dictionaries)\n\nTo find out what kind of container a data source will produce, inspect the ``container`` attribute::\n\n    >>> ds.container\n    'dataframe'\n\nThe result will be ``dataframe``, ``ndarray``, or ``python``.  (New container types will be added in\nthe future.)\n\nFor data that fits in memory, you can ask Intake to load it directly::\n\n    >>> df = ds.read()\n    >>> df.head()\n            state        slug code                nickname  ...\n    0     Alabama     alabama   AL      Yellowhammer State\n    1      Alaska      alaska   AK       The Last Frontier\n    2     Arizona     arizona   AZ  The Grand Canyon State\n    3    Arkansas    arkansas   AR       The Natural State\n    4  California  california   CA            Golden State\n\nMany data sources will also have quick-look plotting available. The attribute ``.plot`` will list\na number of built-in plotting methods, such as ``.scatter()``, see :doc:`plotting`.\n\nIntake data sources can have *partitions*.  A partition refers to a contiguous chunk of data that can be loaded\nindependent of any other partition.  The partitioning scheme is entirely up to the plugin author.  
In\nthe case of the CSV plugin, each ``.csv`` file is a partition.\n\nTo read data from a data source one chunk at a time, the ``read_chunked()`` method returns an iterator::\n\n    >>> for chunk in ds.read_chunked(): print('Chunk: %d' % len(chunk))\n    ...\n    Chunk: 24\n    Chunk: 26\n\n\nWorking with Dask\n-----------------\n\nWorking with large datasets is much easier with a parallel, out-of-core computing library like\n`Dask <https://dask.pydata.org/en/latest/>`_.  Intake can create Dask containers (like ``dask.dataframe``)\nfrom data sources that will load their data only when required::\n\n    >>> ddf = ds.to_dask()\n    >>> ddf\n    Dask DataFrame Structure:\n                admission_date admission_number capital_city capital_url    code constitution_url facebook_url landscape_background_url map_image_url nickname population population_rank skyline_background_url    slug   state state_flag_url state_seal_url twitter_url website\n    npartitions=2\n                        object            int64       object      object  object           object       object                   object        object   object      int64           int64                 object  object  object         object         object      object  object\n                            ...              ...          ...         ...     ...              ...          ...                      ...           ...      ...        ...             ...                    ...     ...     ...            ...            ...         ...     ...\n                            ...              ...          ...         ...     ...              ...          ...                      ...           ...      ...        ...             ...                    ...     ...     ...            ...            ...         ...     ...\n    Dask Name: from-delayed, 4 tasks\n\nThe Dask containers will be partitioned in the same way as the Intake data source, allowing different chunks\nto be processed in parallel. 
Please read the Dask documentation to understand the differences when\nworking with Dask collections (Bag, Array or Data-frames).\n\nOpening a Catalog\n-----------------\n\nA :term:`Catalog` is an inventory of data sources, with the type and arguments prescribed for each, and\narbitrary metadata about each source.\nIn the simplest case, a catalog can be described by a file in YAML format, a\n\":term:`Catalog file`\". In real usage, catalogues can be defined in a number of ways, such as remote\nfiles, by\nconnecting to a third-party data service (e.g., SQL server) or through an Intake :term:`Server` protocol, which\ncan implement any number of ways to search and deliver data sources.\n\nThe ``intake example`` command, above, created a catalog file\nwith the following :term:`YAML`-syntax content:\n\n.. code-block:: yaml\n\n    sources:\n      states:\n        description: US state information from [CivilServices](https://civil.services/)\n        driver: csv\n        args:\n          urlpath: '{{ CATALOG_DIR }}/states_*.csv'\n        metadata:\n          origin_url: 'https://github.com/CivilServiceUSA/us-states/blob/v1.0.0/data/states.csv'\n\nTo load a :term:`Catalog` from a :term:`Catalog file`::\n\n    >>> cat = intake.open_catalog('us_states.yml')\n    >>> list(cat)\n    ['states']\n\nThis catalog contains one data source, called ``states``.  It can be accessed by attribute::\n\n    >>> cat.states.to_dask()[['state','slug']].head()\n            state        slug\n    0     Alabama     alabama\n    1      Alaska      alaska\n    2     Arizona     arizona\n    3    Arkansas    arkansas\n    4  California  california\n\nPlacing data source specifications into a catalog like this enables declaring data sets in a single canonical place,\nand not having to use boilerplate code in each notebook/script that makes use of the data. 
The catalogs can also\nreference one-another, be stored remotely, and include extra metadata such as a set of named quick-look plots that\nare appropriate for the particular data source. Note that catalogs are **not** restricted\nto being stored in YAML files, that just happens to be the simplest way to display them.\n\nMany catalog entries will also contain \"user_parameter\" blocks, which are indications of options explicitly\nallowed by the catalog author, or for validation of the values passed. The user can customise how a data\nsource is accessed by providing values for the user_parameters, overriding the arguments specified in\nthe entry, or passing extra keyword arguments to be passed to the driver. The keywords that should\nbe passed are limited to the user_parameters defined and the inputs expected by the specific\ndriver - such usage is expected only from those already familiar with the specifics of the given\nformat. In the following example, the user overrides the \"csv_kwargs\" keyword, which is described\nin the documentation for :func:`CSVSource <intake.source.csv.CSVSource>` and gets passed down to the CSV reader::\n\n    # pass extra kwargs understood by the csv driver\n    >>> intake.cat.states(csv_kwargs={'header': None, 'skiprows': 1}).read().head()\n               0           1   ...                                17\n    0     Alabama     alabama  ...    https://twitter.com/alabamagov\n    1      Alaska      alaska  ...        https://twitter.com/alaska\n\n\nNote that, if you are *creating* such catalogs, you may well start by trying the ``open_csv`` command,\nabove, and then use ``print(ds.yaml())``. If you do this now, you will see that the output is very\nsimilar to the catalog file we have provided.\n\nInstalling Data Source Packages\n-------------------------------\n\nIntake makes it possible to create :term:`Data packages` (``pip`` or ``conda``)\nthat install data sources into a\nglobal catalog.  
For example, we can\ninstall a data package containing the same data we have been working with::\n\n    conda install -c intake data-us-states\n\n:term:`Conda` installs the catalog file in this package to ``$CONDA_PREFIX/share/intake/us_states.yml``.\nNow, when we import\n``intake``, we will see the data from this package appear as part of a global catalog called ``intake.cat``. In this\nparticular case we use Dask to do the reading (which can handle larger-than-memory data and parallel\nprocessing), but ``read()`` would work also::\n\n    >>> import intake\n    >>> intake.cat.states.to_dask()[['state','slug']].head()\n            state        slug\n    0     Alabama     alabama\n    1      Alaska      alaska\n    2     Arizona     arizona\n    3    Arkansas    arkansas\n    4  California  california\n\nThe global catalog is a union of all catalogs installed in the conda/virtualenv environment and also any catalogs\ninstalled in user-specific locations.\n\n\nAdding Data Source Packages using the Intake path\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nIntake checks the Intake config file for ``catalog_path`` or the environment variable ``\"INTAKE_PATH\"`` for a colon\nseparated list of paths (semicolon on windows) to search for catalog files.\nWhen you import ``intake`` we will see all entries from all of the catalogues referenced as part of a global catalog\ncalled ``intake.cat``.\n\n\nUsing the GUI\n-------------\n\nA graphical data browser is available in the Jupyter notebook environment or standalone web-server.\nIt will show the\ncontents of any installed catalogs, plus allows for selecting local and remote catalogs,\nto browse and select entries from these. See :doc:`gui`.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/reference.rst",
    "content": "Reference\n---------\n\n\n.. toctree::\n    :maxdepth: 1\n\n    api.rst\n    changelog.rst\n    making-plugins.rst\n    data-packages.rst\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/roadmap.rst",
    "content": ".. _roadmap:\n\nRoadmap\n=======\n\nSome high-level work that we expect to be achieved on the time-scale of months. This list\nis not exhaustive, but rather aims to whet the appetite for what Intake can be in the future.\n\nSince Intake aims to be a community of data-oriented pythoneers, nothing written here is laid in\nstone, and users and devs are encouraged to make their opinions known!\n\n\nBroaden the coverage of formats\n-------------------------------\n\nData-type drivers are easy to write, but still require some effort, and therefore reasonable\nimpetus to get the work done. Conversations over the coming months can help determine the\ndrivers that should be created by the Intake team, and those that might be contributed by the\ncommunity.\n\nThe next type that we would specifically like to consider is machine learning\nmodel artifacts.  **EDIT** see https://github.com/AlbertDeFusco/intake-sklearn , and\nhopefully more to come.\n\nStreaming Source\n----------------\n\nMany data sources are inherently time-sensitive and event-wise. These are not covered well by existing\nPython tools, but the ``streamz`` library may present a nice way to model them. From the Intake point of\nview, the task would be to develop a streaming type, and at least one data driver that uses it.\n\nThe most obvious place to start would be read a file: every time a new line appears in the file, an event\nis emitted. This is appropriate, for instance, for watching the log files of a web-server, and indeed could\nbe extended to read from an arbitrary socket.\n\n**EDIT** see: https://github.com/intake/intake-streamz\n\n\nServer publish hooks\n--------------------\n\nTo add API endpoints to the server, so that a user (with sufficient privilege) can post data\nspecifications to a running server, optionally saving the specs to a catalog server-side. 
Furthermore,\nwe will consider the possibility of being able to upload and/or transform data\n(rather than refer to it in a third-party location), so that you would have a one-line \"publish\"\nability from the client.\n\nThe server, in general, could do with a lot of work to become more than the current\ndemonstration/prototype. In particular, it should be able to be performant and scalable,\nmeaning that the server implementation ought to keep as little local state as possible.\n\nSimplify dependencies and class hierarchy\n-----------------------------------------\n\nWe would like to make it easier to write Intake drivers which don't need any\npersist or GUI functionality, and to be able to install Intake core\nfunctionality (driver registry, data loading and catalog traversal) without\nneeding many other packages at all.\n\n**EDIT** this has been partly done, you can derive from ``DataSourceBase`` and\nnot have to use the full set of Intake's features for simplicity. We have also gone\nsome distance to separate out dependencies for parts of the package, so that you\ncan install Intake and only use some of the subpackages/modules - imports don't\nhappen until those parts of the code are used. 
We have *not* yet split the\nintake conda package into, for example, intake-base, intake-server, intake-gui...\n\nReader API\n----------\n\nFor those that wish to provide Intake's data source API, and make data sources\navailable to Intake cataloguing, but don't wish to take Intake as a direct dependency.\nThe actual API of ``DataSources`` is rather simple:\n\n- ``__init__``: collect arguments, minimal IO at this point\n- ``discover()``: get metadata from the source, by querying the files/service itself\n- ``read()``: return in-memory version of the data\n- ``to_*``: return reference objects for the given compute engine, typically Dask\n- ``read_partition(...)``: read part of the data into memory, where the argument\n  makes sense for the given type of data\n- ``configure_new()``: create new instance with different arguments\n- ``yaml()``: representation appropriate for inclusion in a YAML catalogue\n- ``close()``: release any resources\n\nOf these, only the first three are really necessary for a minimal interface, so\nIntake might do well to publish this *protocol specification*, so that new drivers\ncan be written that can be used by Intake but do not need Intake, and so help\nadoption.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/scope2.rst",
    "content": "Scope\n=====\n\nHere we lay out what Intake is, why you might want to use it, main features and also\na few reasons you may wish to look elsewhere.\n\n\nMotivation\n----------\n\nData scientists, analysts, ML/AI developers and engineers want to spend their time working\nwith data to produce models, results and insights. Those first few lines or cells in a workflow\nto get to the data are annoying, often wrong and brittle.\nConsider the following cell of realistic-looking code.\n\n.. image:: ./_static/images/complex_cell.png\n   :alt: Complex Cell\n\nThis cell encodes several steps it takes to produce the object of interest for the user, including\ndefining some location URL, fetching data, loading, cleaning and converting data. It refers to\nmultiple packages, and hard-codes various arguments directly. This is clearly messy code, but it\nis extremely common - in fact, most data-centric workflows start like this. Now imagine finding\nand using this dataset for a new project, or when the upstream location, format or types have changed.\nFurthermore, the steps are highly specific - if you decide you need a different compute engine,\nor the data moves to a different storage service, it is painful to fix this. If the same dataset is\nused in many places, you will need to remember to fix it in each place.\n\n\nWouldn't it be nice if you could declare your data and pipeline just once? 
Then you can\n\n- version control your data set descriptions as you would for code (and maybe data files)\n- share your data definitions with others, just by writing this prescription to any shared space\n- update all users of this data in a single place, single source of truth\n- encode a set of transforms as part of a data-oriented processing framework\n- decide to load and process the same data but with different engines\n- automatically encode python statements into data descriptions\n- convert between dozens of data types\n\n\nCounter indications\n-------------------\n\nThe following are some situations where you might not be interested in Intake:\n\n- you only ever read one type of file with one package, e.g., ``pd.read_csv``.\n  In this case, Intake can still\n  serve to describe and enumerate data sources, but you miss out on most of the functionality.\n- you only ever work on data sources by yourself and have no reasons to direct other people in how\n  to read data (not be the consumer of such descriptions).\n- all your data needs are already met by some other data service by itself, for example a\n  set of tables/procedures/views/queries on a SQL database.\n- you have data that is not read by anything in Intake (but maybe you could pretty easily add any\n  reader you need).\n- you don't yet see any benefit of describing your datasets in catalogs.\n\n\nWhat Intake Isn't\n-----------------\n\nThis is not a workflow running system (maybe use Airflow, Celery, Prefect, etc.) nor\na compute engine (although we hook into spark, dask, ray and such). Our scope is limited\nto describing data and how to load it. 
Here, \"load\" includes transforms and cleaning\nwith the engine of your choice to get to something ready for analysis.\nWe delegate all calls to other third-party packages, so you can think of the descriptions\nas being \"what to call\".\n\nRelationship to ``fsspec``\n--------------------------\n\n``fsspec`` is a library concerned only with reading bytes from various stores. It has become\nthe standard (but not only) package for this in the python/data ecosystem and is supported by\nmany other packages. Intake will make use of ``fsspec`` for many file readers, encoding\n\"storage_options\" (further arguments that ``fsspec`` understands) when necessary.\n\nWhere a reader doesn't understand ``fsspec``, \"storage_options\" are ignored, and in some\ncases the files might need to be downloaded locally before being accessible. Intake prefers\nusing ``fsspec``, but it is not necessary.\n\n\nRelationship to ``dask``\n------------------------\n\nIntake originally came out of the same stable as `dask`_, and this was implicit in all of the\n\"sources\" in V1. Since then, many more compute engines have become available in python and\nsome that already existed have become more convenient (e.g., Spark). We no longer require or\neven prefer ``dask``, but it is, of course, a fine choice for out-of-core, parallel and\ndistributed workloads.\n\n.. _dask: https://dask.org\n\n.. _rel_v1:\n\nRelationship to V1\n------------------\n\nWe aim to be largely backward compatible with pre-V2 Intake sources and catalogs.\nMany data sources have been rewritten\nto use the new framework, and many rarely-used features have been removed. 
In particular, the\nfollowing features are no longer supported for V1 sources:\n\n- the intake server (use ``tiled`` instead)\n- caching (use ``fsspec`` caching instead or custom caching pipelines)\n- \"persist\" and \"export\" (use the new converters and output classes)\n- automatic ``hvplot`` (this is now an \"output\" converter for pandas and xarray types)\n- some niche source features such as CSV file pattern matching\n\nIn addition, not all existing ``intake_*`` packages have corresponding readers in Take2, but we are\nmaking progress.\n"
  },
  {
    "path": "docs/source/start.rst",
    "content": ".. _start:\n\nStart here\n----------\n\nThese documents will familiarise you with Intake, show you some basic usage and examples,\nand describe Intake's place in the wider python data world.\n\n.. toctree::\n    :maxdepth: 1\n\n    quickstart.rst\n    use_cases.rst\n    overview.rst\n    examples.rst\n    deployments.rst\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/tools.rst",
    "content": "Command Line Tools\n==================\n\nThe package installs two executable commands: for starting the catalog server; and\na client for accessing catalogs and manipulating the configuration.\n\n.. _configuration:\n\nConfiguration\n-------------\n\nA file-based configuration service is available to Intake. This file is by default\nsought at the location ``~/.intake/conf.yaml``, but either of the environment variables\n``INTAKE_CONF_DIR`` or ``INTAKE_CONF_FILE`` can be used to specify another directory\nor file. If both are given, the latter takes priority.\n\nAt present, the configuration file might look as follows:\n\n.. code-block:: yaml\n\n   auth:\n     cls: \"intake.auth.base.BaseAuth\"\n   port: 5000\n   catalog_path:\n     - /home/myusername/special_dir\n\nThese are the defaults, and any parameters not specified will take the values above\n\n* the Intake Server will listen on port 5000 (this can be overridden on the command line,\n  see below)\n* and the auth system used will be the fully qualified class given (which, for BaseAuth,\n  always allows access).\n\nSee ``intake.config.defaults`` for a full list of keys and their default values.\n\nLog Level\n---------\n\nThe logging level is configurable using Python's built-in logging module.\n\nThe config option ``'logging'`` holds the current level for the intake logger, and\ncan take values such as ``'INFO'`` or ``'DEBUG'``. 
This can be set in the ``conf.yaml``\nfile of the config directory (e.g., ``~/.intake/``), or overridden by the environment\nvariable ``INTAKE_LOG_LEVEL``.\n\nFurthermore, the level and settings of the logger can be changed programmatically in code::\n\n  import logging\n  logger = logging.getLogger('intake')\n  logger.setLevel(logging.DEBUG)\n  logger.addHandler(..)\n\nIntake Server\n-------------\n\nThe server takes one or more catalog files as input and makes them available on\nport 5000 by default.\n\nYou can see the full description of the server command with:\n\n::\n\n  >>> intake-server --help\n\n  usage: intake-server [-h] [-p PORT] [--list-entries] [--sys-exit-on-sigterm]\n                       [--flatten] [--no-flatten] [-a ADDRESS]\n                       FILE [FILE ...]\n\n  Intake Catalog Server\n\n  positional arguments:\n    FILE                  Name of catalog YAML file\n\n  optional arguments:\n    -h, --help            show this help message and exit\n    -p PORT, --port PORT  port number for server to listen on\n    --list-entries        list catalog entries at startup\n    --sys-exit-on-sigterm\n                          internal flag used during unit testing to ensure\n                          .coverage file is written\n    --flatten\n    --no-flatten\n    -a ADDRESS, --address ADDRESS\n                          address to use as a host, defaults to the address in\n                          the configuration file, if provided otherwise localhost\n    usage: intake-server [-h] [-p PORT] [--list-entries] [--sys-exit-on-sigterm]\n                 [--flatten] [--no-flatten] [-a ADDRESS]\n                 FILE [FILE ...]\n\nTo start the server with a local catalog file, use the following:\n\n::\n\n  >>> intake-server intake/catalog/tests/catalog1.yml\n  Creating catalog from:\n    - intake/catalog/tests/catalog1.yml\n  catalog_args ['intake/catalog/tests/catalog1.yml']\n  Entries: entry1,entry1_part,use_example1\n  Listening on port 5000\n\nYou 
can use the catalog client (defined below) using:\n\n::\n\n  $ intake list intake://localhost:5000\n  entry1\n  entry1_part\n  use_example1\n\nIntake Client\n-------------\n\nWhile the Intake data sources will typically be accessed through the Python\nAPI, you can use the client to verify a catalog file.\n\nUnlike the server command, the client has several subcommands to access a\ncatalog. You can see the list of available subcommands with:\n\n::\n\n  >>> intake --help\n  usage: intake {list,describe,exists,get,discover} ...\n\nWe go into further detail in the following sections.\n\nList\n''''\n\nThis subcommand lists the names of all available catalog entries. This is\nuseful since other subcommands require these names.\n\nIf you wish to see the details about each catalog entry, use the ``--full`` flag.\nThis is equivalent to running the ``intake describe`` subcommand for all catalog\nentries.\n\n::\n\n  >>> intake list --help\n  usage: intake list [-h] [--full] URI\n\n  positional arguments:\n    URI         Catalog URI\n\n  optional arguments:\n    -h, --help  show this help message and exit\n    --full\n\n::\n\n  >>> intake list intake/catalog/tests/catalog1.yml\n  entry1\n  entry1_part\n  use_example1\n  >>> intake list --full intake/catalog/tests/catalog1.yml\n  [entry1] container=dataframe\n  [entry1] description=entry1 full\n  [entry1] direct_access=forbid\n  [entry1] user_parameters=[]\n  [entry1_part] container=dataframe\n  [entry1_part] description=entry1 part\n  [entry1_part] direct_access=allow\n  [entry1_part] user_parameters=[{'default': '1', 'allowed': ['1', '2'], 'type': u'str', 'name': u'part', 'description': u'part of filename'}]\n  [use_example1] container=dataframe\n  [use_example1] description=example1 source plugin\n  [use_example1] direct_access=forbid\n  [use_example1] user_parameters=[]\n\n\nDescribe\n''''''''\n\nGiven the name of a catalog entry, this subcommand lists the details of the\nrespective catalog entry.\n\n::\n\n  >>> intake 
describe --help\n  usage: intake describe [-h] URI NAME\n\n  positional arguments:\n    URI         Catalog URI\n    NAME        Catalog name\n\n  optional arguments:\n    -h, --help  show this help message and exit\n\n::\n\n  >>> intake describe intake/catalog/tests/catalog1.yml entry1\n  [entry1] container=dataframe\n  [entry1] description=entry1 full\n  [entry1] direct_access=forbid\n  [entry1] user_parameters=[]\n\n\nDiscover\n''''''''\n\nGiven the name of a catalog entry, this subcommand returns a key-value\ndescription of the data source. The exact details are subject to change.\n\n::\n\n  >>> intake discover --help\n  usage: intake discover [-h] URI NAME\n\n  positional arguments:\n    URI         Catalog URI\n    NAME        Catalog name\n\n  optional arguments:\n    -h, --help  show this help message and exit\n\n::\n\n  >>> intake discover intake/catalog/tests/catalog1.yml entry1\n  {'npartitions': 2, 'dtype': dtype([('name', 'O'), ('score', '<f8'), ('rank', '<i8')]), 'shape': (None,), 'datashape':None, 'metadata': {'foo': 'bar', 'bar': [1, 2, 3]}}\n\n\nExists\n''''''\n\nGiven the name of a catalog entry, this subcommand returns whether or not the\nrespective catalog entry is valid.\n\n::\n\n  >>> intake exists --help\n  usage: intake exists [-h] URI NAME\n\n  positional arguments:\n    URI         Catalog URI\n    NAME        Catalog name\n\n  optional arguments:\n    -h, --help  show this help message and exit\n\n::\n\n  >>> intake exists intake/catalog/tests/catalog1.yml entry1\n  True\n  >>> intake exists intake/catalog/tests/catalog1.yml entry2\n  False\n\n\nGet\n'''\n\nGiven the name of a catalog entry, this subcommand outputs the entire data\nsource to standard output.\n\n::\n\n  >>> intake get --help\n  usage: intake get [-h] URI NAME\n\n  positional arguments:\n    URI         Catalog URI\n    NAME        Catalog name\n\n  optional arguments:\n    -h, --help  show this help message and exit\n\n::\n\n  >>> intake get 
intake/catalog/tests/catalog1.yml entry1\n         name  score  rank\n  0    Alice1  100.5     1\n  1      Bob1   50.3     2\n  2  Charlie1   25.0     3\n  3      Eve1   25.0     3\n  4    Alice2  100.5     1\n  5      Bob2   50.3     2\n  6  Charlie2   25.0     3\n  7      Eve2   25.0     3\n\n\nConfig and Cache\n''''''''''''''''\n\nCLI functions starting with ``intake cache`` and ``intake config`` are available to\nprovide information about the system: the locations and value of configuration\nparameters, and the state of cached files.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/tour2.rst",
    "content": "Developers' Package Tour\n========================\n\nGeneral Guidelines\n------------------\n\nIntake is an open source project, and all development happens on `github`_. Please open issues\nor discussions there to talk about problems with the code or request features.\n\nTo contribute, you should:\n\n- clone the repo locally\n- fork the repo to your personal identity in github using the \"fork\" button\n- run ``pre-commit install`` in the repo\n- make changes locally as you see fit, and commit to a new branch\n- push the branch to your fork, and follow the prompt to create a Pull Request (PR)\n\nYou can expect comments on your PR within a couple of days.\n\nTo have a higher chance of having your changes accepted, a concise title and description are best,\nand ideally new code should be accompanied by tests.\n\n.. _github: https://github.com/intake/intake\n\nOutline\n-------\n\nFor those interested in Intake Take2, here are the places to look for contributing.\nAll of the implementation code lives under ``intake.readers``, which was developed for a\nwhile parallel with and without touching Intake's V1 code.\nThe list below gives summaries of the modules,\nand the principal classes themselves are in the :ref:`api2`.\n\n\n.. autosummary::\n    intake.config\n    intake.readers.catalogs\n    intake.readers.convert\n    intake.readers.datatypes\n    intake.readers.entry\n    intake.readers.importlist\n    intake.readers.metadata\n    intake.readers.mixins\n    intake.readers.namespaces\n    intake.readers.output\n    intake.readers.readers\n    intake.readers.search\n    intake.readers.transform\n    intake.readers.user_parameters\n\n\nCreating Datatypes and Readers\n------------------------------\n\nHere follows a minimalist complete set of classes to make a complete pipeline.\n\nA typical data/reader implementation in Intake Take 2 is very simple. Here is the CSV\nprototype\n\n.. 
code-block:: python\n\n    from intake.readers import FileData\n\n    class CSV(FileData):\n        filepattern = \"(csv$|txt$|tsv$)\"\n        mimetypes = \"(text/csv|application/csv|application/vnd.ms-excel)\"\n        structure = {\"table\"}\n\nThis specifies that CSVs live in files (the superclass), which also implies that they may\nbe local or remote. Further, the block specifies expected URL/filenames and MIME types,\nas well as an indicator that this filetype is typically used for tables. All of these attributes\nare optional - an instance just contains enough information to unambiguously identify the source\nof data. For the case of a CSV dataset, this would be just the URL(s) of the data plus any\nextra storage backend parameters. Other data types may have other necessary attributes,\nsuch as a SQL dataset is a combination of server connection string and query.\n\nThe pandas CSV reader counterpart looks like this\n\n.. code-block:: python\n\n    from intake.readers import FileReader, datatypes\n\n    class PandasCSV(FileReader):\n        imports = {\"pandas\"}\n        output_instance = \"pandas:DataFrame\"\n        storage_options = True\n        implements = {datatypes.CSV}\n        func = \"pandas:read_csv\"\n        url_arg = \"filepath_or_buffer\"\n\nThis says that:\n\n- the data type is made of files\n- the reader requires \"pandas\" to be installed\n- the result will be a DataFrame\n- if the URL is remote, fsspec-style storage_options are acceptable\n- it can be used on the CSV type from before (only)\n- it uses the ``read_csv`` function from the ``pandas`` package\n- and that the URL of the data source should be passed using the argument name \"filepath_or_buffer\" (this information can be found\n  from the target function's signature and docstring).\n\nOften a reader is this simple, or even simpler when you can group attributes in common subclasses.\nIn other cases, it may be necessary to override the key ``._read()`` method, which is the 
one\nthat does the work.\nIn fact, PandasCSV does override ``.discover()``, to add the ``nrows=`` argument,\nbut adding such refinements is optional.\n\nDoing the above is enough, such that a URL ending in \"csv\" will be recognised, and pandas offered\nas one of the potential readers; and thus we can make a reader instance and store it in a Catalog.\n\nNext, let's imagine we want to make a super simple converter:\n\n.. code-block:: python\n\n    from intake import BaseConverter\n\n    class PandasToStr(BaseConverter):\n        instances = {\"pandas:DataFrame\": \"builtins:str\"}\n        func = \"builtins:str\"\n\nThis just returns the string representation of the dataframe, turning DataFrame instances into\n``str`` instances (actually, it would work for just about any python object). The inclusion\nof \"DataFrame\" in ``instances`` means that Intake will know that this is a transform that can be\napplied to readers that produce a DataFrame, and it will appear in tab completions and a\nreader instance's ``.transform`` attribute.\n\nTo complete the pipeline, let's make an outputter which writes this back to a file\n\n.. code-block:: python\n\n    class StrToFile(BaseConverter):\n        instances = {\"builtins:str\": datatypes.Text.qname()}\n\n        def run(self, x, url, storage_options=None, metadata=None, **kwargs):\n            with fsspec.open(url, mode=\"wt\", **storage_options) as f:\n                f.write(x)\n            return datatypes.Text(url=url, storage_options=storage_options, metadata=metadata)\n\nAlthough we use ``fsspec`` (which is recommended, where possible), the code is again super-simple.\nIt is conventional, but not necessary, to have such \"output\" nodes return a datatypes instance.\n\nAll of this now allows:\n\n.. 
code-block:: python\n\n    >>> import intake\n    >>> intake.auto_pipeline(\"blah.csv\", \"Text\")\n    PipelineReader:\n\n      0: intake.readers.readers:PandasCSV, () {} => pandas:DataFrame\n      1: PandasToStr, () {} => builtins:str\n      2: StrToFile, () {} => intake.readers.datatypes:Text\n\n(where the output filename remains to be filled in)\n\nPackaging\n---------\n\nHaving made a couple of new classes, how would we get these to potential users?\n\nAssuming you are already familiar with how to create a python package _in_general_,\nwhat you need to know, is that Intake will find the new code so long as the classes\nare subclasses of BaseData, BaseReader (etc.), and the code is imported. That importing\ncan be done\n\n- explicitly (which is good form for ad-hoc/experimental use)\n- including an `entrypoint`_ for the package in the group \"intake.imports\",\n  where the value would be of the form\n  \"package.module\" or \"package:module\" (the latter for ``import .. from`` style). This\n  requires that the new package is installed via ``pip``, ``conda``, etc.\n- adding the package/module to ``intake.conf[\"extra_imports\"]`` and saving; this will take\n  effect on the next import of Intake.\n\n.. _entrypoint: https://packaging.python.org/en/latest/specifications/entry-points/\n\nMigration from V1\n-----------------\n\nSection :ref:`v1` shows the principal differences to Intake before Take2. From\na developer's viewpoint, if porting former plugins, here are some things to bear in mind.\n\n- in v2 we generally separate out the definition of the data itself versus the\n  specific reader, e.g., HDF5 is a file type, but xarray is a reader which can\n  handle HDF5. It is totally possible to write a reader without a data type if appropriate.\n  See :ref:`base` for an overview of the classes.\n- the new readers only really have one method that matters, ``.read()``, and will\n  contain all of the previous logic. 
It should consistently only produce one\n  particular output type. Other attributes of BaseReader (or FileReader) are\n  one-line overrides and mostly provide information rather than functionality;\n  for instance, Intake uses these for recommending readers for a given data instance.\n- for catalog-producing readers, the output type will be :class:`intake.readers.entry:Catalog`,\n  and the ``.read()`` method will create the Catalog instance and assign readers into it.\n  Module ``intake.readers.catalogs`` contains some patterns to copy.\n- if using file patterns: the ``DaskCSVPattern`` reader will give an idea of how to\n  implement that in the new framework.\n- if using V1 plots, dataframe and xarray-producing readers have the ``ToHvPlot``\n  converter, which can be used for similar functionality.\n\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/transforms.rst",
    "content": "Dataset Transforms\n------------------\n\naka. derived datasets.\n\n.. warning::\n    experimental feature, the API may change. The data sources in\n    ``intake.source.derived`` are not yet declared as top-level\n    named drivers in the package entrypoints.\n\nIntake allows for the definition of data sources which take as their input\nanother source in the same directory, so that you have the opportunity to\npresent *processing* to the user of the catalog.\n\nThe \"target\" of a derived data source will normally be a string. In the\nsimple case, it is the name of a data source in the same catalog. However,\nwe use the syntax \"catalog:source\" to refer to sources in other catalogs,\nwhere the part before \":\" will be passed to :func:`intake.open_catalog`,\ntogether with any keyword arguments from ``cat_kwargs``.\n\nThis can be done by defining classes which inherit from\n``intake.source.derived.DerivedSource``, or using one of the pre-defined classes\nin the same module, which usually need to be passed a reference to a function\nin a python module. We will demonstrate both.\n\nExample\n```````\nConsider the following *target* dataset, which loads some simple facts\nabout US states from a CSV file. This example is taken from the Intake\ntest suite.\n\n.. code-block:: yaml\n\n    sources:\n      input_data:\n        description: a local data file\n        driver: csv\n        args:\n          urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv'\n\nWe now show two ways to apply a super-simple transform to this data,\nwhich selects two of the dataframe's columns.\n\n\nClass Example\n~~~~~~~~~~~~~\n\nThe first version uses an approach in which the transform is derived in a\ndata source class, and the parameters passed are specific to the transform type.\nNote that the driver is referred to by its fully-qualified name in the\nIntake package.\n\n.. 
code-block:: yaml\n\n      derive_cols:\n        driver: intake.source.derived.Columns\n        args:\n          targets:\n            - input_data\n          columns: [\"state\", \"slug\"]\n\nThe source class for this is included in the Intake codebase, but the important\npart is:\n\n.. code-block:: python\n\n    class Columns(DataFrameTransform):\n        ...\n\n        def pick_columns(self, df):\n            return df[self._params[\"columns\"]]\n\nWe see that this specific class inherits from ``DataFrameTransform``,\nwith ``transform=self.pick_columns``. We know\nthat the inputs and outputs are both dataframes. This allows for some additional validation\nand an automated way to infer the output dataframe's schema that reduces the number of lines\nof code required.\n\nThe given method does exactly what you might imagine: it takes an input dataframe and\napplies a column selection to it.\n\nRunning ``cat.derive_cols.read()`` will indeed, as expected, produce a version of the data\nwith only the selected columns included. It does this by defining the original dataset,\napplying the selection, and then getting Dask to generate the output. For some datasets,\nthis can mean that the selection is pushed down to the reader, and the data for the dropped\ncolumns is never loaded. The user may choose to do ``.to_dask()`` instead, and manipulate\nthe lazy dataframe directly, before loading.\n\nFunctional Example\n~~~~~~~~~~~~~~~~~~\n\nThis second version of the same output uses the more generic and flexible\n``intake.source.derived.DataFrameTransform``.\n\n.. 
code-block:: yaml\n\n    derive_cols_func:\n      driver: intake.source.derived.DataFrameTransform\n      args:\n        targets:\n          - input_data\n        transform: \"intake.source.tests.test_derived._pick_columns\"\n        transform_kwargs:\n          columns: [\"state\", \"slug\"]\n\nIn this case, we pass a reference to a *function* defined in the Intake test suite.\nNormally this would be declared in user modules, where perhaps those declarations\nand catalog(s) are distributed together as a package.\n\n.. code-block:: python\n\n    def _pick_columns(df, columns):\n        return df[columns]\n\nThis is, of course, very similar to the method shown in the previous section,\nand again applies the selection in the given named argument to the input. Note that\nIntake does not support including actual code in your catalog, since we would not\nwant to allow arbitrary execution of code on catalog load, as opposed to execution.\n\nLoading this data source proceeds exactly the same way as the class-based approach,\nabove. Both Dask and in-memory (Pandas, via ``.read()``) methods work as expected.\nThe declaration in YAML, above, is slightly more verbose, but the amount of\ncode is smaller. This demonstrates a tradeoff between flexibility and concision. If\nthere were validation code to add for the arguments or input dataset, it would be\nless obvious where to put these things.\n\nBarebone Example\n~~~~~~~~~~~~~~~~\n\nThe previous two examples both did dataframe to dataframe transforms. However, totally\narbitrary computations are possible. Consider the following:\n\n.. code-block:: yaml\n\n  barebones:\n    driver: intake.source.derived.GenericTransform\n    args:\n      targets:\n        - input_data\n      transform: builtins.len\n      transform_kwargs: {}\n\nThis applies ``len`` to the input dataframe. ``cat.barebones.describe()`` gives\nthe output container type as \"other\", i.e., not specified. 
The result of ``read()``\non this gives the single number 50, the number of rows in the input data. This class,\nand ``DerivedSource``, are included with the intent of serving as superclasses, and probably\nwill not be used directly often.\n\nExecution engine\n````````````````\n\nNone of the above examples specified explicitly where the compute implied by the\ntransformation will take place. However, most Intake drivers support in-memory containers\nand Dask; remembering that the input dataset here is a dataframe. However, the behaviour\nis defined in the driver class itself - so it would be fine to write a driver in which\nwe make different assumptions. Let's suppose, for instance, that the original source\nis to be loaded from ``spark`` (see the ``intake-spark`` package), the driver could\nexplicitly call ``.to_spark`` on the original source, and be assured that it has a\nSpark object to work with. It should, of course, explain in its documentation what\nassumptions are being made and that, presumably, the user is expected to also call\n``.to_spark`` if they wished to directly manipulate the spark object.\n\nPlugin examples\n```````````````\n\n - call `.sel` on xarray datasets `xarray-plugin-transform`_\n\n.. _xarray-plugin-transform: https://github.com/intake/intake-xarray/blob/master/intake_xarray/derived.py#L38\n\nAPI\n```\n\n.. autosummary::\n   intake.source.derived.DerivedSource\n   intake.source.derived.AliasSource\n   intake.source.derived.GenericTransform\n   intake.source.derived.DataFrameTransform\n   intake.source.derived.Columns\n\n.. autoclass:: intake.source.derived.DerivedSource\n   :members: __init__\n.. autoclass:: intake.source.derived.GenericTransform\n   :members: __init__\n.. autoclass:: intake.source.derived.DataFrameTransform\n   :members: __init__\n.. autoclass:: intake.source.derived.Columns\n   :members: __init__\n\n.. 
raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/use_cases.rst",
    "content": ".. _usecases:\n\nUse Cases - I want to...\n========================\n\nHere follows a list of specific things that people may want to get done, and\ndetails of how Intake can help. The details of how to achieve each of these\nactivities can be found in the rest of the detailed documentation.\n\nAvoid copy&paste of blocks of code for accessing data\n-----------------------------------------------------\n\nThis is a very common pattern, if you want to load some specific data, to\nfind someone, perhaps a colleague, who has accessed it before, and copy that\ncode. Such a practice is extremely error prone, and cause a proliferation of\ncopies of code, which may evolve over time, with various versions simultaneously\nin use.\n\nIntake separates the concerns of data-source specification from code. The\nspecs are stored separately, and all users can reference the one and only\nauthoritative definition, whether in a shared file, a service visible to\neveryone or by using the Intake server. This spec can be updated so that\neveryone gets the current version instead of relying on outdated code.\n\nVersion control data sources\n----------------------------\n\nVersion control (e.g., using ``git``) is an essential practice in modern\nsoftware engineering and data science. It ensures that the change history is\nrecorded, with times, descriptions and authors along with the changes themselves.\n\nWhen data is specified using a well-structured syntax such as YAML, it can\nbe checked into a version controlled repository in the usual fashion. Thus, you\ncan bring rigorous practices to your data as well as your code.\n\nIf using conda packages to distribute data specifications, these come with a\nnatural internal version numbering system, such that users need only do\n``conda update ...`` to get the latest version.\n\n\"Install\" data\n--------------\n\nOften, finding and grabbing data is a major hurdle to productivity. 
People may be\nrequired to download artifacts from various places or search through storage\nsystems to find the specific thing that they are after. One-line commands which\ncan retrieve data-source specifications or the files themselves can be a massive\ntime-saver. Furthermore, each data-set will typically need its own code to\nbe able to access it, and probably additional software dependencies.\n\nIntake allows you to build ``conda`` packages, which can include catalog files\nreferencing online resources, or to include data files directly in that package.\nWhether uploaded to ``anaconda.org`` or hosted on a private enterprise channel,\ngetting the data becomes a single ``conda install ...`` command, whereafter\nit will appear as an entry in ``intake.cat``. The conda package brings versioning\nand dependency declaration for free, and you can include any code that may be\nrequired for that specific data-set directly in the package too.\n\nUpdate data specifications in-place\n-----------------------------------\n\nIndividual data-sets often may be static, but commonly, the \"best\" data to get a\njob done changes with time as new facts emerge. Conversely, the very same data\nmight be better stored in a different format which is, for instance, better-suited\nto parallel access in the cloud. In such situations, you really don't want to force\nall the data scientists who rely on it to have their code temporarily broken and\nbe forced to change this code.\n\nBy working with a catalog file/service in a fixed shared location, it is possible to\nupdate the data source specs in-place. When users now run their code, they will get\nthe latest version. 
Because all Intake drivers have the same API, the code using the\ndata will be identical and not need to be changed, even when the format has been\nupdated to something more optimised.\n\nAccess data stored on cloud resources\n-------------------------------------\n\nServices such as AWS S3, GCS and Azure Datalake (or private enterprise variants of\nthese) are increasingly popular locations to amass large amounts of data. Not only\nare they relatively cheap per GB, but they provide long-term resilience, metadata\nservices, complex access control patterns and can have very large data throughput\nwhen accessed in parallel by machines on the same architecture.\n\nIntake comes with integration to cloud-based storage out-of-the box for most of the\nfile-based data formats, to be able to access the data directly in-place and in\nparallel. For the few remaining cases where direct access is not feasible, the\ncaching system in Intake allows for download of files on first use, so that\nall further  access is much faster.\n\nWork with \"Big Data\"\n--------------------\n\nThe era of Big Data is here! The term means different things to different people,\nbut certainly implies that an individual data-set is too large to fit into the\nmemory of a typical workstation computer (>>10GB). Nevertheless, most data-loading\nexamples available use functions in packages such as ``pandas`` and expect to\nbe able to produce in-memory representations of the whole data. This is clearly a\nproblem, and a more general answer should be available aside from \"get more memory\nin your machine\".\n\nIntake integrates with ``Dask`` and ``Spark``, which both offer out-of-core\ncomputation (loading the data in chunks which fit in memory and aggregating result)\nor can spread their work over a cluster of machines, effectively making use of the\nshared memory resources of the whole cluster. 
Dask integration is built into the\nmajority of the drivers and exposed with the ``.to_dask()`` method, and Spark\nintegration is available for a small number of drivers with a similar ``.to_spark()``\nmethod, as well as directly with the ``intake-spark`` package.\n\nIntake also integrates with many data services which themselves can perform big-data\ncomputations, only extracting the smaller aggregated data-sets that *do* fit into\nmemory for further analysis. Services such as SQL systems, ``solr``, ``elastic-search``,\n``splunk``, ``accumulo`` and ``hbase`` all can distribute the work required to fulfill\na query across many nodes of a cluster.\n\nFind the right data-set\n-----------------------\n\nBrowsing for the data-set which will solve a particular problem can be hard, even\nwhen the data have been curated and stored in a single, well-structured system. You\ndo *not* want to rely on word-of-mouth to specify which data is right for which job.\n\nIntake catalogs allow for self-description of data-sets, with simple text and\narbitrary metadata, with a consistent access pattern. Not only can you list the data\navailable to you, but you can find out what exactly that data represents, and\nthe form the data would take if loaded (table versus list of items, for example). This extra\nmetadata is also searchable: you can descend through a hierarchy of catalogs with\na single search, and find all the entries containing some particular keywords.\n\nYou can use the Intake GUI to graphically browse through your available data-sets or\npoint to catalogs available to you, look through the entries listed there and get\ninformation about each, or even show a sample of the data or quick-look plots. The GUI\nis also able to execute searches and browse file-systems to find data artifacts of\ninterest. 
This same functionality is also available via a command-line interface\nor programmatically.\n\nWork remotely\n-------------\n\nInteracting with cloud storage resources is very convenient, but you will not want to\ndownload large amounts of data to your laptop or workstation for analysis. Intake\nfinds itself at home in the remote-execution world of jupyter and Anaconda Enterprise\nand other in-browser technologies. For instance, you can run the Intake GUI either as a\nstand-alone application for browsing data-sets or in a notebook for full analytics,\nand have all the runtime live on a remote machine, or perhaps a cluster which is\nco-located with the data storage. Together with cloud-optimised data formats such\nas parquet, this is an ideal set-up for processing data at web scale.\n\nTransform data to efficient formats for sharing\n-----------------------------------------------\n\nA massive amount of data exists in human-readable formats such as JSON, XML and CSV,\nwhich are not very efficient in terms of space usage and need to be parsed on\nload to turn into arrays or tables. Much faster processing times can be had with\nmodern compact, optimised formats, such as parquet.\n\nIntake has a \"persist\" mechanism to transform any input data-source into the format\nmost appropriate for that type of data, e.g., parquet for tabular data. The persisted\ndata will be used in preference at analysis time, and the schedule for updating from\nthe original source is configurable. The location of these persisted data-sets can\nbe shared with others, so they can also gain the benefits, or the \"export\" variant\ncan be used to produce an independent version in the same format, together with a\nspec to reference it by; you would then share this spec with others.\n\nAccess data without leaking credentials\n---------------------------------------\n\nSecurity is important. 
Users' identity and authority to view specific data should be\nestablished before handing over any sensitive bytes. It is, unfortunately, all too\ncommon for data scientists to include their username, passwords or other credentials\ndirectly in code, so that it can run automatically, thus presenting a potential\nsecurity gap.\n\nIntake does not manage credentials or user identities directly, but does provide hooks\nfor fetching details from the environment or other service, and using the values in\ntemplating at the time of reading the data. Thus, the details are not included in\nthe code, but every access still requires them to be present.\n\nIn other cases, you may want to require the user to provide their credentials every\ntime, rather than automatically establish them, and \"user parameters\" can be specified\nin Intake to cover this case.\n\nEstablish a data gateway\n------------------------\n\nThe Intake server protocol allows you fine-grained control over the set of data sources\nthat are listed, and exactly what to return to a user when they want to read some of\nthat data. This is an ideal opportunity to include authorisation checks,\naudit logging, and any more complicated access patterns, as required.\n\nBy streaming the data through a single channel on the server, rather than allowing\nusers direct access to the data storage backend, you can log and verify all access\nto your data.\n\nClear distinction between data curator and analyst roles\n--------------------------------------------------------\n\nIt is desirable to separate out two tasks: the definition of data-source specifications, and\naccessing and using data. This is so that those who understand the origins of the data\nand the implications of various formats and other storage options (such as chunk-size)\nshould make those decisions and encode what they have done into specs. 
It leaves the\ndata users, e.g., data scientists, free to find and use the data-sets appropriate for\ntheir work and simply get on with their job - without having to learn about various\nstorage formats and access APIs.\n\nThis separation is at the very core of what Intake was designed to do.\n\nUsers to be able to access data without learning every backend API\n------------------------------------------------------------------\n\nData formats and services are a wide mess of many libraries and APIs. A large amount of\ntime can be wasted in the life of a data scientist or engineer in finding out the details\nof the ones required by their work. Intake wraps these various libraries, REST APIs and\nsimilar, to provide a consistent experience for the data user. ``source.read()`` will\nsimply get all of the data into memory in the container type for that source - no\nfurther parameters or knowledge required.\n\nEven for the curator of data catalogs or data driver authors, the framework established by\nIntake provides a lot of convenience and simplification which allows each person to\ndeal with only the specifics of their job.\n\nData sources to be self-describing\n----------------------------------\n\nHaving a bunch of files in some directory is a very common pattern for data storage in the\nwild. There may or may not be a README file co-located giving some information in a\nhuman-readable form, but generally not structured - such files are usually different\nin every case.\n\nWhen a data source is encoded into a catalog, the spec offers a natural place to describe\nwhat that data is, along with the possibility to provide an arbitrary amount of\nstructured metadata and to describe any parameters that are to be exposed for user\nchoice. 
Furthermore, Intake data sources each have a particular container type, so\nthat users know whether to expect a dataframe, array, etc., and simple introspection\nmethods like ``describe`` and ``discover`` which return basic information about the\ndata without having to load all of it into memory first.\n\nA data source hierarchy for natural structuring\n-----------------------------------------------\n\nUsually, the set of data sources held by an organisation have relationships to one another,\nand would be poorly served to be provided as a simple flat list of everything available.\nIntake allows catalogs to refer to other catalogs. This means, that you can group data\nsources by various facets (type, department, time...) and establish hierarchical\ndata-source trees within which to find the particular data most likely to be of interest.\nSince the catalogs live outside and separate from the data files themselves, as many\nhierarchy structures as thought useful could be created.\n\nFor even more complicated data source meta-structures, it is possible to store all the\ndetails and even metadata in some external service (e.g., traditional SQL tables) with which\nIntake can interact to perform queries and return particular subsets of the\navailable data sources.\n\nExpose several data collections under a single system\n-----------------------------------------------------\n\nThere are already several catalog-like data services in existence in the world, and\nsome organisation may have several of these in-house for various different purposes.\nFor example, an SQL server may hold details of customer lists and transactions, but\nhistorical time-series and reference data may be held separately in archival data formats like\nparquet on a file-storage system; while real-time system monitoring is done by a\ntotally unrelated system such as Splunk or elastic search.\n\nOf course, Intake can read from various file formats and data services. 
However, it\ncan also interpret the internal conception of data catalogs that some data services may\nhave. For example, all of the tables known to the SQL server, or all of the pre-defined\nqueries in Splunk can be automatically included as catalogs in Intake, and take their\nplace amongst the regular YAML-specified data sources, with exactly the same usage for\nall of them.\n\nThese data sources and their hierarchical structure can then be exposed via the\ngraphical data browser, for searching, selecting and visualising data-sets.\n\nModern visualisations for all data-sets\n---------------------------------------\n\nIntake is integrated with the comprehensive ``holoviz`` suite, particularly ``hvplot``, to\nbring simple yet powerful data visualisations to any Intake data source by using just one\nsingle method for everything. These plots are interactive, and can include server-side\ndynamic aggregation of very large data-sets to display more data points than the\nbrowser can handle.\n\nYou can specify specific plot types right in the data source definition, to have these\ncustomised visualisations available to the user as simple one-liners known to\nreveal the content of the data, or even view the same visuals right in the graphical\ndata source browser application. Thus, Intake is already an all-in-one data investigation\nand dashboarding app.\n\n\nUpdate data specifications in real time\n---------------------------------------\n\nIntake data catalogs are not limited to reading static specification from\nfiles. They can also execute queries on remote data services and return lists of\ndata sources dynamically at runtime. 
New data sources may appear, for example,\nas directories of data files are pushed to a storage service, or new tables are\ncreated within a SQL server.\n\nDistribute data in a custom format\n----------------------------------\n\nSometimes, the well-known data formats are just not right for a given data-set,\nand a custom-built format is required. In such cases, the code to read the data\nmay not exist in any library. Intake allows for code to be distributed along\nwith data source specs/catalogs or even files in a single ``conda`` package.\nThat encapsulates everything needed to describe and use that particular data,\nand can then be distributed as a single entity, and installed with a one-liner.\n\nFurthermore, should the few builtin container types (sequence, array, dataframe)\nnot be sufficient, you can supply your own, and then build drivers that use it.\nThis was done, for example, for ``xarray``-type data, where multiple related\nN-D arrays share a coordinate system and metadata. By creating this container,\na whole world of scientific and engineering data was opened up to Intake. Creating\nnew containers is not hard, though, and we foresee more coming, such as\nmachine-learning models and streaming/real-time data.\n\nCreate Intake data-sets from scratch\n------------------------------------\n\nIf you have a set of files or a data service which you wish to make into a data-set,\nso that you can include it in a catalog, you should use the set of functions\n``intake.open_*``, where you need to pick the function appropriate for your\nparticular data. 
You can use tab-completion to list the set of data drivers you have\ninstalled, and find others you may not yet have installed at :ref:`plugin-directory`.\nOnce you have determined the right set of parameters to load the data in the manner\nyou wish, you can use the source's ``.yaml()`` method to find the spec that describes\nthe source, so you can insert it into a catalog (with appropriate description and\nmetadata). Alternatively, you can open a YAML file as a catalog with ``intake.open_catalog``\nand use its ``.add()`` method to insert the source into the corresponding file.\n\n.. raw:: html\n\n    <script data-goatcounter=\"https://intake.goatcounter.com/count\"\n        async src=\"//gc.zgo.at/count.js\"></script>\n"
  },
  {
    "path": "docs/source/user2.rst",
    "content": ".. catalog_user:\n\nCatalog User\n============\n\nSo someone has sent you an Intake URL or other way to load a catalog. What happens next?\nLet's do the simplest thing and load a public catalog\n\n.. code-block:: python\n\n    cat = intake.from_yaml_file(\"s3://mymdtemp/intake_1.yaml\", anon=True)\n\n(this is the same catalog as is made in the example `tutorial noteboook`_)\n\n.. _tutorial noteboook: https://github.com/intake/intake/blob/master/examples/Take2.ipynb\n\nDisplaying the catalog shows that it has four named datasets and some automatically\npopulated \"user parameters\".\n\nInteresting attributes of the catalog are:\n\n- ``cat.data``: full description of the original datasets\n\n.. code-block:: python\n\n    {'7e0b327a50eef58d': DataDescription type intake.readers.datatypes:CSV\n      kwargs {'metadata': {}, 'storage_options': {'anon': True}, 'url': '{CATALOG_DIR}/intake_1.csv'}}\n\n- ``cat.entries``: readers, ways to load the data\n\n.. code-block:: python\n\n    {'capitals': Entry for reader: intake.readers.convert:Pipeline\n      kwargs: {'out_instances': ['pandas:DataFrame', 'pandas:DataFrame', 'pandas:DataFrame', 'pandas:DataFrame'],\n               'steps': [\n                    ['{data(tute)}', [], {}],\n                    ['{func(intake.readers.transform:Method)}', [], {'method_name': 'a'}],\n                    ['{func(intake.readers.transform:Method)}', [], {'method_name': 'str'}],\n                    ['{func(intake.readers.transform:Method)}', [], {'method_name': 'capitalize'}]]}\n      producing: pandas:DataFrame,\n     'inverted': Entry for reader: intake.readers.convert:Pipeline\n      kwargs: {'out_instances': ['pandas:DataFrame', 'pandas:DataFrame'],\n               'steps': [\n                    ['{data(tute)}', [], {}],\n                    ['{func(intake.readers.transform:Method)}', [], {'args': ['b'], 'ascending': False, 'method_name': 'sort_values'}]]}\n      producing: pandas:DataFrame,\n     'multi': Entry for 
reader: intake.readers.convert:Pipeline\n      kwargs: {'out_instances': ['pandas:DataFrame', 'pandas:DataFrame'],\n               'steps': [\n                    ['{data(tute)}', [], {}],\n                    ['{func(intake.readers.transform:Method)}', [], {'c': '{data(capitals)}', 'method_name': 'assign'}]]}\n      producing: pandas:DataFrame,\n     'tute': Entry for reader: intake.readers.readers:PandasCSV\n      kwargs: {'data': '{data(7e0b327a50eef58d)}'}\n      producing: pandas:DataFrame}\n\n- ``cat.aliases``: names to associate with readers or data (these are the ones used with tab-completion)\n\n.. code-block:: python\n\n    {'capitals': 'capitals',\n     'inverted': 'inverted',\n     'multi': 'multi',\n     'tute': 'tute'}\n\n- ``cat.user_parameters``: values that can be used in templated values (see below)\n\n.. code-block:: python\n\n    {'CATALOG_PATH': 's3://mymdtemp/intake_1.yaml',\n     'CATALOG_DIR': 's3://mymdtemp',\n     'STORAGE_OPTIONS': {'anon': True}}\n\n\nYou can even get an overall view of everything in the catalog using ``cat.to_dict()``, which gives you\nback essentially the same information as was contained in the YAML file we read the catalog from.\nAlso notice, that there is metadata associated with the whole catalog, and each of the\ndata and reader descriptions. All the readers depend on the one dataset (\"multi\" depends on\nit twice) and all are Pipelines (with \"steps\") except \"tute\".\n\nKey Concepts\n------------\n\nA few definitions that will help you:\n\n- data: a set of numbers of various forms, which can be used to infer information\n  about some domain\n\n- dataset: a specific delimited amount of data, often a single file, a directory of\n  files or a single request or query to some service. The output of any Intake reader\n  is also a \"dataset\", as represented in a live python session\n\n- data: the basic information needed to uniquely identify a dataset, such\n  as data type, URL/paths, server location, query. 
Intake supports many data types (:ref:`data`).\n  A description ought to also contain descriptive information in its metadata.\n\n- reader: how a given dataset should be handled/loaded. This is more specific than the data\n  itself, since there may be many different ways to read the data. For instance, CSVs are a\n  very common and simple data format, and virtually all (table-oriented) data packages can read them.\n\n- pipeline: a sequence of operations on a dataset. In Intake, this is just a type of reader, although\n  it is possible to refer to the output of any particular stage.\n\n- catalog: a collection of datasets and their reader descriptions. Each dataset may be referred to by\n  multiple readers, and a reader may refer to multiple datasets, although the latter is less common\n  (think of JOIN operations).\n\n- templates, user-parameters: in the catalog definition of the one dataset, you will notice special\n  syntax for part of the URL value to be filled in. See below for how to work with this.\n\nReader API\n----------\n\nBefore accessing any of the entries in a catalog, you should introspect them to see if it is\nwhat you are after. There should be descriptive text, other metadata and of course the contents\nof the data/readers, as shown above. It is important to note, that extracting readers (the next\nstep) already comes with security implications, such as evaluating environment variables and\nmaking imports. The \"allow_*\" keys in the intake configuration, ``intake.conf`` define what\nis generally allowed.\n\nAs an end-user, you will generally interact with readers. Get them from the catalog by attribute\naccess or item access; the latter is required where the name is not a valid python identifier\nor conflicts with a method. The following two lines are exactly equivalent:\n\n.. 
code-block::\n\n    reader = cat.tute\n    reader = cat[\"tute\"]\n    reader.pprint()\n\n    {'kwargs': {'data': {'url': 's3://mymdtemp/intake_1.csv',\n                     'storage_options': {'anon': True},\n                     'metadata': {}}},\n     'metadata': {},\n     'output_instance': 'pandas:DataFrame'}\n\n\n.. note::\n\n    We will work on the best way to represent the various instances, especially in the notebook.\n    For the time being, you can always use the ``.pprint()`` method, or introspect the\n    instance's attributes.\n\nWe notice that this is NOT exactly the same as the entry in the original catalog with name\n\"tute\". In particular: the reader is a concrete instance of a subclass of\n:class:`intake.readers.readers.BaseReader`,\nit contains the data definition it referenced and the URL of which has been expanded to the\nfull \"s3://..\".\n\nThe most obvious thing to do to a reader is read: this is, after all, what they are for. We\nalready know to expect an output type a pandas DataFrame. ``reader.doc()`` provides the\ndocstring of the target function, in this case ``read_csv()``. You can\npass extra or override arguments, with exactly the same names and meaning as the\noriginal docstring (some readers might provide extra functionality or possibilities).\n\n.. code-block:: python\n\n    reader.read()\n\n       Unnamed: 0   a  b\n    0           0  ho  4\n    1           1  hi  5\n\n    reader(index_col=[0]).read()\n\n        a  b\n    0  ho  4\n    1  hi  5\n\nFor large datasets, you may try ``.discover()`` instead, which is generally a small subset\nof the data, depending on the format and library. For small datasets like this one, you get\nexactly the same output.\n\nTemplates\n---------\n\nReturning to the mysterious \"s3://\" URL in the reader instance above. This was created from\nthe URL \"{CATALOG_DIR}/intake_1.csv\" using templating. 
You may recall that the catalog had\na user_parameter of this name, whose value was auto-populated from the URL we used to read\nthe catalog file. This means that the data file and catalog describing it could be moved\ntogether to a new location without having to edit the catalog. On the other hand, if the\nURL were not templated, moving/copying the catalog would still refer to the original\ndata location (sometimes this is what you want).\n\nThis particular user_parameter was global to the catalog, and to assign a new value before\ntemplating, you would do\n\n.. code-block:: python\n\n    cat2 = cat(CATALOG_DIR=\"new_value\")\n\n(so ``cat2.tute`` would now have a different data URL and no longer load!). It is also\npossible to have parameters associated with the data description and/or specific readers,\nand for any parameter to be used in multiple places. They can also have specific types,\ndefaults and constraints/choices. If a template refers to a parameter that is missing\nor has no value set, it will be left unchanged, and the data in question will probably not load.\n"
  },
  {
    "path": "docs/source/walkthrough2.rst",
    "content": "Creator Walkthrough\n===================\n\nAs soon as you have used a catalog, you may wonder how to create the - look no further.\nEVen if you don't make catalogs, you may find the data and reader class recommenders useful.\n\nPreamble\n--------\n\nIntake is a bunch of _readers_. A reader is a class that encodes how to load some\nparticular data, including all the arguments that will be passed to some third-party\npackage. Thus, you can encode anything from the simplest ``pd.read_csv`` call to\nfar more complex things. You can also act on readers, to record a set of\noperations known as a Pipeline. A Pipeline is just a special kind of reader.\n\nCatalogs contain readers and data definitions. Readers can and should be saved in Catalogs,\nwhich can in the simplest case be saved as text files, but can also be generated from\nmany data services. Catalogs and readers are the central themes of intake.\n\n.. note::\n\n    \"Load\" means make a machine representations: many data containers may be lazy,\n    where you get an object on which you can act, but the data is only read on\n    demand for actions that require it.\n\n\nSimple Example\n--------------\n\nLet's consider a typical Pandas line to read a CSV file. Although there are many ways to\nload data in python, this is one of the most common.\n\n.. code-block:: python\n\n    import pandas as pd\n    url = \"s3://mymdtemp/intake_1.csv\"\n    df = pd.read_csv(url, storage_options={\"anon\": True}, usecols=[1, 2])\n\nSimple enough, so long as you have ``fsspec`` and ``s3fs`` installed. Note the use of extra\noptions to the storage backend (this file is public) and up-front column selection.\n\nTo encode this with intake, you can do\n\n.. 
code-block:: python\n\n    import intake\n    reader = intake.reader_from_call(\n        'df = pd.read_csv(url, storage_options={\"anon\": True}, usecols=[1, 2])')\n\nWhere you could have used ``_i``, the IPython shorthand for \"the last line of input\" instead of\nthe copy/pasted line. The \"reader\" object encodes all the things we asked for, and we\ncan execute the process by calling ``reader.read()`` to get the same result as before. So\nwhat's the point?\n\nWe can put this reader into a catalog:\n\n.. code-block:: python\n\n    cat = intake.entry.Catalog()\n    cat[\"tute\"] = reader\n    cat.to_yaml_file(\"intake_1.yaml\")\n\nwhere the path could be anyplace you can write to, such as a local file. This can be the\nwhole of the \"catalog producer\" workflow.\n\nA public version has been put alongside the CSV, so as the data consumer, you can read this\nwherever you are. The following could be the whole of a data consumer workflow:\n\n.. code-block:: python\n\n    import intake\n    cat = intake.from_yaml_file(\"s3://mymdtemp/intake_1.yaml\", anon=True)\n    cat.tute.read()\n\nNote that ``cat.tute`` has a lot of optional metadata that we have not filled out. It\nalso provides documentation of the function it will call at read time (``cat.tute.doc()``),\nin case you want to add or change arguments.\nThe point is not to know every time which specific dataset you need (although that happens too),\nbut a way to view all of your available data and find the right one for your job before loading it.\n\nSimilarly, we could have started with the URL alone\n\n.. code-block:: python\n\n    intake.datatypes.recommend(\"s3://mymdtemp/intake_1.csv\", storage_options={\"anon\": True})\n\nWhich says that this URL likely refers to a CSV file. 
In this case, that is not surprising,\nbut in many cases, it can be useful to have Intake guess the file type for us.\n\n\n**Summary**: In this section we saw one way to easily create a data loading spec (\"reader\"), save it\ninto a catalog and then load this data using the catalog. This is data distribution\nwithout any server.\n\nSlightly less Simple\n--------------------\n\nLet's do trivial transforms to our trivial dataset. Continuing from above:\n\n.. code-block:: python\n\n    cat[\"capitals\"] = reader.a.str.capitalize()\n    cat[\"inverted\"] = reader.sort_values(\"b\", ascending=False)\n    cat.to_yaml_file(\"intake_1.yaml\")\n\nAgain, this could be persisted anywhere, but the path above includes all three datasets:\n\n.. code-block:: python\n\n    import intake\n    cat = intake.from_yaml_file(\"s3://mymdtemp/intake_1.yaml\", anon=True)\n    list(cat)  # -> ['capitals', 'inverted', 'tute']\n\nNow we have three datasets all based off the same original file. Investigating ``cat.data``,\nyou can see that there is exactly one definition: a CSV with the URL as defined above,\nand investigating ``cat.entries``, you can see how the three things reference it and\none-another. We can also make readers that depend on multiple data sources:\n\n.. code-block:: python\n\n    cat[\"multi\"] = cat.tute.assign(c=cat.capitals)\n\n(Since we only have one base dataset, this depends on itself, but there is still only\none value in ``cat.data``, the original CSV)\n\n.. note::\n\n    We made derived datasets by knowing and calling the pandas API explicitly. These methods\n    are available by tab-completion (in a jupyter session, for example). 
Or one might\n    interrogate the ``.transform`` attribute of any reader to know which methods are\n    defined in Intake, or expected to be available for the given type (a DataFrame in\n    this case).\n\n**Summary** : here we showed that you can create processing pipelines from datasets and save\nthese derived datasets as new definitions. The syntax is the same as whatever package you\nintend to use for the actual processing.\n\nMultiple Readers\n----------------\n\nIn the previous examples, we knew we were starting with a CSV and wanted to make\npipelines using pandas. Often, life is not that simple! It can take work to figure\nout where to look for data, and then what type that data is and which engine is\nbest suited to work with it. In fact, the answer to that might depend on a number\nof factors and not be the same for all users on all days. So, Intake allows\nyou to make multiple pipelines from the same data with different engines and\nleave the choice of which to use to runtime. It will also help you find\npathways to transform your data between frameworks to get to your desired outcome.\n\nThere is only one data entity in our example mini-catalog, so let's grab it:\n\n.. code-block:: python\n\n    cat = intake.from_yaml_file(\"s3://mymdtemp/intake_1.yaml\", anon=True)\n    key = list(cat.data)[0]\n    data = cat[key]\n\n.. note::\n\n    The key of this in ``cat.data`` is a hashed hex string. We could have given this\n    data entry a name, but it was auto-added to the catalog when adding the \"tute\"\n    reader.\n\nThis is a ``CSV`` data instance, a subclass of ``intake.readers.datatypes.BaseData``.\nIt can be read with multiple readers, since CSVs are so common. ``data.possible_readers``\nlists the classes that can read this grouped by whether they can be imported, and\n``data.possible_outputs`` gives the expected output instance from each importable\nreader. 
In both cases, more specific readers (for CSVs) come before the less specific\nones (that read any file). We could instantiate these readers directly, or call\nthe ``.to_reader`` method to pick one. The following two are equivalent, and\npick dask.dataframe as the backend to read the data (assuming it is installed).\n\n.. code-block:: python\n\n    intake.readers.readers.DaskCSV(data)\n    data.to_reader(\"dask\")  # this searches the outputs for something matching \"dask\"\n\nBoth lines produce a DaskCSV reader, which we can also put in a catalog, or just\n``.read()`` it to get a ``dask.dataframe.DataFrame`` output. We could have done the\nsame for Ray, Spark or DuckDB (and more to come, like cuDF, Modin, Polars, and\nthat's just for dataframes).\n\n**Summary**: a dataset can be read with multiple reader types, and we can save prescriptions\nin a catalog for each, with various sets of parameters if desired, but still have\nthe ability to go back to the data definition and pick something else.\n\nConversions\n-----------\n\nIntake can handle many filetypes (see ``intake.datatypes``) and many readers with\nvarious engines/packages (see ``intake.readers.readers``). These are, by design,\nsimple to write, and many more will be coming to fill out the data loading space.\nHowever, you may well find yourself needing to convert from one representation to\nanother. For example, you may wish to use DuckDB for efficient querying of remote\nParquet files, but then want the output as a Pandas dataframe for building an ML\nmodel. You can, of course, just save the DuckDB reader with the query, and convert\nto Pandas in every session, but wouldn't it be nice to encode that you mean to\ndo this conversion - and then you can chain further operations with the Pandas API,\nif you wish.\n\nIntake supports the following web of conversions, and more coming all the time.\nIt's too complex to see the details!\n\n.. 
image::  ./_static/images/out.png\n   :alt: Conversions Web\n\nFinding the specific conversion search might look something like\n\n.. code-block:: python\n\n    data = intake.datatypes.Parquet(\"my.parq\")\n    reader = data.to_reader(\"Duck\")\n    reader.transform\n\nWhere from the repr we see that a converter ``DuckToPandas`` exists and does what you expect.\n\nOr, you can often find a whole chain of conversions to get to where you want to go; starting\nwith either a URL (and any storage_options), data instance or reader instance. For the\nreader above, let's say we want to get an image (PNG) representation:\n\n.. code-block:: python\n\n    intake.auto_pipeline(reader, outtype=\"PNG\")\n\nproduces a pipeline going DuckDB->Pandas->matplotlib Figure->PNG file. If you try to run it,\nit will fail, saying that the output node (the last one) needs a URL. So you would actually\ndo the following:\n\n.. code-block:: python\n\n    output = intake.auto_pipeline(reader, outtype=\"PNG\")\n    output(url=\"out.png\").read()\n\nWhere obviously you could make extra arguments to the ToMatplotlib stage to customise the\ngraph. Any arguments or any node in the pipeline can be changed before running or\npersisting in a catalog, and any pipeline from a catalog can have its arguments overridden\nat runtime, if desired.\nThe final PNG file would be a nice addition to the metadata of the original data prescription,\nsee ``intake.readers.metadata_fields`` for suggested field names and descriptions.\n\n.. note::\n\n    There are many classes derived from ``BaseConverter``, and by convention those we call\n    \"converters\" keep the same data but in different representations, \"transforms\" change the data\n    but keep the same representations, and \"output\" produce side-effects and return a\n    BaseData instance. 
These rules are loose and may be violated.\n\n**Summary**: Intake handles not only many data types and compute engines, but knows how to\nconvert between them, providing some handy utilities for guessing the best pipeline to\nget to a given output.\n\nComplex Example\n---------------\n\nThe following code performs a rather typical workflow, recreating the \"persist\" functionality\nin V1 (using only standard blocks, no special code). This is somewhat verbose and explicit, for\nthe sake of clarity.\n\n.. note::\n\n    Saving local copies of files is more common, and can usually be achieved by adding\n    \"simplecache::\" (and a defined local directory) to the URL for readers that use ``fsspec``.\n    However, some data sources are not files, and of course it is often a good idea to\n    save a more efficient format of the data, as in the case here.\n\n.. code-block:: python\n\n    from intake.readers.readers import Condition, PandasCSV, PandasParquet, FileExistsReader\n\n    fn = f\"{tmpdir}/file.parquet\"\n    data = intake.readers.datatypes.CSV(url=dataframe_file)\n    part = PandasCSV(data)\n\n    output = part.PandasToParquet(url=fn).transform(PandasParquet)\n    data2 = intake.readers.datatypes.Parquet(url=fn)\n    cached = PandasParquet(data=data2)\n    reader2 = Condition(cached, if_false=output, condition=FileExistsReader(data2))\n\nThe pipeline can be described as:\n\n- there is a CSV file, ``dataframe_file``\n- there may be a parquet version of this, ``fn``\n- if the parquet file does not exist, load the CSV using pandas, save it to parquet and load that\n- if the parquet file already exists, load that without looking at the CSV.\n\nThere are of course many ways that one might achieve this and more complex \"conditions\" for when\nto run the conversion pipeline. However, the ``reader2`` object encodes the whole thing, and can\nbe safely stored in a catalog. 
A user can then use this standard condition, choose to remake the\nparquet, or just load the CSV without accessing the parquet at all. It would be reasonable to\nupdate the metadata of ``data`` or the readers to show the expected column types and row count\n(if they are not expected to change).\n\n**Summary**: you can branch and join pipelines and save the whole complicated tree in catalogs,\nallowing complex patterns like conditional caching.\n\nExtracting User Parameters\n--------------------------\n\nTo come\n"
  },
  {
    "path": "examples/Take2.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"f5f6d518-dfe8-4651-b315-44bb55b885be\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import intake\\n\",\n    \"import pandas as pd\\n\",\n    \"import hvplot.pandas\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"718dfddd-14eb-4a4e-ad9c-19f0c9f10f3c\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### Guess reader from existing code\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"1eb57314-f056-465e-ba75-22d5d0ca232b\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"url = \\\"s3://mymdtemp/intake_1.csv\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"31dde28e-718d-462e-b9bf-30601db948ca\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"df = pd.read_csv(url, storage_options={\\\"anon\\\": True}, usecols=[1, 2])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"1daac4ed-e5af-4a63-8a12-d97022e90d99\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"reader = intake.reader_from_call(_i)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"bda407ee-f26c-4788-b367-1fc3f370ccbb\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"reader\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"3acf782f-b887-4da7-a8b3-78463c52c8eb\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"reader.kwargs\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"c718b8bf-0489-4e29-b31c-b354f6373224\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### Or guess from the URL alone\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"9900e6f4-b896-40ad-bbaf-cb2563ae8252\",\n   
\"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# uses URL alone, but can also match on magic bytes\\n\",\n    \"intake.datatypes.recommend(url)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"771538c7-1467-4b1c-82ef-a9ace372465c\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"data = intake.readers.datatypes.CSV(url, storage_options={\\\"anon\\\": True})\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"4d9bd954-7c31-4638-80bf-ffc4cc7fb452\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### \\\"What can read this?\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"4dac3e18-f60a-4b91-946b-36d766a73bfe\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"data.possible_outputs\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"654e74d0-aa6a-4211-a04d-4fc6886c58df\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"data.possible_readers\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"4ddebb3c-745d-4294-9dee-583214075df7\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# same reader as original\\n\",\n    \"# reader = data.to_reader(\\\"pandas:DataFrame\\\")\\n\",\n    \"reader = intake.readers.readers.PandasCSV(data)\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"5e7ae6c4-6db4-4c42-b076-ea4225ba08df\",\n   \"metadata\": {},\n   \"source\": [\n    \"```python\\n\",\n    \"class PandasCSV(Pandas):\\n\",\n    \"    implements = {datatypes.CSV}\\n\",\n    \"    func = \\\"pandas:read_csv\\\"\\n\",\n    \"    url_arg = \\\"filepath_or_buffer\\\"\\n\",\n    \"\\n\",\n    \"    def discover(self, **kw):\\n\",\n    \"        kw[\\\"nrows\\\"] = 10\\n\",\n    \"        kw.pop(\\\"skipfooter\\\", None)\\n\",\n    \"        kw.pop(\\\"chunksize\\\", None)\\n\",\n    \"    
    return self.read(**kw)\\n\",\n    \"```\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"dc10228a-ae60-4551-a2a0-81e399d51130\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### Reader API\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"d4d9651d-ef91-4611-b5ae-ee3d762e7196\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"reader.read()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"da8e7b50-f7f5-4520-ae44-3c2e5d6a91ad\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"print(reader.doc())\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"98112835-41a3-48b5-bc92-7e923c263c38\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# known transforms and what they make\\n\",\n    \"reader.transform\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"f9d0ff3e-1723-4b8a-b549-7ad703c56cfa\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# but have access to full DataFrame API\\n\",\n    \"dir(reader)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"4d1fbfc6-70b9-4d84-80f3-fca293d8860f\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# or \\\"pd\\\" namespace (useful for some packages)\\n\",\n    \"reader.pd\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"08249340-d143-4463-b534-6b6ef83f6f79\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### So lets make a catalog and a pipeline using pandas syntax\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"843a8a67-5ac8-4128-b8fc-5a897d344578\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"cat = intake.entry.Catalog()\\n\",\n    \"cat[\\\"tute\\\"] = reader\\n\",\n    \"cat[\\\"capitals\\\"] = 
reader.a.str.capitalize()\\n\",\n    \"cat[\\\"inverted\\\"] = reader.sort_values(\\\"b\\\", ascending=False)\\n\",\n    \"cat[\\\"multi\\\"] = cat.tute.assign(c=cat.capitals)  # <- uses multiple readers\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"7ccf12af-dd3b-429f-be8b-a7fcc749c7a1\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"reader.a.str.capitalize()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"b1eb4728-eda8-467d-a4f0-be280309cc0f\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# what gets stored in the catalog entry?\\n\",\n    \"cat.entries[\\\"multi\\\"].kwargs\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"9393b669-9489-41fb-ad2f-c98970410770\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"cat\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"a8a123f6-17cd-404b-acd4-aef832a6e544\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"cat.tute.read()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"18d0bb78-d902-4689-9586-0ebc1686af32\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"cat.data  # just one data item\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"4c2a6379-1bb6-4258-996c-823ffc928eb9\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### To and from catalog file, which you can put anywhere\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"ca865c7e-a271-4ec9-a44d-efbd68138da0\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"cat.to_yaml_file(\\\"intake_1.yaml\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"7b9df0f9-a86a-4b73-9a1f-e355ab5be230\",\n   \"metadata\": {},\n   \"outputs\": 
[],\n   \"source\": [\n    \"# a \\\"shared\\\" one I prepared for everyone\\n\",\n    \"cat = intake.from_yaml_file(\\\"s3://mymdtemp/intake_1.yaml\\\", anon=True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"529a98be-c500-42c4-8b8e-fc8d535d60f4\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# yes, you have <tab> completion\\n\",\n    \"cat.tute\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"24e83c33-3ba7-4012-ac08-ac3b40521c42\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"cat.inverted.read()\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"e3b35a6f-8583-421a-8321-65a62bcf0777\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### And now you can go about your work; but some convenience functions might still be useful.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"a27fec42-c46a-442a-9574-83b4d312223f\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# add arguments to make a reader you can persist\\n\",\n    \"cat.inverted.ToHvPlot(explorer=True).read()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"c7b89802-4b83-4587-9103-416ae6423100\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"cat.inverted.ToMatplotlib.read()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"68c24625-9aab-405a-b2bd-8b6a7a43bb8d\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# you can even have Intake guess the whole pipeline\\n\",\n    \"intake.auto_pipeline(data, \\\"PNG\\\", avoid=\\\"Geo\\\")\"\n   ]\n  },\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"50504f5b-622c-43f7-ae68-5e71730bc2ab\",\n   \"metadata\": {},\n   \"source\": [\n    \"#### But pandas was not the only engine that can work on this data. 
We can play with the API or make more readers to persist in the catalog.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"c684789c-0038-4f12-8668-640f3f5553c8\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"data\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"31a9d94c-5025-4de2-99f8-eb966afa26e4\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"data.to_reader(\\\"dask\\\").read()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"7a4a9622-0b1f-4bab-887e-0e64e4a5098e\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"data.to_reader(\\\"ray\\\").read()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"d481550f-4bda-4864-9b29-c163301b1f36\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# dask-on-ray!\\n\",\n    \"data.to_reader(\\\"dask\\\").DaskToRay.read()\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": null,\n   \"id\": \"d00b8b13-6478-4691-b5d8-b76cde9cecd5\",\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": []\n  }\n ],\n \"metadata\": {\n  \"language_info\": {\n   \"name\": \"python\",\n   \"pygments_lexer\": \"ipython3\"\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 5\n}\n"
  },
  {
    "path": "intake/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport os\n\ntry:\n    from intake._version import __version__\nexcept ImportError:\n    __version__ = \"2.dev\"  # fallback\n\n# legacy immediate imports\nfrom intake.utils import import_name, logger\nfrom intake.catalog.base import VersionError\nfrom intake.source import registry\nfrom intake.config import conf\nfrom intake.readers import (\n    BaseData,\n    reader_from_call,\n    recommend,\n    BaseReader,\n    BaseConverter,\n    Pipeline,\n    auto_pipeline,\n    path,\n    DataDescription,\n    ReaderDescription,\n    BaseUserParameter,\n    SimpleUserParameter,\n    user_parameters,\n    transform,\n    output,\n    catalogs,\n    entry,\n    datatypes,\n)\nfrom intake.readers.entry import Catalog\nimport intake.readers.importlist  # do this last, as it triggers more imports needing intake\n\n# legacy on-demand imports\nimports = {\n    \"DataSource\": \"intake.source.base:DataSource\",\n    \"Schema\": \"intake.source.base:Schema\",\n    \"load_combo_catalog\": \"intake.catalog.default:load_combo_catalog\",\n    \"gui\": \"intake.interface:instance\",\n    \"interface\": \"intake.interface\",\n    \"cat\": \"intake.catalog:builtin\",\n    \"output_notebook\": \"intake.interface:output_notebook\",\n    \"register_driver\": \"intake.source:register_driver\",\n    \"unregister_driver\": \"intake.source:unregister_driver\",\n}\nfrom_yaml_file = entry.Catalog.from_yaml_file\n\n\ndef __getattr__(attr):\n    \"\"\"Lazy attribute propagator\n\n    Defers inputs of functions until they are needed, according to the\n    contents of the ``imports`` (submodules and classes) and ``openers``\n    (functions which 
instantiate data sources directly)\n    dicts. All keys in ``openers``\n    must start with \"open_\", else they will be ignored.\n    \"\"\"\n    gl = globals()\n\n    if attr == \"__all__\":\n        return __dir__()\n\n    if attr in gl:\n        return gl[attr]\n\n    if attr in imports:\n        dest = imports[attr]\n        gl[attr] = import_name(dest)\n        return gl[attr]\n\n    if attr[:5] == \"open_\":\n        if attr[5:] in registry.drivers.enabled_plugins():\n            driver = registry[attr[5:]]  # \"open_...\"\n            return driver\n        else:\n            registered_methods = [f\"open_{driver}\" for driver in registry.drivers.enabled_plugins()]\n            raise AttributeError(\n                f\"Unknown open method '{attr}'. \"\n                \"Do you need to install a new driver from the plugin directory? \"\n                \"https://intake.readthedocs.io/en/latest/plugin-directory.html\\n\"\n                f\"Registered opener methods: {registered_methods}\"\n            )\n\n    raise AttributeError(attr)\n\n\ndef __dir__(*_, **__):\n    openers = [\"open_\" + name for name in registry.drivers.enabled_plugins()]\n    return sorted(list(globals()) + list(imports) + openers)\n\n\ndef open_catalog(uri=None, **kwargs):\n    \"\"\"Create a Catalog object\n\n    *New in V2*: if the URL is a single file, and loading it as a V1 catalog fails because of\n    the stated version, it will be opened again as a V2 catalog. This will mean reading\n    the file twice, so calling ``from_yaml_file`` directly is better.\n\n    Can load YAML catalog files, connect to an intake server, or create any\n    arbitrary Catalog subclass instance. In the general case, the user should\n    supply ``driver=`` with a value from the plugins registry which has a\n    container type of catalog. 
File locations can generally be remote, if\n    specifying a URL protocol.\n\n    The default behaviour if not specifying the driver is as follows:\n\n    - if ``uri`` is a single string ending in \"yml\" or \"yaml\", open it as a\n      catalog file\n    - if ``uri`` is a list of strings, a string containing a glob character\n      (\"*\") or a string not ending in \"y(a)ml\", open as a set of catalog\n      files. In the latter case, assume it is a directory.\n    - if ``uri`` begins with protocol ``\"intake:\"``, connect to a remote\n      Intake server\n    - if ``uri`` is ``None`` or missing, create a base Catalog object without entries.\n\n    Parameters\n    ----------\n    uri: str or pathlib.Path\n        Designator for the location of the catalog.\n    kwargs:\n        passed to subclass instance, see documentation of the individual\n        catalog classes. For example, ``yaml_files_cat`` (when specifying\n        multiple uris or a glob string) takes the additional\n        parameter ``flatten=True|False``, specifying whether all data sources\n        are merged in a single namespace, or each file becomes\n        a sub-catalog.\n\n    See also\n    --------\n    intake.open_yaml_files_cat, intake.open_yaml_file_cat,\n    intake.open_intake_remote\n    \"\"\"\n    driver = kwargs.pop(\"driver\", None)\n    if isinstance(uri, os.PathLike):\n        uri = os.fspath(uri)\n    if driver is None:\n        if uri:\n            if (isinstance(uri, str) and \"*\" in uri) or (\n                (isinstance(uri, (list, tuple))) and len(uri) > 1\n            ):\n                # glob string or list of files/globs\n                driver = \"yaml_files_cat\"\n            elif isinstance(uri, (list, tuple)) and len(uri) == 1:\n                uri = uri[0]\n                if \"*\" in uri[0]:\n                    # single glob string in a list\n                    driver = \"yaml_files_cat\"\n                else:\n                    # single filename in a list\n    
                driver = \"yaml_file_cat\"\n            elif isinstance(uri, str):\n                # single URL\n                if uri.startswith(\"intake:\"):\n                    # server\n                    driver = \"intake_remote\"\n                else:\n                    if uri.endswith((\".yml\", \".yaml\")):\n                        driver = \"yaml_file_cat\"\n                    else:\n                        uri = uri.rstrip(\"/\") + \"/*.y*ml\"\n                        driver = \"yaml_files_cat\"\n            else:\n                raise ValueError(\"URI not understood: %s\" % uri)\n        else:\n            # empty cat\n            driver = \"catalog\"\n    if \"_file\" not in driver:\n        kwargs.pop(\"fs\", None)\n    if driver not in registry:\n        raise ValueError(\n            f\"Unknown catalog driver '{driver}'. \"\n            \"Do you need to install a new driver from the plugin directory? \"\n            \"https://intake.readthedocs.io/en/latest/plugin-directory.html\\n\"\n            f\"Current registry: {list(sorted(registry))}\"\n        )\n    try:\n        return registry[driver](uri, **kwargs)\n    except VersionError:\n        # warn that we are switching to V2? The file will be read twice\n        return from_yaml_file(uri, **kwargs)\n"
  },
  {
    "path": "intake/catalog/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom .base import Catalog\nfrom .default import load_combo_catalog\nfrom .local import EntrypointsCatalog, MergedCatalog\n\n\ndef _make_builtin():\n    return MergedCatalog(\n        [EntrypointsCatalog(), load_combo_catalog()],\n        name=\"builtin\",\n        description=\"Generated from data packages found on your intake search path\",\n    )\n\n\ndef __getattr__(name):\n    \"\"\"Only make the builtin catalog on request\"\"\"\n    global builtin\n    if name == \"builtin\":\n        builtin = _make_builtin()\n        return builtin\n    raise AttributeError(name)\n"
  },
  {
    "path": "intake/catalog/base.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport keyword\nimport logging\nimport re\nimport time\n\nfrom ..source.base import DataSource, DataSourceBase, NoEntry\nfrom .utils import reload_on_change\n\nlogger = logging.getLogger(\"intake\")\n\n\nclass VersionError(Exception):\n    ...\n\n\nclass Catalog(DataSource):\n    \"\"\"Manages a hierarchy of data sources as a collective unit.\n\n    A catalog is a set of available data sources for an individual\n    entity (remote server, local  file, or a local\n    directory of files). This can be expanded to include a\n    collection of subcatalogs, which are then managed as a single unit.\n\n    A catalog is created with a single URI or a group of URIs. A URI can\n    either be a URL or a file path.\n\n    Each catalog in the hierarchy is responsible for caching the most recent\n    refresh time to prevent overeager queries.\n\n    Attributes\n    ----------\n    metadata : dict\n        Arbitrary information to carry along with the data source specs.\n    \"\"\"\n\n    # emulate a DataSource\n    container = \"catalog\"\n    name = \"catalog\"\n    auth = None\n\n    def __init__(\n        self,\n        entries=None,\n        name=None,\n        description=None,\n        metadata=None,\n        ttl=60,\n        getenv=True,\n        getshell=False,\n        persist_mode=\"default\",\n        storage_options=None,\n        user_parameters=None,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        entries : dict, optional\n            Mapping of {name: entry}\n        name : str, optional\n            Unique identifier for catalog. 
This takes precedence over whatever\n            is stated in the cat file itself. Defaults to None.\n        description : str, optional\n            Description of the catalog. This takes precedence over whatever\n            is stated in the cat file itself. Defaults to None.\n        metadata: dict\n            Additional information about this data\n        ttl : float, optional\n            Lifespan (time to live) of cached modification time. Units are in\n            seconds. Defaults to 60.\n        getenv: bool\n            Can parameter default fields take values from the environment\n        getshell: bool\n            Can parameter default fields run shell commands\n        persist_mode: ['always', 'default', 'never']\n            Defines the use of persisted sources: if 'always', will use a\n            persisted version of a data source, if it exists, if 'never' will\n            always use the original source. If 'default', persisted sources\n            will be used if they have not expired, and re-persisted and used\n            if they have.\n        storage_options : dict\n            If using a URL beginning with 'intake://' (remote Intake server),\n            parameters to pass to requests when issuing http commands; otherwise\n            parameters to pass to remote backend file-system. 
Ignored for\n            normal local files.\n        \"\"\"\n        super(Catalog, self).__init__()\n        self.name = name\n        self.description = description\n        self.metadata = metadata or {}\n        self.ttl = ttl\n        self.getenv = getenv\n        self.getshell = getshell\n        self.storage_options = storage_options\n        if isinstance(user_parameters, dict) and user_parameters:\n            from .local import UserParameter\n\n            self.user_parameters = {\n                name: (UserParameter(name=name, **up) if isinstance(up, dict) else up)\n                for name, up in user_parameters.items()\n            }\n        elif isinstance(user_parameters, (list, tuple)):\n            self.user_parameters = {up[\"name\"]: up for up in user_parameters}\n        else:\n            self.user_parameters = {}\n        if persist_mode not in [\"always\", \"never\", \"default\"]:\n            # should be True, False, None ?\n            raise ValueError(\"Persist mode (%s) not understood\" % persist_mode)\n        self.pmode = persist_mode\n\n        if entries and isinstance(entries, str):\n            raise ValueError(\n                \"The class intake.Catalog does not accept a string for \"\n                \"`entries`\\n\"\n                \"Did you mean to use `intake.open_catalog`? Note that in \"\n                \"versions of intake <=0.5.4 `intake.Catalog` was an \"\n                \"alias for `intake.open_catalog`. 
It is now the intake base \"\n                \"Catalog class.\"\n            )\n        self.updated = time.time()\n        self._entries = entries if entries is not None else self._make_entries_container()\n        self.force_reload()\n\n    @classmethod\n    def from_dict(cls, entries, **kwargs):\n        \"\"\"\n        Create Catalog from the given set of entries\n\n        Parameters\n        ----------\n        entries : dict-like\n            A mapping of name:entry which supports dict-like functionality,\n            e.g., is derived from ``collections.abc.Mapping``.\n        kwargs : passed on the constructor\n            Things like metadata, name; see ``__init__``.\n\n        Returns\n        -------\n        Catalog instance\n        \"\"\"\n        cat = cls(**kwargs)\n        cat._entries = entries\n        return cat\n\n    @property\n    def kwargs(self):\n        return dict(name=self.name, ttl=self.ttl)\n\n    def _make_entries_container(self):\n        \"\"\"Subclasses may override this to return some other dict-like.\n\n        See RemoteCatalog below for the motivating example for this hook. 
This\n        is typically useful for large Catalogs backed by dynamic resources such\n        as databases.\n\n        The object returned by this method must implement:\n\n        * ``__iter__()`` -> an iterator of entry names\n        * ``__getitem__(key)`` -> an Entry\n        * ``items()`` -> an iterator of ``(key, Entry)`` pairs\n\n        For best performance the object should also implement:\n\n        * ``__len__()`` -> int\n        * ``__contains__(key)`` -> boolean\n\n        If ``__len__`` or ``__contains__`` are not implemented, intake will\n        fall back on iterating through the entire catalog to compute its length\n        or check for containment, which may be expensive on large catalogs.\n        \"\"\"\n        return {}\n\n    def _load(self):\n        \"\"\"Override this: load catalog entries\"\"\"\n        pass\n\n    def force_reload(self):\n        \"\"\"Imperative reload data now\"\"\"\n        self.updated = time.time()\n        self._load()\n\n    def reload(self):\n        \"\"\"Reload catalog if sufficient time has passed\"\"\"\n        if (self.ttl is not None) and (time.time() - self.updated > self.ttl):\n            self.force_reload()\n\n    @property\n    def version(self):\n        # default version for pre-v1 files\n        return self.metadata.get(\"version\", 1)\n\n    @reload_on_change\n    def search(self, text, depth=2):\n        import copy\n\n        words = text.lower().split()\n        entries = {\n            k: copy.copy(v)\n            for k, v in self.walk(depth=depth).items()\n            if any(word in str(v.describe().values()).lower() for word in words)\n        }\n        cat = Catalog.from_dict(\n            entries,\n            name=self.name + \"_search\",\n            ttl=self.ttl,\n            getenv=self.getenv,\n            getshell=self.getshell,\n            metadata=(self.metadata or {}).copy(),\n            storage_options=self.storage_options,\n            
user_parameters=self.user_parameters.copy(),\n        )\n        cat.metadata[\"search\"] = {\"text\": text, \"upstream\": self.name}\n        cat.cat = self\n        for e in entries.values():\n            e._catalog = cat\n        return cat\n\n    def filter(self, func):\n        \"\"\"\n        Create a Catalog of a subset of entries based on a condition\n\n        .. warning ::\n\n           This function operates on CatalogEntry objects not DataSource\n           objects.\n\n        .. note ::\n\n            Note that, whatever specific class this is performed on,\n            the return instance is a Catalog. The entries are passed\n            unmodified, so they will still reference the original catalog\n            instance and include its details such as directory,.\n\n        Parameters\n        ----------\n        func : function\n            This should take a CatalogEntry and return True or False. Those\n            items returning True will be included in the new Catalog, with the\n            same entry names\n\n        Returns\n        -------\n        Catalog\n           New catalog with Entries that still refer to their parents\n        \"\"\"\n        return Catalog.from_dict(\n            {key: entry for key, entry in self._entries.items() if func(entry)}\n        )\n\n    @reload_on_change\n    def walk(self, sofar=None, prefix=None, depth=2):\n        \"\"\"Get all entries in this catalog and sub-catalogs\n\n        Parameters\n        ----------\n        sofar: dict or None\n            Within recursion, use this dict for output\n        prefix: list of str or None\n            Names of levels already visited\n        depth: int\n            Number of levels to descend; needed to truncate circular references\n            and for cleaner output\n\n        Returns\n        -------\n        Dict where the keys are the entry names in dotted syntax, and the\n        values are entry instances.\n        \"\"\"\n        out = sofar if sofar is not 
None else {}\n        prefix = [] if prefix is None else prefix\n        for name, item in self._entries.items():\n            if item._container == \"catalog\" and depth > 1:\n                # recurse with default open parameters\n                try:\n                    item().walk(out, prefix + [name], depth - 1)\n                except Exception as e:\n                    print(e)\n                    pass  # ignore inability to descend\n            n = \".\".join(prefix + [name])\n            out[n] = item\n        return out\n\n    def items(self):\n        \"\"\"Get an iterator over (key, source) tuples for the catalog entries.\"\"\"\n        for name, entry in self._get_entries().items():\n            yield name, entry()\n\n    def values(self):\n        \"\"\"Get an iterator over the sources for catalog entries.\"\"\"\n        for entry in self._get_entries().values():\n            yield entry()\n\n    def serialize(self):\n        \"\"\"\n        Produce YAML version of this catalog.\n\n        Note that this is not the same as ``.yaml()``, which produces a YAML\n        block referring to this catalog.\n        \"\"\"\n        import yaml\n\n        output = {\n            \"metadata\": self.metadata,\n            \"sources\": {},\n            \"name\": self.name,\n            \"description\": self.description,\n        }\n        for key, entry in self._entries.items():\n            kw = entry._captured_init_kwargs.copy()\n            kw.pop(\"catalog\", None)\n            kw[\"parameters\"] = {\n                k.name: k.__getstate__()[\"kwargs\"] for k in kw.get(\"parameters\", [])\n            }\n            try:\n                if issubclass(kw[\"driver\"], DataSourceBase):\n                    kw[\"driver\"] = \".\".join([kw[\"driver\"].__module__, kw[\"driver\"].__name__])\n            except TypeError:\n                pass  # ignore exception for a string input\n            output[\"sources\"][key] = kw\n        return yaml.dump(output)\n\n   
 def save(self, url, storage_options=None):\n        \"\"\"\n        Output this catalog to a file as YAML\n\n        Parameters\n        ----------\n        url : str\n            Location to save to, perhaps remote\n        storage_options : dict\n            Extra arguments for the file-system\n        \"\"\"\n        from fsspec import open_files\n\n        with open_files([url], **(storage_options or {}), mode=\"wt\")[0] as f:\n            f.write(self.serialize())\n\n    @reload_on_change\n    def _get_entry(self, name):\n        entry = self._entries[name]\n        entry._catalog = self\n        entry._pmode = self.pmode\n\n        up_names = set(\n            (up[\"name\"] if isinstance(up, dict) else up.name) for up in entry._user_parameters\n        )\n        ups = [up for name, up in self.user_parameters.items() if name not in up_names]\n        entry._user_parameters = ups + (entry._user_parameters or [])\n        return entry()\n\n    def configure_new(self, **kwargs):\n        from .local import UserParameter\n\n        ups = {}\n        for k, v in kwargs.copy().items():\n            for up in self.user_parameters.values():\n                if isinstance(up, dict):\n                    if k == up[\"name\"]:\n                        kw = up.copy()\n                        kw[\"default\"] = v\n                        ups[k] = UserParameter(**kw)\n                        kwargs.pop(k)\n                else:\n                    if k == up.name:\n                        kw = up._captured_init_kwargs.copy()\n                        kw[\"default\"] = v\n                        kw[\"name\"] = k\n                        ups[k] = UserParameter(**kw)\n                        kwargs.pop(k)\n\n        new = super().configure_new(**kwargs)\n        new.user_parameters.update(ups)\n        return new\n\n    __call__ = get = configure_new\n\n    @reload_on_change\n    def _get_entries(self):\n        return self._entries\n\n    def __iter__(self):\n        
\"\"\"Return an iterator over catalog entry names.\"\"\"\n        return iter(self._get_entries())\n\n    def keys(self):\n        \"\"\"Entry names in this catalog as an iterator (alias for __iter__)\"\"\"\n        return iter(self)\n\n    def __len__(self):\n        return len(self._get_entries())\n\n    def __contains__(self, key):\n        # Avoid iterating through all entries.\n        return key in self._get_entries()  # triggers reload_on_change\n\n    def __dir__(self):\n        # Include tab-completable entries and normal attributes.\n        return [\n            entry\n            for entry in self\n            if re.match(\"[_A-Za-z][_a-zA-Z0-9]*$\", entry)  # valid Python identifier\n            and not keyword.iskeyword(entry)  # not a Python keyword\n        ] + list(\n            self.__dict__.keys()\n        )\n\n    def _ipython_key_completions_(self):\n        return list(self)\n\n    def __repr__(self):\n        return \"<Intake catalog: %s>\" % self.name\n\n    def __getattr__(self, item):\n        # we need this special case here because the (deprecated) entry\n        # property on the base class\n        if item == \"entry\":\n            raise NoEntry(\"Source was not made from a catalog entry\")\n        if not item.startswith(\"_\"):\n            # Fall back to __getitem__.\n            try:\n                return self[item]  # triggers reload_on_change\n            except KeyError as e:\n                raise AttributeError(item) from e\n        raise AttributeError(item)\n\n    def __setitem__(self, key, entry):\n        \"\"\"Add entry to catalog\n\n        This relies on the `_entries` attribute being mutable, which it normally\n        is. 
Note that if a catalog automatically reloads, any entry added here\n        may be very transient\n\n        Parameters\n        ----------\n        key : str\n            Key to give the entry in the cat\n        entry : CatalogEntry\n            The entry to include (could be local, remote)\n        \"\"\"\n        self._entries[key] = entry\n\n    def pop(self, key):\n        \"\"\"Remove entry from catalog and return it\n\n        This relies on the `_entries` attribute being mutable, which it normally\n        is. Note that if a catalog automatically reloads, any entry removed here\n        may soon reappear\n\n        Parameters\n        ----------\n        key : str\n            Key to give the entry in the cat\n        \"\"\"\n        return self._entries.pop(key)\n\n    def __getitem__(self, key):\n        \"\"\"Return a catalog entry by name.\n\n        Can also use attribute syntax, like ``cat.entry_name``, or\n        item lookup cat['non-python name']. This enables walking through\n        nested directories with cat.name1.name2, cat['name1.name2'] *or*\n        cat['name1', 'name2']\n        \"\"\"\n        if not isinstance(key, list) and key in self:\n            # triggers reload_on_change\n            s = self._get_entry(key)\n            if s.container == \"catalog\":\n                s.name = key\n                s.user_parameters.update(self.user_parameters.copy())\n                return s\n            return s\n        if isinstance(key, str) and \".\" in key:\n            key = key.split(\".\")\n        if isinstance(key, list):\n            parts = list(key)[:]\n            prefix = \"\"\n            while parts:\n                bit = parts.pop(0)\n                prefix = prefix + (\".\" if prefix else \"\") + bit\n                if prefix in self._entries:\n                    rest = \".\".join(parts)\n                    try:\n                        out = self._entries[prefix][rest]\n                        return out()\n              
      except KeyError:\n                        # name conflict like \"thing\" and \"think.oi\", where it's\n                        # the latter we are after\n                        continue\n        elif isinstance(key, tuple):\n            out = self\n            for part in key:\n                out = out[part]\n            return out()\n        raise KeyError(key)\n\n    def discover(self):\n        return {\n            \"container\": \"catalog\",\n            \"shape\": None,\n            \"dtype\": None,\n            \"metadata\": self.metadata,\n        }\n\n    def _close(self):\n        # TODO: maybe close all entries?\n        pass\n\n    @property\n    def gui(self):\n        if not hasattr(self, \"_gui\"):\n            from ..interface import output_notebook\n            from ..interface.gui import GUI\n\n            output_notebook()\n            self._gui = GUI({self.name: self})\n        else:\n            self._gui.visible = True\n        return self._gui\n"
  },
  {
    "path": "intake/catalog/default.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport json\nimport os\nimport subprocess\nimport sys\n\nimport platformdirs\n\nfrom intake.config import conf\nfrom intake.utils import make_path_posix\n\nfrom .local import Catalog, YAMLFilesCatalog\n\n\ndef load_user_catalog():\n    \"\"\"Return a catalog for the platform-specific user Intake directory\"\"\"\n    cat_dir = user_data_dir()\n    if not os.path.isdir(cat_dir):\n        return Catalog()\n    else:\n        return YAMLFilesCatalog(cat_dir)\n\n\ndef user_data_dir():\n    \"\"\"Return the user Intake catalog directory\"\"\"\n    return platformdirs.user_data_dir(appname=\"intake\", appauthor=\"intake\")\n\n\ndef load_global_catalog():\n    \"\"\"Return a catalog for the environment-specific Intake directory\"\"\"\n    cat_dir = global_data_dir()\n    if not os.path.isdir(cat_dir):\n        return Catalog()\n    else:\n        return YAMLFilesCatalog(cat_dir)\n\n\nCONDA_VAR = \"CONDA_PREFIX\"\nVIRTUALENV_VAR = \"VIRTUAL_ENV\"\n\n\ndef conda_prefix():\n    \"\"\"Fallback: ask conda in PATH for its prefix\"\"\"\n    try:\n        out = subprocess.check_output([\"conda\", \"info\", \"--json\"])\n        return json.loads(out.decode())[\"default_prefix\"]\n    except (subprocess.CalledProcessError, json.JSONDecodeError, OSError):\n        return False\n\n\ndef which(program):\n    \"\"\"Emulate posix ``which``\"\"\"\n    import distutils.spawn\n\n    return distutils.spawn.find_executable(program)\n\n\ndef global_data_dir():\n    \"\"\"Return the global Intake catalog dir for the current environment\"\"\"\n    prefix = False\n    if VIRTUALENV_VAR in os.environ:\n        prefix = os.environ[VIRTUALENV_VAR]\n    
elif CONDA_VAR in os.environ:\n        prefix = sys.prefix\n    elif which(\"conda\"):\n        # conda exists but is not activated\n        prefix = conda_prefix()\n\n    if prefix:\n        # conda and virtualenv use Linux-style directory pattern\n        return make_path_posix(os.path.join(prefix, \"share\", \"intake\"))\n    else:\n        return platformdirs.site_data_dir(appname=\"intake\", appauthor=\"intake\")\n\n\ndef load_combo_catalog():\n    \"\"\"Load a union of the user and global catalogs for convenience\"\"\"\n    user_dir = user_data_dir()\n    global_dir = global_data_dir()\n    desc = \"Generated from data packages found on your intake search path\"\n    cat_dirs = []\n    if os.path.isdir(user_dir):\n        cat_dirs.append(user_dir + \"/*.yaml\")\n        cat_dirs.append(user_dir + \"/*.yml\")\n    if os.path.isdir(global_dir):\n        cat_dirs.append(global_dir + \"/*.yaml\")\n        cat_dirs.append(global_dir + \"/*.yml\")\n    for path_dir in conf.get(\"catalog_path\", []):\n        if path_dir != \"\":\n            if not path_dir.endswith((\"yaml\", \"yml\")):\n                cat_dirs.append(path_dir + \"/*.yaml\")\n                cat_dirs.append(path_dir + \"/*.yml\")\n            else:\n                cat_dirs.append(path_dir)\n\n    return YAMLFilesCatalog(cat_dirs, name=\"builtin\", description=desc)\n"
  },
  {
    "path": "intake/catalog/entry.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom ..utils import DictSerialiseMixin, pretty_describe\n\n\nclass CatalogEntry(DictSerialiseMixin):\n    \"\"\"A single item appearing in a catalog\n\n    This is the base class, used by local entries (i.e., read from a YAML file)\n    and by remote entries (read from a server).\n    \"\"\"\n\n    def __init__(self, getenv=True, getshell=False):\n        self._default_source = None\n        self.getenv = getenv\n        self.getshell = getshell\n        self._pmode = \"default\"\n\n    def describe(self):\n        \"\"\"Get a dictionary of attributes of this entry.\n\n        Returns: dict with keys\n          name: str\n              The name of the catalog entry.\n          container : str\n              kind of container used by this data source\n          description : str\n              Markdown-friendly description of data source\n          direct_access : str\n              Mode of remote access: forbid, allow, force\n          user_parameters : list[dict]\n              List of user parameters defined by this entry\n\n        \"\"\"\n        raise NotImplementedError\n\n    def get(self, **user_parameters):\n        \"\"\"Open the data source.\n\n        Equivalent to calling the catalog entry like a function.\n\n        Parameters\n        ----------\n          user_parameters : dict\n              Values for user-configurable parameters for this data source\n\n        Returns\n        -------\n        DataSource\n        \"\"\"\n        raise NotImplementedError\n\n    def __call__(self, persist=None, **kwargs):\n        \"\"\"Instantiate DataSource with given user arguments\"\"\"\n        s = 
self.get(**kwargs)\n        s._entry = self\n        s._passed_kwargs = list(kwargs)\n        return s\n\n    @property\n    def container(self):\n        return getattr(self, \"_container\", None)\n\n    @container.setter\n    def container(self, cont):\n        # so that .container (which sources always have)  always reflects ._container,\n        # which is the variable name for entries.\n        self._container = cont\n\n    @property\n    def plots(self):\n        \"\"\"List custom associated quick-plots\"\"\"\n        return list(self._metadata.get(\"plots\", {}))\n\n    def _ipython_display_(self):\n        \"\"\"Display the entry as a rich object in an IPython session.\"\"\"\n        import json\n\n        from IPython.display import display\n\n        contents = self.describe()\n        display(\n            {\n                \"application/json\": json.dumps(contents),\n                \"text/plain\": pretty_describe(contents),\n            },\n            metadata={\"application/json\": {\"root\": contents[\"name\"]}},\n            raw=True,\n        )\n\n    def _yaml(self):\n        return {\"sources\": {self.name: self.describe()}}\n\n    def __iter__(self):\n        # If the entry is a catalog, this allows list(cat.entry)\n        if self._container == \"catalog\":\n            return iter(self())\n        else:\n            raise ValueError(\"Cannot iterate a catalog entry\")\n\n    def __getitem__(self, item):\n        \"\"\"Pass getitem to data source, assuming default parameters\n\n        Also supports multiple items ([.., ..]), in which case the first\n        component only will be used to instantiate, and the rest passed on.\n        \"\"\"\n        if isinstance(item, tuple):\n            if len(item) > 1:\n                return self()[item[0]].__getitem__(item[1:])\n            else:\n                item = item[0]\n        return self()[item]\n\n    def __repr__(self):\n        return pretty_describe(self.describe())\n\n    @property\n    
def gui(self):\n        if not hasattr(self, \"_gui\"):\n            from .gui import EntryGUI\n\n            self._gui = EntryGUI(source=self, visible=True)\n        else:\n            self._gui.visible = True\n        return self._gui\n"
  },
  {
    "path": "intake/catalog/exceptions.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\n\nclass CatalogException(Exception):\n    \"\"\"Basic exception for errors raised by catalog\"\"\"\n\n\nclass PermissionDenied(CatalogException):\n    \"\"\"Raised when user requests functionality that they do not have permission\n    to access.\n    \"\"\"\n\n\nclass ShellPermissionDenied(PermissionDenied):\n    \"\"\"The user does not have permission to execute shell commands.\"\"\"\n\n    def __init__(self, msg=None):\n        if msg is None:\n            msg = \"Additional permissions needed to execute shell commands.\"\n        super(ShellPermissionDenied, self).__init__(msg)\n\n\nclass EnvironmentPermissionDenied(PermissionDenied):\n    \"\"\"The user does not have permission to read environment variables.\"\"\"\n\n    def __init__(self, msg=None):\n        if msg is None:\n            msg = \"Additional permissions needed to read environment variables.\"\n        super(EnvironmentPermissionDenied, self).__init__(msg)\n\n\nclass ValidationError(CatalogException):\n    \"\"\"Something's wrong with the catalog spec\"\"\"\n\n    def __init__(self, message, errors):\n        super(ValidationError, self).__init__(message)\n        self.errors = errors\n\n\nclass DuplicateKeyError(ValidationError):\n    \"\"\"Catalog contains key duplications\"\"\"\n\n    def __init__(self, context, context_mark, problem, problem_mark):\n        line = problem_mark.line\n        column = problem_mark.column\n        msg = \"duplicate key found on line {}, column {}\".format(line + 1, column + 1)\n        super(DuplicateKeyError, self).__init__(msg, [])\n\n\nclass ObsoleteError(ValidationError):\n    pass\n\n\nclass 
ObsoleteParameterError(ObsoleteError):\n    def __init__(self):\n        msg = \"\"\"Detected old syntax. See details for upgrade instructions to new syntax:\n\n[old syntax]\n\nparameters:\n  - name: abc\n    type: str\n\n[new syntax]\n\nparameters:\n  abc:\n    type: str\n\"\"\"\n        super(ObsoleteParameterError, self).__init__(msg, [])\n\n\nclass ObsoleteDataSourceError(ObsoleteError):\n    def __init__(self):\n        msg = \"\"\"Detected old syntax. See details for upgrade instructions to new syntax:\n\n[old syntax]\n\nsources:\n  - name: abc\n    driver: csv\n\n[new syntax]\n\nsources:\n  abc:\n    driver: csv\n\"\"\"\n        super(ObsoleteDataSourceError, self).__init__(msg, [])\n"
  },
  {
    "path": "intake/catalog/gui.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\ntry:\n    from ..interface.gui import GUI\n\nexcept ImportError:\n\n    class GUI(object):\n        def __init__(self, *args, **kwargs):\n            pass\n\n        def __repr__(self):\n            raise RuntimeError(\n                \"Please install panel to use the GUI (`conda \"\n                \"install -c conda-forge panel>0.8.0`)\"\n            )\n\nexcept Exception:\n\n    class GUI(object):\n        def __init__(self, *args, **kwargs):\n            pass\n\n        def __repr__(self):\n            raise RuntimeError(\n                \"Initialization of GUI failed, even though \"\n                \"panel is installed. Please update it \"\n                \"to a more recent version (`conda install -c \"\n                \"conda-forge panel==0.5.1`).\"\n            )\n"
  },
  {
    "path": "intake/catalog/local.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\nimport collections\nfrom importlib.metadata import entry_points\nimport inspect\nimport logging\nimport os\nimport warnings\n\nfrom fsspec import get_filesystem_class, open_files\nfrom fsspec.core import split_protocol\n\nfrom .. import __version__\nfrom ..source import get_plugin_class, register_driver\nfrom ..utils import DictSerialiseMixin, classname, make_path_posix, yaml_load\nfrom . import exceptions\nfrom .base import Catalog, DataSource, VersionError\nfrom .entry import CatalogEntry\nfrom .utils import COERCION_RULES, _has_catalog_dir, coerce, expand_defaults, merge_pars\n\nlogger = logging.getLogger(\"intake\")\n\n\nclass UserParameter(DictSerialiseMixin):\n    \"\"\"\n    A user-settable item that is passed to a DataSource upon instantiation.\n\n    For string parameters, default may include special functions ``func(args)``,\n    which *may* be expanded from environment variables or by executing a shell\n    command.\n\n    Parameters\n    ----------\n    name: str\n        the key that appears in the DataSource argument strings\n    description: str\n        narrative text\n    type: str\n        one of list ``(COERCION_RULES)``\n    default: type value\n        same type as ``type``. 
It a str, may include special functions\n        env, shell, client_env, client_shell.\n    min, max: type value\n        for validation of user input\n    allowed: list of type\n        for validation of user input\n    \"\"\"\n\n    def __init__(\n        self,\n        name,\n        description=None,\n        type=None,\n        default=None,\n        min=None,\n        max=None,\n        allowed=None,\n    ):\n        self.name = name\n        self.description = description\n        self.type = type or __builtins__[\"type\"](default).__name__\n        self.min = min\n        self.max = max\n        self.allowed = allowed\n\n        self._default = default\n        try:\n            self.default = coerce(self.type, default)\n        except (ValueError, TypeError):\n            self.default = None\n        self.expanded_default = self.default\n\n        if self.min:\n            self.min = coerce(self.type, self.min)\n\n        if self.max:\n            self.max = coerce(self.type, self.max)\n\n        if self.allowed and type != \"mlist\":\n            self.allowed = [coerce(self.type, item) for item in self.allowed]\n\n    def __repr__(self):\n        return f\"<{self.__class__.__name__} {self.name!r}>\"\n\n    __str__ = __repr__\n\n    def describe(self):\n        \"\"\"Information about this parameter\"\"\"\n        desc = {\n            \"name\": self.name,\n            \"description\": self.description,\n            # the Parameter might not have a type at all\n            \"type\": self.type or \"unknown\",\n        }\n        for attr in [\"min\", \"max\", \"allowed\", \"default\"]:\n            v = getattr(self, attr)\n            if v is not None:\n                desc[attr] = v\n        return desc\n\n    def expand_defaults(self, client=False, getenv=True, getshell=False):\n        \"\"\"Compile env, client_env, shell and client_shell commands\"\"\"\n        if not isinstance(self._default, str):\n            self.expanded_default = self._default\n   
     else:\n            self.expanded_default = coerce(\n                self.type, expand_defaults(self._default, client, getenv, getshell)\n            )\n\n    def validate(self, value):\n        \"\"\"Does value meet parameter requirements?\"\"\"\n        if self.type is not None:\n            value = coerce(self.type, value)\n\n        if self.type == \"mlist\":\n            for v in value:\n                if v not in self.allowed:\n                    raise ValueError(\"Item %s not in allowed list\", v)\n            return value\n\n        if self.min is not None and value < self.min:\n            raise ValueError(\"%s=%s is less than %s\" % (self.name, value, self.min))\n        if self.max is not None and value > self.max:\n            raise ValueError(\"%s=%s is greater than %s\" % (self.name, value, self.max))\n        if self.allowed is not None and value not in self.allowed:\n            raise ValueError(\n                \"%s=%s is not one of the allowed values: %s\"\n                % (self.name, value, \",\".join(map(str, self.allowed)))\n            )\n\n        return value\n\n\nclass LocalCatalogEntry(CatalogEntry):\n    \"\"\"A catalog entry on the local system\"\"\"\n\n    def __init__(\n        self,\n        name,\n        description,\n        driver,\n        direct_access=True,\n        args={},\n        cache=[],\n        parameters=[],\n        metadata={},\n        catalog_dir=\"\",\n        getenv=True,\n        getshell=False,\n        catalog=None,\n    ):\n        \"\"\"\n\n        Parameters\n        ----------\n        name: str\n            How this entry is known, normally from its key in a YAML file, or\n            if that is not provided then from name of file, or name of dir if\n            file name is 'catalog.yaml' or 'catalog.yml'.\n        description: str\n            Brief text about the target source\n        driver: str, list, dict or DataSource subclass\n            The plugin(s) that can load this. 
Can be a simple name like \"csv\",\n            which will be looked up in the registry, a fully-qualified class\n            name (\"package.mod.Class\"), a list of these which would all work,\n            a dictionary of the same with reasonable names, or an explicit\n            class derived from DataSource.\n        direct_access: bool\n            Is the client allowed to attempt to reach this data\n        args: dict\n            Passed when instantiating the plugin DataSource\n        parameters: list\n            UserParameters that can be set\n        metadata: dict\n            Additional information about this data\n        catalog_dir: str\n            Location of the catalog, if known\n        getenv: bool\n            Can parameter default fields take values from the environment\n        getshell: bool\n            Can parameter default fields run shell commands\n        catalog: bool\n            Catalog object in which this entry belongs\n        \"\"\"\n        self._name = name\n        self._default_source = None\n        self._description = description\n        self._driver = driver\n        self._direct_access = direct_access\n        self._open_args = args\n        self._cache = cache\n        self._user_parameters = parameters\n        self._metadata = metadata or {}\n        self._catalog_dir = catalog_dir\n        self._filesystem = None\n        self._catalog = catalog\n        if isinstance(driver, str):\n            dr = get_plugin_class(driver)\n            self._plugin = [dr] if dr is not None else []\n            containers = set(p.container for p in self._plugin)\n        elif isinstance(driver, list):\n            self._plugin = [get_plugin_class(d) for d in driver]\n            self._plugin = [p for p in self._plugin if p is not None]\n            containers = set(p.container for p in self._plugin)\n        elif isinstance(driver, dict):\n            self._plugin = {d: get_plugin_class(driver[d][\"class\"]) for d in driver}\n      
      self._plugin = {k: v for k, v in self._plugin.items() if v is not None}\n            containers = set(p.container for p in self._plugin.values())\n        elif inspect.isclass(driver) and issubclass(driver, DataSource):\n            self._plugin = [driver]\n            containers = {driver.container}\n        else:\n            raise TypeError(\"Driver was not a string, list, dict or DataSource:\" \" %s\" % driver)\n        if len(containers) > 1:\n            # this is an error, because cat is poorly specified, even if other\n            # plugins are OK\n            raise ValueError(\"Plugins for a data source must have only one \" \"container.\")\n        if len(containers) == 0:\n            # this is only debug, this single entry won't work, but cat is OK.\n            # you get an error if you try to actually use this entry\n            logger.debug(\"No plugins for entry: %s\" % self.name)\n            containers = [None]\n        self._container = list(containers)[0]\n        super(LocalCatalogEntry, self).__init__(getenv=getenv, getshell=getshell)\n\n    @property\n    def name(self):\n        return self._name\n\n    def describe(self):\n        \"\"\"Basic information about this entry\"\"\"\n        if isinstance(self._plugin, list):\n            pl = [p.name for p in self._plugin]\n        elif isinstance(self._plugin, dict):\n            pl = {k: classname(v) for k, v in self._plugin.items()}\n        else:\n            pl = self._plugin if isinstance(self._plugin, str) else self._plugin.name\n        return {\n            \"name\": self._name,\n            \"container\": self._container,\n            \"plugin\": pl,  # deprecated\n            \"driver\": pl,\n            \"description\": self._description,\n            \"direct_access\": self._direct_access,\n            \"user_parameters\": [u.describe() for u in self._user_parameters],\n            \"metadata\": self._metadata,\n            \"args\": self._open_args,\n        }\n\n    def 
_create_open_args(self, user_parameters):\n        plugin = user_parameters.pop(\"plugin\", None)\n\n        md = self._metadata.copy() if self._metadata is not None else {}\n        md[\"catalog_dir\"] = self._catalog_dir\n        if user_parameters.pop(\"cache\", None) or self._cache:\n            md[\"cache\"] = user_parameters.pop(\"cache\", None) or self._cache\n        params = {\n            \"metadata\": md,\n            \"CATALOG_DIR\": self._catalog_dir,\n        }\n        params.update(self._open_args)\n        if (\n            \"storage_options\" not in params\n            and self._filesystem is not None\n            and self._filesystem.storage_options\n            and _has_catalog_dir(params)\n        ):\n            params[\"storage_options\"] = self._filesystem.storage_options\n        open_args = merge_pars(\n            params,\n            user_parameters,\n            self._user_parameters,\n            getshell=self.getshell,\n            getenv=self.getenv,\n            client=False,\n        )\n\n        if len(self._plugin) == 0:\n            raise ValueError(\n                \"No plugins loaded for this entry: %s\\n\"\n                \"A listing of installable plugins can be found \"\n                \"at https://intake.readthedocs.io/en/latest/plugin\"\n                \"-directory.html .\" % self._driver\n            )\n        elif isinstance(self._plugin, list):\n            plugin = self._plugin[0]\n        else:\n            # dict\n            if plugin is None:\n                # default selection for dict\n                plugin = list(self._plugin)[0]\n            spec = self._driver[plugin]\n            open_args.update(spec.get(\"args\", {}))\n            try:\n                plugin = self._plugin[plugin]\n            except KeyError:\n                raise ValueError(\n                    \"Attempt to select unavailable plugin %s, \"\n                    \"perhaps import of plugin failed\" % plugin\n                )\n    
    return plugin, open_args\n\n    def get(self, **user_parameters):\n        \"\"\"Instantiate the DataSource for the given parameters\"\"\"\n        if not user_parameters and self._default_source is not None:\n            return self._default_source\n\n        plugin, open_args = self._create_open_args(user_parameters)\n        data_source = plugin(**open_args)\n        data_source.catalog_object = self._catalog\n        data_source.name = self.name\n        data_source.description = self._description\n        data_source.cat = self._catalog\n\n        # Cache the default source if there are no user parameters.\n        if not user_parameters:\n            self._default_source = data_source\n\n        return data_source\n\n    def clear_cached_default_source(self):\n        \"\"\"\n        Clear a cached default source so it can be created anew (if, for instance,\n        it depends on changing environment variables or execution context)\n        \"\"\"\n        self._default_source = None\n\n\nclass CatalogParser(object):\n    \"\"\"Loads entries from a YAML spec\"\"\"\n\n    def __init__(self, data, getenv=True, getshell=False, context=None):\n        self._context = context if context else {}\n        self._errors = []\n        self._warnings = []\n        self.getenv = getenv\n        self.getshell = getshell\n        self._data = self._parse(data)\n\n    @property\n    def ok(self):\n        return len(self._errors) == 0\n\n    @property\n    def data(self):\n        return self._data\n\n    @property\n    def errors(self):\n        return self._errors\n\n    @property\n    def warnings(self):\n        return self._warnings\n\n    def error(self, msg, obj, key=None):\n        if key is not None:\n            self._errors.append(str((msg, obj, key)))\n        else:\n            self._errors.append(str((msg, obj)))\n\n    def warning(self, msg, obj, key=None):\n        if key is None:\n            self._warnings.append(str((msg, obj)))\n        else:\n       
     self._warnings.append(str((msg, obj, key)))\n\n    def _parse_plugins(self, data):\n        if \"plugins\" not in data:\n            return\n\n        if not isinstance(data[\"plugins\"], dict):\n            self.error(\"value of key 'plugins' must be a dictionary\", data, \"plugins\")\n            return\n\n        if \"source\" not in data[\"plugins\"]:\n            self.error(\"missing key 'source'\", data[\"plugins\"])\n            return\n\n        if not isinstance(data[\"plugins\"][\"source\"], list):\n            self.error(\"value of key 'source' must be a list\", data[\"plugins\"], \"source\")\n            return\n\n        for plugin_source in data[\"plugins\"][\"source\"]:\n            if not isinstance(plugin_source, dict):\n                self.error(\n                    \"value in list of plugins sources must be a \" \"dictionary\",\n                    data[\"plugins\"],\n                    \"source\",\n                )\n                continue\n\n            elif \"module\" in plugin_source:\n                import intake\n\n                intake.import_name(plugin_source[\"module\"])\n            elif \"dir\" in plugin_source:\n                self.error(\n                    \"The key 'dir', and in general the feature of registering \"\n                    \"plugins from a directory of Python scripts outside of \"\n                    \"sys.path, is no longer supported. 
Use 'module'.\",\n                    plugin_source,\n                )\n            else:\n                self.error(\"missing 'module'\", plugin_source)\n\n    def _getitem(self, obj, key, dtype, required=True, default=None, choices=None):\n        if key in obj:\n            if isinstance(obj[key], dtype):\n                if choices and obj[key] not in choices:\n                    self.error(\n                        \"value '{}' is invalid (choose from {})\".format(obj[key], choices),\n                        obj,\n                        key,\n                    )\n                else:\n                    return obj[key]\n            else:\n                self.error(\n                    \"value '{}' is not expected type '{}'\".format(obj[key], dtype.__name__),\n                    obj,\n                    key,\n                )\n            return None\n        elif required:\n            self.error(\"missing required key '{}'\".format(key), obj)\n            return None\n        elif default:\n            return default\n\n        return None if dtype is object else dtype()\n\n    def _parse_user_parameter(self, name, data):\n        valid_types = list(COERCION_RULES)\n\n        params = {\n            \"name\": name,\n            \"description\": self._getitem(data, \"description\", str),\n            \"type\": self._getitem(data, \"type\", str, choices=valid_types),\n            \"default\": self._getitem(data, \"default\", object, required=False),\n            \"min\": self._getitem(data, \"min\", object, required=False),\n            \"max\": self._getitem(data, \"max\", object, required=False),\n            \"allowed\": self._getitem(data, \"allowed\", object, required=False),\n        }\n\n        if params[\"description\"] is None or params[\"type\"] is None:\n            return None\n\n        return UserParameter(**params)\n\n    def _parse_data_source(self, name, data):\n        if data.pop(\"remote\", False):\n            return\n        
elif \"cls\" in data:\n            from intake.utils import remake_instance\n\n            return remake_instance(data)\n        else:\n            return self._parse_data_source_local(name, data)\n\n    def _parse_data_source_local(self, name, data):\n        ds = {\n            \"name\": name,\n            \"description\": self._getitem(data, \"description\", str, required=False),\n            \"driver\": self._getitem(data, \"driver\", object),\n            \"direct_access\": self._getitem(\n                data,\n                \"direct_access\",\n                str,\n                required=False,\n                default=\"forbid\",\n                choices=[\"forbid\", \"allow\", \"force\"],\n            ),\n            \"args\": self._getitem(data, \"args\", dict, required=False),\n            \"cache\": self._getitem(data, \"cache\", list, required=False),\n            \"metadata\": self._getitem(data, \"metadata\", dict, required=False),\n        }\n\n        if ds[\"driver\"] is None:\n            return None\n\n        ds[\"parameters\"] = []\n\n        if \"parameters\" in data:\n            if isinstance(data[\"parameters\"], list):\n                raise exceptions.ObsoleteParameterError\n\n            if not isinstance(data[\"parameters\"], dict):\n                self.error(\"value of key 'parameters' must be a dictionary\", data, \"parameters\")\n                return None\n\n            for name, parameter in data[\"parameters\"].items():\n                if not isinstance(name, str):\n                    self.error(\n                        \"key '{}' must be a string\".format(name),\n                        data[\"parameters\"],\n                        name,\n                    )\n                    continue\n\n                if not isinstance(parameter, dict):\n                    self.error(\n                        \"value of key '{}' must be a dictionary\" \"\".format(name),\n                        data[\"parameters\"],\n           
             name,\n                    )\n                    continue\n\n                obj = self._parse_user_parameter(name, parameter)\n                if obj:\n                    ds[\"parameters\"].append(obj)\n\n        return LocalCatalogEntry(\n            catalog_dir=self._context[\"root\"],\n            getenv=self.getenv,\n            getshell=self.getshell,\n            **ds,\n        )\n\n    def _parse_data_sources(self, data):\n        sources = []\n\n        if \"sources\" not in data:\n            self.error(\"missing key 'sources'\", data)\n            return sources\n\n        if isinstance(data[\"sources\"], list):\n            raise exceptions.ObsoleteDataSourceError\n\n        if not isinstance(data[\"sources\"], dict):\n            self.error(\"value of key 'sources' must be a dictionary\", data, \"sources\")\n            return sources\n\n        for name, source in data[\"sources\"].items():\n            if not isinstance(name, str):\n                self.error(\"key '{}' must be a string\".format(name), data[\"sources\"], name)\n                continue\n\n            if not isinstance(source, dict):\n                self.error(\n                    \"value of key '{}' must be a dictionary\" \"\".format(name),\n                    data[\"sources\"],\n                    name,\n                )\n                continue\n\n            obj = self._parse_data_source(name, source)\n            if obj:\n                sources.append(obj)\n\n        return sources\n\n    def _parse(self, data):\n        if not isinstance(data, dict):\n            self.error(\"catalog must be a dictionary\", data)\n            return\n        if (data.get(\"version\", None) or data.get(\"metadata\", {}).get(\"version\", None) or 1) > 1:\n            raise VersionError(\"Not a V1 Catalog; perhaps use intake.open_catalog\")\n\n        return dict(\n            plugin_sources=self._parse_plugins(data),\n            data_sources=self._parse_data_sources(data),\n 
           metadata=data.get(\"metadata\", {}),\n            name=data.get(\"name\"),\n            description=data.get(\"description\"),\n        )\n\n\ndef get_dir(path):\n    if \"://\" in path:\n        protocol, _ = split_protocol(path)\n        out = get_filesystem_class(protocol)._parent(path)\n        if \"://\" not in out:\n            # some FSs strip this, some do not\n            out = protocol + \"://\" + out\n        return out\n    path = make_path_posix(os.path.join(os.getcwd(), os.path.dirname(path)))\n    if path[-1] != \"/\":\n        path += \"/\"\n    return path\n\n\nclass YAMLFileCatalog(Catalog):\n    \"\"\"Catalog as described by a single YAML file\"\"\"\n\n    version = __version__\n    container = \"catalog\"\n    partition_access = None\n    name = \"yaml_file_cat\"\n\n    def __init__(self, path=None, text=None, autoreload=True, **kwargs):\n        \"\"\"\n        Parameters\n        ----------\n        path: str\n            Location of the file to parse (can be remote)\n        text: str (DEPRECATED)\n            YAML contents of catalog, takes precedence over path\n        autoreload : bool\n            Whether to watch the source file for changes; make False if you want\n            an editable Catalog\n        \"\"\"\n        self.path = path\n        if text is not None:\n            logger.warning(\"YAMLFileCatalog `text` argument is deprecated\")\n            warnings.warn(\"`text` argument is deprecated\", DeprecationWarning)\n            self.parse(text)\n            self.autoreload = False\n        else:\n            self.autoreload = autoreload  # set this to False if don't want reloads\n        self.filesystem = kwargs.pop(\"fs\", None)\n        self.access = \"name\" not in kwargs\n        super(YAMLFileCatalog, self).__init__(**kwargs)\n\n    def _load(self, reload=False):\n        \"\"\"Load text of catalog file and pass to parse\n\n        Will do nothing if auto-reload is off and reload is not explicitly\n        
requested\n        \"\"\"\n        if self.access is False:\n            # skip first load, if cat has given name (i.e., is subcat)\n            self.updated = 0\n            self.access = True\n            return\n        if self.autoreload or reload:\n            # First, we load from YAML, failing if syntax errors are found\n            options = self.storage_options or {}\n            if hasattr(self.path, \"path\") or hasattr(self.path, \"read\"):\n                file_open = self.path\n                self.path = make_path_posix(\n                    getattr(self.path, \"path\", getattr(self.path, \"name\", \"file\"))\n                )\n            elif self.filesystem is None:\n                file_open = open_files(self.path, mode=\"rb\", **options)\n                assert len(file_open) == 1\n                file_open = file_open[0]\n                self.filesystem = file_open.fs\n            else:\n                file_open = self.filesystem.open(self.path, mode=\"rb\")\n            self._dir = get_dir(self.path)\n\n            with file_open as f:\n                text = f.read().decode()\n            if \"!template \" in text:\n                logger.warning(\"Use of '!template' deprecated - fixing\")\n                text = text.replace(\"!template \", \"\")\n            self.parse(text)\n\n    def add(self, source, name=None, path=None, storage_options=None):\n        \"\"\"Add sources to the catalog and save into the original file\n\n        This adds the source into the catalog dictionary, and saves the\n        resulting catalog as YAML. Typically, this would be used to update a\n        catalog file in-place. 
Optionally, the new catalog can be saved to a\n        new location, in which case the new catalog is returned.\n\n        Note that if a source of the given name exists, it will be clobbered.\n\n        Parameters\n        ----------\n        source : DataSource instance\n            The source whose spec we want to save\n        name : str or None\n            The name the source is to have in the catalog; use the source's\n            name attribute, if not given.\n        path : str or None\n            Location to save the new catalog; if None, the original location\n            from which it was loaded\n        storage_options : dict or None\n            If saving to a new location, use these arguments for the filesystem\n            backend\n\n        Returns\n        -------\n        YAMLFileCatalog instance, containing the new entry\n        \"\"\"\n        import yaml\n\n        entries = self._entries.copy()\n        name = name or source.name or \"source\"\n        entries[name] = source\n\n        if path is None:\n            options = self.storage_options or {}\n            file_open = open_files([self.path], mode=\"wt\", **options)\n        else:\n            options = storage_options or {}\n            file_open = open_files([path], mode=\"wt\", **options)\n        assert len(file_open) == 1\n        file_open = file_open[0]\n\n        data = {\"metadata\": self.metadata, \"sources\": {}}\n        for e in entries:\n            data[\"sources\"][e] = list(entries[e]._yaml()[\"sources\"].values())[0]\n        with file_open as f:\n            yaml.dump(data, f, default_flow_style=False)\n\n        if path:\n            return self\n        else:\n            return YAMLFileCatalog(\n                self.path, storage_options=storage_options, autoreload=self.autoreload\n            )\n\n    def parse(self, text):\n        \"\"\"Create entries from catalog text\n\n        Normally the text comes from the file at self.path via the ``_load()``\n        
method, but could be explicitly set instead. A copy of the text is\n        kept in attribute ``.text`` .\n\n        Parameters\n        ----------\n        text : str\n            YAML formatted catalog spec\n        \"\"\"\n        self.text = text\n        data = yaml_load(self.text)\n\n        if data is None:\n            raise exceptions.CatalogException(\"No YAML data in file\")\n\n        # Second, we validate the schema and semantics\n        context = dict(root=self._dir)\n        result = CatalogParser(data, context=context, getenv=self.getenv, getshell=self.getshell)\n        if result.errors:\n            raise exceptions.ValidationError(\n                \"Catalog '{}' has validation errors:\\n\\n{}\"\n                \"\".format(self.path, \"\\n\".join(result.errors)),\n                result.errors,\n            )\n\n        cfg = result.data\n\n        self._entries = {}\n        shared_parameters = data.get(\"metadata\", {}).get(\"parameters\", {})\n        self.user_parameters.update(\n            {name: UserParameter(name, **attrs) for name, attrs in shared_parameters.items()}\n        )\n\n        for entry in cfg[\"data_sources\"]:\n            entry._catalog = self\n            self._entries[entry.name] = entry\n            entry._filesystem = self.filesystem\n\n        meta = self.metadata.copy()\n        meta.update(cfg.get(\"metadata\", {}))\n        self.metadata = meta\n        self.name = self.name or cfg.get(\"name\") or self.name_from_path\n        self.description = self.description or cfg.get(\"description\")\n\n    @property\n    def name_from_path(self):\n        \"\"\"If catalog is named 'catalog' take name from parent directory\"\"\"\n        name = os.path.splitext(os.path.basename(self.path))[0]\n        if name == \"catalog\":\n            name = os.path.basename(os.path.dirname(self.path))\n        return name.replace(\".\", \"_\")\n\n\nclass YAMLFilesCatalog(Catalog):\n    \"\"\"Catalog as described by a multiple YAML 
files\"\"\"\n\n    version = (__version__,)\n    container = \"catalog\"\n    partition_access = None\n    name = \"yaml_files_cat\"\n\n    def __init__(self, path, flatten=True, **kwargs):\n        \"\"\"\n        Parameters\n        ----------\n        path: str\n            Location of the files to parse (can be remote), including possible\n            glob (*) character(s). Can also be list of paths, without glob\n            characters.\n        flatten: bool (True)\n            Whether to list all entries in the cats at the top level (True)\n            or create sub-cats from each file (False).\n        \"\"\"\n        self.path = path\n        self._flatten = flatten\n        self._kwargs = kwargs.copy()\n        self._cat_files = []\n        self._cats = {}\n        self.access = \"name\" not in kwargs\n        super(YAMLFilesCatalog, self).__init__(**kwargs)\n\n    def _load(self):\n        # initial: find cat files\n        # if flattening, need to get all entries from each.\n        if self.access is False:\n            # skip first load, if cat has given name (i.e., is subcat)\n            self.updated = 0\n            self.access = True\n            return\n        self._entries.clear()\n        options = self.storage_options or {}\n        if isinstance(self.path, (list, tuple)):\n            files = sum([open_files(p, mode=\"rb\", **options) for p in self.path], [])\n            self.name = self.name or \"%i files\" % len(files)\n            self.description = self.description or f\"Catalog generated from {len(files)} files\"\n            self.path = [make_path_posix(p) for p in self.path]\n        else:\n            if isinstance(self.path, str) and \"*\" not in self.path:\n                self.path = self.path + \"/*\"\n            files = open_files(self.path, mode=\"rb\", **options)\n            self.path = make_path_posix(self.path)\n            self.name = self.name or self.path\n            self.description = (\n                
self.description or f\"Catalog generated from all files found in {self.path}\"\n            )\n        if not set(f.path for f in files) == set(f.path for f in self._cat_files):\n            # glob changed, reload all\n            self._cat_files = files\n            self._cats.clear()\n        for f in files:\n            name = os.path.split(f.path)[-1].replace(\".yaml\", \"\").replace(\".yml\", \"\")\n            kwargs = self.kwargs.copy()\n            kwargs[\"path\"] = f.path\n            d = make_path_posix(os.path.dirname(f.path))\n            if f.path not in self._cats:\n                entry = LocalCatalogEntry(\n                    name,\n                    \"YAML file: %s\" % name,\n                    \"yaml_file_cat\",\n                    True,\n                    kwargs,\n                    [],\n                    [],\n                    self.metadata,\n                    d,\n                )\n                if self._flatten:\n                    # store a concrete Catalog\n                    try:\n                        cat = entry()\n                        cat.reload()\n                        self.user_parameters.update(cat.user_parameters)\n                        self._cats[f.path] = cat\n                    except IOError as e:\n                        logger.info('Loading \"%s\" as a catalog failed: %s' \"\" % (entry, e))\n                else:\n                    # store a catalog entry\n                    self._cats[f.path] = entry\n        entries = {}\n        for name, entry in list(self._cats.items()):\n            if self._flatten:\n                entry.reload()\n                inter = set(entry._entries).intersection(entries)\n                if inter:\n                    raise ValueError(\n                        \"Conflicting names when flattening multiple\"\n                        \" catalogs. 
Sources %s exist in more than\"\n                        \" one\" % inter\n                    )\n                entries.update(entry._entries)\n            else:\n                entries[entry._name] = entry\n        self._entries.update(entries)\n\n\nclass MergedCatalog(Catalog):\n    \"\"\"\n    A Catalog that merges the entries of a list of catalogs.\n    \"\"\"\n\n    def __init__(self, catalogs, *args, **kwargs):\n        self._catalogs = catalogs\n        super().__init__(*args, **kwargs)\n\n    def _load(self):\n        for catalog in self._catalogs:\n            catalog._load()\n        self._entries = collections.ChainMap(*(catalog._entries for catalog in self._catalogs))\n\n\nclass EntrypointEntry(CatalogEntry):\n    \"\"\"\n    A catalog entry for an entrypoint.\n\n    \"\"\"\n\n    def __init__(self, entrypoint):\n        self._entrypoint = entrypoint\n        self._container = None\n        self._user_parameters = []\n        super().__init__()\n\n    def __repr__(self):\n        return f\"<Entry '{self.name}'>\"\n\n    @property\n    def name(self):\n        return self._entrypoint.name\n\n    def describe(self):\n        \"\"\"Basic information about this entry\"\"\"\n        if self._container is None:\n            self._container = self().container\n        return {\n            \"name\": self.name,\n            \"module_name\": self._entrypoint.module_name,\n            \"object_name\": self._entrypoint.object_name,\n            \"distro\": self._entrypoint.distro,\n            \"extras\": self._entrypoint.extras,\n            \"container\": self._container,\n        }\n\n    def __call__(self, **kwargs):\n        \"\"\"Instantiate the DataSource for the given parameters\"\"\"\n        source = self._entrypoint.load()\n        if kwargs:\n            source = source.configure_new(**kwargs)\n        return source\n\n    get = __call__\n\n\nclass EntrypointsCatalog(Catalog):\n    \"\"\"\n    A catalog of discovered entrypoint catalogs.\n    
\"\"\"\n\n    def __init__(self, *args, entrypoints_group=\"intake.catalogs\", paths=None, **kwargs):\n        self._entrypoints_group = entrypoints_group\n        self._paths = paths\n        super().__init__(*args, **kwargs)\n\n    def _load(self):\n        eps = entry_points()\n        if hasattr(eps, \"select\"):  # Python 3.10+ / importlib_metadata >= 3.9.0\n            catalogs = eps.select(group=self._entrypoints_group)\n            catalogs = {ep.name: ep for ep in catalogs}\n        else:\n            catalogs = eps.get(\"intake.drivers\", [])\n        self.name = self.name or \"EntrypointsCatalog\"\n        self.description = self.description or f\"EntrypointsCatalog of {len(catalogs)} catalogs.\"\n        for name, entrypoint in catalogs.items():\n            try:\n                self._entries[name] = EntrypointEntry(entrypoint)\n            except Exception as e:\n                warnings.warn(f\"Failed to load {name}, {entrypoint}, {e!r}.\")\n\n\n# Register these early in the import process to support the default catalog\n# which is built at import time. (Without this, 'yaml_file_cat' is looked for\n# in intake.registry before the registry has been populated.)\nregister_driver(\"yaml_file_cat\", YAMLFileCatalog, clobber=True)\nregister_driver(\"yaml_files_cat\", YAMLFilesCatalog, clobber=True)\n"
  },
  {
    "path": "intake/catalog/tests/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n"
  },
  {
    "path": "intake/catalog/tests/cache_data/states.csv",
    "content": "\"state\",\"slug\",\"code\",\"nickname\",\"website\",\"admission_date\",\"admission_number\",\"capital_city\",\"capital_url\",\"population\",\"population_rank\",\"constitution_url\",\"state_flag_url\",\"state_seal_url\",\"map_image_url\",\"landscape_background_url\",\"skyline_background_url\",\"twitter_url\",\"facebook_url\"\n\"Alabama\",\"alabama\",\"AL\",\"Yellowhammer State\",\"http://www.alabama.gov\",\"1819-12-14\",22,\"Montgomery\",\"http://www.montgomeryal.gov\",4833722,23,\"http://alisondb.legislature.state.al.us/alison/default.aspx\",\"https://cdn.civil.services/us-states/flags/alabama-large.png\",\"https://cdn.civil.services/us-states/seals/alabama-large.png\",\"https://cdn.civil.services/us-states/maps/alabama-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/alabama.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/alabama.jpg\",\"https://twitter.com/alabamagov\",\"https://www.facebook.com/alabamagov\"\n\"Alaska\",\"alaska\",\"AK\",\"The Last Frontier\",\"http://alaska.gov\",\"1959-01-03\",49,\"Juneau\",\"http://www.juneau.org\",735132,47,\"http://www.legis.state.ak.us/basis/folioproxy.asp?url=http://wwwjnu01.legis.state.ak.us/cgi-bin/folioisa.dll/acontxt/query=*/doc/{t1}?\",\"https://cdn.civil.services/us-states/flags/alaska-large.png\",\"https://cdn.civil.services/us-states/seals/alaska-large.png\",\"https://cdn.civil.services/us-states/maps/alaska-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/alaska.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/alaska.jpg\",\"https://twitter.com/alaska\",\"https://www.facebook.com/AlaskaLocalGovernments\"\n\"Arizona\",\"arizona\",\"AZ\",\"The Grand Canyon 
State\",\"https://az.gov\",\"1912-02-14\",48,\"Phoenix\",\"https://www.phoenix.gov\",6626624,15,\"http://www.azleg.gov/Constitution.asp\",\"https://cdn.civil.services/us-states/flags/arizona-large.png\",\"https://cdn.civil.services/us-states/seals/arizona-large.png\",\"https://cdn.civil.services/us-states/maps/arizona-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/arizona.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/arizona.jpg\",,\n\"Arkansas\",\"arkansas\",\"AR\",\"The Natural State\",\"http://arkansas.gov\",\"1836-06-15\",25,\"Little Rock\",\"http://www.littlerock.org\",2959373,32,\"http://www.arkleg.state.ar.us/assembly/Summary/ArkansasConstitution1874.pdf\",\"https://cdn.civil.services/us-states/flags/arkansas-large.png\",\"https://cdn.civil.services/us-states/seals/arkansas-large.png\",\"https://cdn.civil.services/us-states/maps/arkansas-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/arkansas.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/arkansas.jpg\",\"https://twitter.com/arkansasgov\",\"https://www.facebook.com/Arkansas.gov\"\n\"California\",\"california\",\"CA\",\"Golden State\",\"http://www.ca.gov\",\"1850-09-09\",31,\"Sacramento\",\"http://www.cityofsacramento.org\",38332521,1,\"http://www.leginfo.ca.gov/const-toc.html\",\"https://cdn.civil.services/us-states/flags/california-large.png\",\"https://cdn.civil.services/us-states/seals/california-large.png\",\"https://cdn.civil.services/us-states/maps/california-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/california.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/california.jpg\",\"https://twitter.com/cagovernment\",\n\"Colorado\",\"colorado\",\"CO\",\"The Centennial 
State\",\"https://www.colorado.gov\",\"1876-08-01\",38,\"Denver\",\"http://www.denvergov.org\",5268367,22,\"https://www.colorado.gov/pacific/archives/government\",\"https://cdn.civil.services/us-states/flags/colorado-large.png\",\"https://cdn.civil.services/us-states/seals/colorado-large.png\",\"https://cdn.civil.services/us-states/maps/colorado-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/colorado.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/colorado.jpg\",\"https://twitter.com/coloradogov\",\"https://www.facebook.com/Colorado.gov\"\n\"Connecticut\",\"connecticut\",\"CT\",\"Constitution State\",\"http://www.ct.gov\",\"1788-01-09\",5,\"Hartford\",\"http://www.hartford.gov\",3596080,29,\"http://www.ct.gov/sots/cwp/view.asp?a=3188&q=392288\",\"https://cdn.civil.services/us-states/flags/connecticut-large.png\",\"https://cdn.civil.services/us-states/seals/connecticut-large.png\",\"https://cdn.civil.services/us-states/maps/connecticut-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/connecticut.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/connecticut.jpg\",,\n\"Delaware\",\"delaware\",\"DE\",\"The First State / The Diamond State\",\"http://delaware.gov\",\"1787-12-07\",1,\"Dover\",\"http://www.cityofdover.com\",925749,45,\"http://www.state.de.us/facts/constit/welcome.htm\",\"https://cdn.civil.services/us-states/flags/delaware-large.png\",\"https://cdn.civil.services/us-states/seals/delaware-large.png\",\"https://cdn.civil.services/us-states/maps/delaware-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/delaware.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/delaware.jpg\",\"https://twitter.com/delaware_gov\",\"https://www.facebook.com/delaware.gov\"\n\"Florida\",\"florida\",\"FL\",\"Sunshine 
State\",\"http://www.myflorida.com\",\"1845-03-03\",27,\"Tallahassee\",\"https://www.talgov.com/Main/Home.aspx\",19552860,4,\"http://www.leg.state.fl.us/Statutes/index.cfm\",\"https://cdn.civil.services/us-states/flags/florida-large.png\",\"https://cdn.civil.services/us-states/seals/florida-large.png\",\"https://cdn.civil.services/us-states/maps/florida-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/florida.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/florida.jpg\",,\n\"Georgia\",\"georgia\",\"GA\",\"Peach State\",\"http://georgia.gov\",\"1788-01-02\",4,\"Atlanta\",\"http://www.atlantaga.gov\",9992167,8,\"http://sos.ga.gov/admin/files/Constitution_2013_Final_Printed.pdf\",\"https://cdn.civil.services/us-states/flags/georgia-large.png\",\"https://cdn.civil.services/us-states/seals/georgia-large.png\",\"https://cdn.civil.services/us-states/maps/georgia-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/georgia.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/georgia.jpg\",\"http://twitter.com/georgiagov\",\"http://www.facebook.com/pages/georgiagov/29760668054\"\n\"Hawaii\",\"hawaii\",\"HI\",\"Aloha State\",\"https://www.ehawaii.gov\",\"1959-08-21\",50,\"Honolulu\",\"http://www.co.honolulu.hi.us\",1404054,40,\"http://lrbhawaii.org/con\",\"https://cdn.civil.services/us-states/flags/hawaii-large.png\",\"https://cdn.civil.services/us-states/seals/hawaii-large.png\",\"https://cdn.civil.services/us-states/maps/hawaii-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/hawaii.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/hawaii.jpg\",\"https://twitter.com/ehawaiigov\",\"https://www.facebook.com/ehawaii.gov\"\n\"Idaho\",\"idaho\",\"ID\",\"Gem 
State\",\"https://www.idaho.gov\",\"1890-07-03\",43,\"Boise\",\"http://www.cityofboise.org\",1612136,39,\"http://www.legislature.idaho.gov/idstat/IC/Title003.htm\",\"https://cdn.civil.services/us-states/flags/idaho-large.png\",\"https://cdn.civil.services/us-states/seals/idaho-large.png\",\"https://cdn.civil.services/us-states/maps/idaho-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/idaho.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/idaho.jpg\",\"https://twitter.com/IDAHOgov\",\n\"Illinois\",\"illinois\",\"IL\",\"Prairie State\",\"https://www.illinois.gov\",\"1818-12-03\",21,\"Springfield\",\"http://www.springfield.il.us\",12882135,5,\"http://www.ilga.gov/commission/lrb/conmain.htm\",\"https://cdn.civil.services/us-states/flags/illinois-large.png\",\"https://cdn.civil.services/us-states/seals/illinois-large.png\",\"https://cdn.civil.services/us-states/maps/illinois-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/illinois.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/illinois.jpg\",,\n\"Indiana\",\"indiana\",\"IN\",\"Hoosier State\",\"http://www.in.gov\",\"1816-12-11\",19,\"Indianapolis\",\"http://www.indy.gov/Pages/Home.aspx\",6570902,16,\"http://www.law.indiana.edu/uslawdocs/inconst.html\",\"https://cdn.civil.services/us-states/flags/indiana-large.png\",\"https://cdn.civil.services/us-states/seals/indiana-large.png\",\"https://cdn.civil.services/us-states/maps/indiana-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/indiana.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/indiana.jpg\",\"https://twitter.com/in_gov\",\"https://www.facebook.com/IndianaGovernment\"\n\"Iowa\",\"iowa\",\"IA\",\"Hawkeye State\",\"https://www.iowa.gov\",\"1846-12-28\",29,\"Des 
Moines\",\"http://www.ci.des-moines.ia.us\",3090416,30,\"http://publications.iowa.gov/135/1/history/7-7.html\",\"https://cdn.civil.services/us-states/flags/iowa-large.png\",\"https://cdn.civil.services/us-states/seals/iowa-large.png\",\"https://cdn.civil.services/us-states/maps/iowa-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/iowa.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/iowa.jpg\",\"https://twitter.com/IAGOVTWEETS\",\n\"Kansas\",\"kansas\",\"KS\",\"Sunflower State\",\"https://www.kansas.gov\",\"1861-01-29\",34,\"Topeka\",\"http://www.topeka.org\",2893957,34,\"https://kslib.info/405/Kansas-Constitution\",\"https://cdn.civil.services/us-states/flags/kansas-large.png\",\"https://cdn.civil.services/us-states/seals/kansas-large.png\",\"https://cdn.civil.services/us-states/maps/kansas-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/kansas.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/kansas.jpg\",\"http://www.twitter.com/ksgovernment\",\"http://www.facebook.com/pages/Topeka-KS/Kansasgov-Kansas-Government-Online/52068474220\"\n\"Kentucky\",\"kentucky\",\"KY\",\"Bluegrass State\",\"http://kentucky.gov\",\"1792-06-01\",15,\"Frankfort\",\"http://frankfort.ky.gov\",4395295,26,\"http://www.lrc.state.ky.us/Legresou/Constitu/intro.htm\",\"https://cdn.civil.services/us-states/flags/kentucky-large.png\",\"https://cdn.civil.services/us-states/seals/kentucky-large.png\",\"https://cdn.civil.services/us-states/maps/kentucky-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/kentucky.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/kentucky.jpg\",\"https://twitter.com/kygov\",\"https://www.facebook.com/kygov\"\n\"Louisiana\",\"louisiana\",\"LA\",\"Pelican State\",\"http://louisiana.gov\",\"1812-04-30\",18,\"Baton 
Rouge\",\"http://brgov.com\",4625470,25,\"http://senate.legis.state.la.us/Documents/Constitution\",\"https://cdn.civil.services/us-states/flags/louisiana-large.png\",\"https://cdn.civil.services/us-states/seals/louisiana-large.png\",\"https://cdn.civil.services/us-states/maps/louisiana-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/louisiana.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/louisiana.jpg\",,\n\"Maine\",\"maine\",\"ME\",\"Pine Tree State\",\"http://www.maine.gov\",\"1820-03-15\",23,\"Augusta\",\"http://www.augustamaine.gov\",1328302,41,\"http://www.maine.gov/legis/const\",\"https://cdn.civil.services/us-states/flags/maine-large.png\",\"https://cdn.civil.services/us-states/seals/maine-large.png\",\"https://cdn.civil.services/us-states/maps/maine-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/maine.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/maine.jpg\",\"https://twitter.com/mainegov_news\",\"http://www.facebook.com/pages/Augusta-ME/Mainegov/98519328240\"\n\"Maryland\",\"maryland\",\"MD\",\"Old Line State\",\"http://www.maryland.gov\",\"1788-04-28\",7,\"Annapolis\",\"http://www.annapolis.gov\",5928814,19,\"http://msa.maryland.gov/msa/mdmanual/43const/html/const.html\",\"https://cdn.civil.services/us-states/flags/maryland-large.png\",\"https://cdn.civil.services/us-states/seals/maryland-large.png\",\"https://cdn.civil.services/us-states/maps/maryland-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/maryland.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/maryland.jpg\",\"https://twitter.com/statemaryland\",\"https://www.facebook.com/statemaryland\"\n\"Massachusetts\",\"massachusetts\",\"MA\",\"Bay 
State\",\"http://www.mass.gov\",\"1788-02-06\",6,\"Boston\",\"http://www.ci.boston.ma.us\",6692824,14,\"http://www.state.ma.us/legis/const.htm\",\"https://cdn.civil.services/us-states/flags/massachusetts-large.png\",\"https://cdn.civil.services/us-states/seals/massachusetts-large.png\",\"https://cdn.civil.services/us-states/maps/massachusetts-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/massachusetts.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/massachusetts.jpg\",\"http://twitter.com/massgov\",\"https://www.facebook.com/massgov\"\n\"Michigan\",\"michigan\",\"MI\",\"Wolverine State / Great Lakes State\",\"http://www.michigan.gov\",\"1837-01-26\",26,\"Lansing\",\"http://cityoflansingmi.com\",9895622,9,\"http://www.legislature.mi.gov/(S(hrowl12tg05hemnnkidim1jb))/mileg.aspx?page=GetObject&objectname=mcl-Constitution\",\"https://cdn.civil.services/us-states/flags/michigan-large.png\",\"https://cdn.civil.services/us-states/seals/michigan-large.png\",\"https://cdn.civil.services/us-states/maps/michigan-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/michigan.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/michigan.jpg\",\"https://twitter.com/migov\",\"https://www.facebook.com/MIgovernment\"\n\"Minnesota\",\"minnesota\",\"MN\",\"North Star State / Land of 10,000 Lakes\",\"https://mn.gov\",\"1858-05-11\",32,\"Saint Paul\",\"http://www.stpaul.gov\",5420380,21,\"http://www.house.leg.state.mn.us/cco/rules/mncon/preamble.htm\",\"https://cdn.civil.services/us-states/flags/minnesota-large.png\",\"https://cdn.civil.services/us-states/seals/minnesota-large.png\",\"https://cdn.civil.services/us-states/maps/minnesota-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/minnesota.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/minnesota.jpg\",,\n\"Mississippi\",\"mississippi\",\"MS\",\"Magnolia 
State\",\"http://www.ms.gov\",\"1817-12-10\",20,\"Jackson\",\"http://www.city.jackson.ms.us\",2991207,31,\"http://law.justia.com/constitution/mississippi\",\"https://cdn.civil.services/us-states/flags/mississippi-large.png\",\"https://cdn.civil.services/us-states/seals/mississippi-large.png\",\"https://cdn.civil.services/us-states/maps/mississippi-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/mississippi.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/mississippi.jpg\",\"https://twitter.com/msdotgov\",\"https://www.facebook.com/msdotgov\"\n\"Missouri\",\"missouri\",\"MO\",\"Show Me State\",\"https://www.mo.gov\",\"1821-08-10\",24,\"Jefferson City\",\"http://www.jeffcitymo.org\",6044171,18,\"http://www.moga.mo.gov/mostatutes/moconstn.html\",\"https://cdn.civil.services/us-states/flags/missouri-large.png\",\"https://cdn.civil.services/us-states/seals/missouri-large.png\",\"https://cdn.civil.services/us-states/maps/missouri-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/missouri.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/missouri.jpg\",\"https://twitter.com/MoGov\",\"https://www.facebook.com/mogov\"\n\"Montana\",\"montana\",\"MT\",\"Treasure State\",\"http://mt.gov\",\"1889-11-08\",41,\"Helena\",\"http://www.ci.helena.mt.us\",1015165,44,\"http://courts.mt.gov/content/library/docs/72constit.pdf\",\"https://cdn.civil.services/us-states/flags/montana-large.png\",\"https://cdn.civil.services/us-states/seals/montana-large.png\",\"https://cdn.civil.services/us-states/maps/montana-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/montana.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/montana.jpg\",,\n\"Nebraska\",\"nebraska\",\"NE\",\"Cornhusker 
State\",\"http://www.nebraska.gov\",\"1867-03-01\",37,\"Lincoln\",\"http://lincoln.ne.gov\",1868516,37,\"http://www.state.ne.us/legislative/statutes/C\",\"https://cdn.civil.services/us-states/flags/nebraska-large.png\",\"https://cdn.civil.services/us-states/seals/nebraska-large.png\",\"https://cdn.civil.services/us-states/maps/nebraska-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/nebraska.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/nebraska.jpg\",\"https://twitter.com/Nebraskagov\",\"https://www.facebook.com/nebraska.gov\"\n\"Nevada\",\"nevada\",\"NV\",\"The Silver State\",\"http://nv.gov\",\"1864-10-31\",36,\"Carson City\",\"http://www.carson.org\",2790136,35,\"http://www.leg.state.nv.us/Const/NvConst.html\",\"https://cdn.civil.services/us-states/flags/nevada-large.png\",\"https://cdn.civil.services/us-states/seals/nevada-large.png\",\"https://cdn.civil.services/us-states/maps/nevada-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/nevada.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/nevada.jpg\",,\n\"New Hampshire\",\"new-hampshire\",\"NH\",\"Granite State\",\"https://www.nh.gov\",\"1788-06-21\",9,\"Concord\",\"http://www.concordnh.gov\",1323459,42,\"http://www.state.nh.us/constitution/constitution.html\",\"https://cdn.civil.services/us-states/flags/new-hampshire-large.png\",\"https://cdn.civil.services/us-states/seals/new-hampshire-large.png\",\"https://cdn.civil.services/us-states/maps/new-hampshire-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/new-hampshire.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/new-hampshire.jpg\",\"https://twitter.com/nhgov\",\n\"New Jersey\",\"new-jersey\",\"NJ\",\"Garden 
State\",\"http://www.state.nj.us\",\"1787-12-18\",3,\"Trenton\",\"http://www.trentonnj.org\",8899339,11,\"http://www.njleg.state.nj.us/lawsconstitution/consearch.asp\",\"https://cdn.civil.services/us-states/flags/new-jersey-large.png\",\"https://cdn.civil.services/us-states/seals/new-jersey-large.png\",\"https://cdn.civil.services/us-states/maps/new-jersey-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/new-jersey.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/new-jersey.jpg\",,\n\"New Mexico\",\"new-mexico\",\"NM\",\"Land of Enchantment\",\"http://www.newmexico.gov\",\"1912-01-06\",47,\"Santa Fe\",\"http://www.santafenm.gov\",2085287,36,\"http://www.loc.gov/law/guide/us-nm.html\",\"https://cdn.civil.services/us-states/flags/new-mexico-large.png\",\"https://cdn.civil.services/us-states/seals/new-mexico-large.png\",\"https://cdn.civil.services/us-states/maps/new-mexico-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/new-mexico.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/new-mexico.jpg\",,\n\"New York\",\"new-york\",\"NY\",\"Empire State\",\"http://www.ny.gov\",\"1788-07-26\",11,\"Albany\",\"http://www.albanyny.org\",19651127,3,\"https://www.dos.ny.gov/info/constitution.htm\",\"https://cdn.civil.services/us-states/flags/new-york-large.png\",\"https://cdn.civil.services/us-states/seals/new-york-large.png\",\"https://cdn.civil.services/us-states/maps/new-york-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/new-york.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/new-york.jpg\",\"https://twitter.com/nygov\",\n\"North Carolina\",\"north-carolina\",\"NC\",\"Old North State / Tar Heel 
State\",\"http://www.nc.gov\",\"1789-11-21\",12,\"Raleigh\",\"http://www.raleigh-nc.org\",9848060,10,\"http://statelibrary.dcr.state.nc.us/nc/stgovt/preconst.htm\",\"https://cdn.civil.services/us-states/flags/north-carolina-large.png\",\"https://cdn.civil.services/us-states/seals/north-carolina-large.png\",\"https://cdn.civil.services/us-states/maps/north-carolina-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/north-carolina.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/north-carolina.jpg\",\"https://twitter.com/NCdotGov\",\n\"North Dakota\",\"north-dakota\",\"ND\",\"Peace Garden State / Flickertail State / Roughrider State\",\"http://www.nd.gov\",\"1889-11-02\",39,\"Bismarck\",\"http://www.bismarck.org\",723393,48,\"http://www.legis.nd.gov/information/statutes/const-laws.html\",\"https://cdn.civil.services/us-states/flags/north-dakota-large.png\",\"https://cdn.civil.services/us-states/seals/north-dakota-large.png\",\"https://cdn.civil.services/us-states/maps/north-dakota-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/north-dakota.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/north-dakota.jpg\",\"https://twitter.com/ExperienceND\",\"https://www.facebook.com/ExperienceND\"\n\"Ohio\",\"ohio\",\"OH\",\"Buckeye State\",\"https://ohio.gov\",\"1803-03-01\",17,\"Columbus\",\"http://ci.columbus.oh.us\",11570808,7,\"http://www.legislature.state.oh.us/constitution.cfm\",\"https://cdn.civil.services/us-states/flags/ohio-large.png\",\"https://cdn.civil.services/us-states/seals/ohio-large.png\",\"https://cdn.civil.services/us-states/maps/ohio-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/ohio.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/ohio.jpg\",\"https://twitter.com/ohgov\",\n\"Oklahoma\",\"oklahoma\",\"OK\",\"Sooner State\",\"https://www.ok.gov\",\"1907-11-16\",46,\"Oklahoma 
City\",\"http://www.okc.gov\",3850568,28,\"http://oklegal.onenet.net/okcon\",\"https://cdn.civil.services/us-states/flags/oklahoma-large.png\",\"https://cdn.civil.services/us-states/seals/oklahoma-large.png\",\"https://cdn.civil.services/us-states/maps/oklahoma-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/oklahoma.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/oklahoma.jpg\",\"https://twitter.com/okgov\",\"https://www.facebook.com/okgov\"\n\"Oregon\",\"oregon\",\"OR\",\"Beaver State\",\"http://www.oregon.gov\",\"1859-02-14\",33,\"Salem\",\"http://www.cityofsalem.net/Pages/default.aspx\",3930065,27,\"http://bluebook.state.or.us/state/constitution/constitution.htm\",\"https://cdn.civil.services/us-states/flags/oregon-large.png\",\"https://cdn.civil.services/us-states/seals/oregon-large.png\",\"https://cdn.civil.services/us-states/maps/oregon-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/oregon.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/oregon.jpg\",,\n\"Pennsylvania\",\"pennsylvania\",\"PA\",\"Keystone State\",\"http://www.pa.gov\",\"1787-12-12\",2,\"Harrisburg\",\"http://harrisburgpa.gov\",12773801,6,\"http://sites.state.pa.us/PA_Constitution.html\",\"https://cdn.civil.services/us-states/flags/pennsylvania-large.png\",\"https://cdn.civil.services/us-states/seals/pennsylvania-large.png\",\"https://cdn.civil.services/us-states/maps/pennsylvania-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/pennsylvania.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/pennsylvania.jpg\",\"https://www.facebook.com/visitPA\",\"https://twitter.com/visitPA\"\n\"Rhode Island\",\"rhode-island\",\"RI\",\"The Ocean 
State\",\"https://www.ri.gov\",\"1790-05-29\",13,\"Providence\",\"http://www.providenceri.com\",1051511,43,\"http://webserver.rilin.state.ri.us/RiConstitution\",\"https://cdn.civil.services/us-states/flags/rhode-island-large.png\",\"https://cdn.civil.services/us-states/seals/rhode-island-large.png\",\"https://cdn.civil.services/us-states/maps/rhode-island-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/rhode-island.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/rhode-island.jpg\",\"https://twitter.com/rigov\",\"https://www.facebook.com/RIgov-Rhode-Island-Government-Online-24056655991\"\n\"South Carolina\",\"south-carolina\",\"SC\",\"Palmetto State\",\"http://www.sc.gov\",\"1788-05-23\",8,\"Columbia\",\"http://www.columbiasc.net\",4774839,24,\"http://www.scstatehouse.gov/scconstitution/scconst.php\",\"https://cdn.civil.services/us-states/flags/south-carolina-large.png\",\"https://cdn.civil.services/us-states/seals/south-carolina-large.png\",\"https://cdn.civil.services/us-states/maps/south-carolina-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/south-carolina.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/south-carolina.jpg\",\"https://twitter.com/scgov\",\"http://www.facebook.com/pages/SCgov/12752057990\"\n\"South Dakota\",\"south-dakota\",\"SD\",\"Mount Rushmore State\",\"http://sd.gov\",\"1889-11-02\",40,\"Pierre\",\"http://ci.pierre.sd.us\",844877,46,\"http://legis.sd.gov/statutes/Constitution\",\"https://cdn.civil.services/us-states/flags/south-dakota-large.png\",\"https://cdn.civil.services/us-states/seals/south-dakota-large.png\",\"https://cdn.civil.services/us-states/maps/south-dakota-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/south-dakota.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/south-dakota.jpg\",,\n\"Tennessee\",\"tennessee\",\"TN\",\"Volunteer 
State\",\"https://www.tn.gov\",\"1796-06-01\",16,\"Nashville\",\"http://www.nashville.gov\",6495978,17,\"http://www.capitol.tn.gov/about/docs/TN-Constitution.pdf\",\"https://cdn.civil.services/us-states/flags/tennessee-large.png\",\"https://cdn.civil.services/us-states/seals/tennessee-large.png\",\"https://cdn.civil.services/us-states/maps/tennessee-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/tennessee.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/tennessee.jpg\",\"https://twitter.com/TNVacation\",\"https://www.facebook.com/tnvacation\"\n\"Texas\",\"texas\",\"TX\",\"Lone Star State\",\"https://www.texas.gov\",\"1845-12-29\",28,\"Austin\",\"http://www.austintexas.gov\",26448193,2,\"http://www.constitution.legis.state.tx.us\",\"https://cdn.civil.services/us-states/flags/texas-large.png\",\"https://cdn.civil.services/us-states/seals/texas-large.png\",\"https://cdn.civil.services/us-states/maps/texas-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/texas.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/texas.jpg\",\"https://twitter.com/texasgov\",\"http://www.facebook.com/Texas.gov\"\n\"Utah\",\"utah\",\"UT\",\"The Beehive State\",\"https://utah.gov\",\"1896-01-04\",45,\"Salt Lake City\",\"http://www.slcgov.com\",2900872,33,\"http://le.utah.gov/UtahCode/chapter.jsp?code=Constitution\",\"https://cdn.civil.services/us-states/flags/utah-large.png\",\"https://cdn.civil.services/us-states/seals/utah-large.png\",\"https://cdn.civil.services/us-states/maps/utah-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/utah.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/utah.jpg\",\"https://twitter.com/UtahGov\",\"https://www.facebook.com/utahgov\"\n\"Vermont\",\"vermont\",\"VT\",\"Green Mountain 
State\",\"http://vermont.gov\",\"1791-03-04\",14,\"Montpelier\",\"http://www.montpelier-vt.org\",626630,49,\"http://www.leg.state.vt.us/statutes/const2.htm\",\"https://cdn.civil.services/us-states/flags/vermont-large.png\",\"https://cdn.civil.services/us-states/seals/vermont-large.png\",\"https://cdn.civil.services/us-states/maps/vermont-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/vermont.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/vermont.jpg\",\"https://twitter.com/vermontgov\",\"https://www.facebook.com/MyVermont\"\n\"Virginia\",\"virginia\",\"VA\",\"Old Dominion State\",\"https://www.virginia.gov\",\"1788-06-25\",10,\"Richmond\",\"http://www.richmondgov.com\",8260405,12,\"http://hodcap.state.va.us/publications/Constitution-01-13.pdf\",\"https://cdn.civil.services/us-states/flags/virginia-large.png\",\"https://cdn.civil.services/us-states/seals/virginia-large.png\",\"https://cdn.civil.services/us-states/maps/virginia-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/virginia.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/virginia.jpg\",,\n\"Washington\",\"washington\",\"WA\",\"The Evergreen State\",\"http://www.wa.gov\",\"1889-11-11\",42,\"Olympia\",\"http://www.ci.olympia.wa.us\",6971406,13,\"http://www.leg.wa.gov/lawsandagencyrules/pages/constitution.aspx\",\"https://cdn.civil.services/us-states/flags/washington-large.png\",\"https://cdn.civil.services/us-states/seals/washington-large.png\",\"https://cdn.civil.services/us-states/maps/washington-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/washington.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/washington.jpg\",\"https://twitter.com/wagov\",\"\"\n\"West Virginia\",\"west-virginia\",\"WV\",\"Mountain 
State\",\"http://www.wv.gov\",\"1863-06-20\",35,\"Charleston\",\"http://www.cityofcharleston.org\",1854304,38,\"http://www.legis.state.wv.us/WVCODE/WV_CON.cfm\",\"https://cdn.civil.services/us-states/flags/west-virginia-large.png\",\"https://cdn.civil.services/us-states/seals/west-virginia-large.png\",\"https://cdn.civil.services/us-states/maps/west-virginia-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/west-virginia.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/west-virginia.jpg\",\"https://twitter.com/wvgov\",\"https://www.facebook.com/wvgov\"\n\"Wisconsin\",\"wisconsin\",\"WI\",\"Badger State\",\"https://www.wisconsin.gov\",\"1848-05-29\",30,\"Madison\",\"http://www.ci.madison.wi.us\",5742713,20,\"http://www.legis.state.wi.us/rsb/2wiscon.html\",\"https://cdn.civil.services/us-states/flags/wisconsin-large.png\",\"https://cdn.civil.services/us-states/seals/wisconsin-large.png\",\"https://cdn.civil.services/us-states/maps/wisconsin-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/wisconsin.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/wisconsin.jpg\",,\n\"Wyoming\",\"wyoming\",\"WY\",\"Equality State\",\"http://www.wyo.gov\",\"1890-07-10\",44,\"Cheyenne\",\"http://www.cheyennecity.org\",582658,50,\"http://legisweb.state.wy.us/statutes/constitution.aspx?file=titles/97Title97.htm\",\"https://cdn.civil.services/us-states/flags/wyoming-large.png\",\"https://cdn.civil.services/us-states/seals/wyoming-large.png\",\"https://cdn.civil.services/us-states/maps/wyoming-large.png\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/landscape/wyoming.jpg\",\"https://cdn.civil.services/us-states/backgrounds/1280x720/skyline/wyoming.jpg\",,\n"
  },
  {
    "path": "intake/catalog/tests/catalog.yml",
    "content": "plugins:\n  source:\n    - module: intake.catalog.tests.example1_source\nsources:\n  use_example1:\n    description: example1 source plugin\n    driver: example1\n    args: {}\n  allowed_list_absolute:\n    description: pick one list out of a few\n    driver: example1\n    args:\n      arg1: \"{{up}}\"\n    parameters:\n      mylist:\n        description: \"\"\n        type: list\n        default: []\n        allowed:\n          - []\n          - [\"one\"]\n          - [\"one\", \"two\"]\n  allowed_list_multi:\n    description: pick multiple values from an allowed list\n    driver: example1\n    args:\n      args1: \"{{up}}\"\n    parameters:\n      mylist:\n        description: \"\"\n        type: list\n        default: []\n        allowed: [\"one\", \"two\", \"three\"]\n"
  },
  {
    "path": "intake/catalog/tests/catalog1.yml",
    "content": "name: name_in_cat\nmetadata:\n  test: true\nplugins:\n  source:\n    - module: intake.catalog.tests.example1_source\n    - module: intake.catalog.tests.example_plugin_dir.example2_source\nsources:\n  use_example1:\n    description: example1 source plugin\n    driver: example1\n    args: {}\n  nested:\n    description: around again\n    driver: yaml_file_cat\n    args:\n      path: '{{ CATALOG_DIR }}/catalog1.yml'\n  entry1:\n    description: entry1 full\n    metadata:\n      foo: 'bar'\n      bar: [1, 2, 3]\n    driver: csv\n    # Default direct_access is \"forbid\" by default\n    args: # passed to the open() method\n      urlpath: '{{ CATALOG_DIR }}/entry1_*.csv'\n  entry1_part:\n    description: entry1 part\n    parameters: # User defined parameters\n      part:\n        description: part of filename\n        type: str\n        default: \"1\"\n        allowed: [\"1\", \"2\"]\n    metadata:\n      foo: 'baz'\n      bar: [2, 4, 6]\n    driver: csv\n    direct_access: \"allow\"\n    args: # passed to the open() method\n      urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv'\n  remote_env:\n    description: env gets interpreted in server\n    driver: intake.conftest.TestSource\n    args:\n      urlpath: 'path-{{intake_test}}'\n    parameters:\n      intake_test:\n        description: none\n        type: str\n        default: 'env(INTAKE_TEST)'\n  local_env:\n    description: env gets interpreted in client\n    driver: csv\n    args:\n      urlpath: 'path-{{intake_test}}'\n    parameters:\n      intake_test:\n        description: none\n        type: str\n        default: 'client_env(INTAKE_TEST)'\n  text:\n    description: textfiles in this dir\n    driver: textfiles\n    args:\n      urlpath: \"{{ CATALOG_DIR }}/*.yml\"\n  arr:\n    description: small array\n    driver: numpy\n    args:\n      path: \"{{ CATALOG_DIR }}/small.npy\"\n      chunks: 5\n  datetime:\n    description: datetime parameters\n    driver: intake.conftest.TestSource\n    args:\n 
     urlpath: \"{{time}}\"\n    parameters:\n      time:\n        description: some time\n        type: datetime\n"
  },
  {
    "path": "intake/catalog/tests/catalog_alias.yml",
    "content": "sources:\n  input_data:\n    description: a local data file\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv'\n  arr_cache:\n    description: small array\n    driver: numpy\n    args:\n      path: \"{{ CATALOG_DIR }}/small.npy\"\n      chunks: 5\n  alias0:\n    driver: intake.source.derived.AliasSource\n    args:\n      target: input_data\n  barebones:\n    driver: intake.source.derived.GenericTransform\n    args:\n      targets:\n        - input_data\n      transform: builtins.len\n      transform_kwargs: {}\n  alias1:\n    driver: alias\n    args:\n      target: \"{{choice}}\"\n      mapping:\n        first: input_data\n        second: arr_cache\n    parameters:\n      choice:\n        description: which to alias\n        type: str\n        default: first\n        allowed: [\"first\", \"second\"]\n  derive_cols:\n    driver: intake.source.derived.Columns\n    args:\n      targets:\n        - input_data\n      columns: [\"state\", \"slug\"]\n  derive_cols_func:\n    driver: intake.source.derived.DataFrameTransform\n    args:\n      targets:\n        - input_data\n      transform: \"intake.source.tests.test_derived._pick_columns\"\n      transform_kwargs:\n        columns: [\"state\", \"slug\"]\n  other_cat:\n    driver: intake.source.derived.Columns\n    args:\n      targets:\n        - \"{{CATALOG_DIR}}/catalog1.yml:entry1\"\n      columns: [\"name\", \"score\"]\n  alias_other_cat:\n    driver: alias\n    args:\n      target: \"{{CATALOG_DIR}}/catalog1.yml:entry1\"\n      kwargs:\n        csv_kwargs: {parse_dates: True}\n      cat_kwargs:\n        getenv: true\n"
  },
  {
    "path": "intake/catalog/tests/catalog_caching.yml",
    "content": "metadata:\n  test: true\nplugins:\n  source:\n    - module: intake.catalog.tests.example1_source\n    - module: intake.catalog.tests.example2_source\nsources:\n  test_cache:\n    description: cache a csv file from the local filesystem\n    driver: csv\n    cache:\n      - argkey: urlpath\n        regex: '{{ CATALOG_DIR }}/cache_data'\n        type: file\n    args:\n      urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv'\n  test_cache_new:\n    description: cache a csv file from the local filesystem\n    driver: csv\n    args:\n      urlpath: 'filecache://{{ CATALOG_DIR }}/cache_data/states.csv'\n      storage_options:\n        target_protocol: 'file'\n        cache_storage: \"{{env(TEST_CACHE_DIR)}}\"\n\n  test_multiple_cache:\n    description: testing what happens when there are multiple cache specs\n    driver: csv\n    cache:\n      - argkey: urlpath\n        regex: '{{ CATALOG_DIR }}/cache_data'\n        type: file\n      - argkey: urlpath\n        regex: '{{ CATALOG_DIR }}'\n        type: file\n    args:\n      urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv'\n  test_list_cache:\n    description: testing what happens when there are multiple cache specs\n    driver: csv\n    cache:\n      - argkey: urlpath\n        regex: '{{ CATALOG_DIR }}/cache_data'\n        type: file\n    args:\n      urlpath: ['{{ CATALOG_DIR }}/cache_data/states.csv', '{{ CATALOG_DIR }}/cache_data/states.csv']\n  test_bad_type_cache_spec:\n    description: cache a csv file from the local filesystem\n    driver: csv\n    cache:\n      - argkey: urlpath\n        regex: '{{ CATALOG_DIR }}/cache_data'\n        type: noidea\n    args:\n      urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv'\n  text_cache:\n    description: textfiles in this dir\n    driver: textfiles\n    cache:\n      - argkey: urlpath\n        regex: '{{ CATALOG_DIR }}'\n        type: file\n    args:\n      urlpath: \"{{ CATALOG_DIR }}/*.yml\"\n  arr_cache:\n    description: small array\n    driver: 
numpy\n    cache:\n      - argkey: path\n        regex: '{{ CATALOG_DIR }}'\n        type: file\n    args:\n      path: \"{{ CATALOG_DIR }}/small.npy\"\n      chunks: 5\n  test_no_regex:\n    description: cache a csv file from the local filesystem\n    driver: csv\n    cache:\n      - argkey: urlpath\n        type: file\n    args:\n      urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv'\n  test_regex_no_match:\n    description: regex does not match urlpath\n    driver: csv\n    cache:\n      - argkey: urlpath\n        regex: 'xxx'\n        type: file\n    args:\n      urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv'\n  test_regex_partial_match:\n    description: regex matches some part of the url\n    driver: csv\n    cache:\n      - argkey: urlpath\n        regex: '_data'\n        type: file\n    args:\n      urlpath: '{{ CATALOG_DIR }}/cache_data/states.csv'\n"
  },
  {
    "path": "intake/catalog/tests/catalog_dup_parameters.yml",
    "content": "sources:\n  entry1_part:\n    description: entry1 part\n    parameters:\n      part:\n        description: a\n        type: str\n      part:\n        description: b\n        type: int\n    driver: csv\n    args: # passed to the open() method\n      urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv'\n  entry2_part:\n    description: entry2 part\n    parameters:\n      part:\n        description: a\n        type: str\n    driver: csv\n    args: # passed to the open() method\n      urlpath: '{{ CATALOG_DIR }}/entry2_{{ part }}.csv'\n"
  },
  {
    "path": "intake/catalog/tests/catalog_dup_sources.yml",
    "content": "sources:\n  entry1_part:\n    description: entry1 part\n    parameters:\n      part:\n        description: a\n        type: str\n    driver: csv\n    args: # passed to the open() method\n      urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv'\n  entry1_part:\n    description: entry1 part\n    parameters:\n      part:\n        description: a\n        type: str\n    driver: csv\n    args: # passed to the open() method\n      urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv'\n"
  },
  {
    "path": "intake/catalog/tests/catalog_hierarchy.yml",
    "content": "sources:\n  a.b.c:\n    description: abc\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/entry1_*.csv'\n  a.b.d:\n    description: abc\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/entry1_*.csv'\n  c:\n    description: abc\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/entry1_*.csv'\n  a.c:\n    description: abc\n    driver: csv\n    parameters:\n      part:\n        description: part of filename\n        type: str\n        default: \"1\"\n        allowed: [\"1\", \"2\"]\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv'\n"
  },
  {
    "path": "intake/catalog/tests/catalog_named.yml",
    "content": "name: name_in_spec\ndescription: This is a catalog with a description in the yaml\nmetadata:\n  some: thing\nplugins:\n  source:\n    - module: intake.catalog.tests.example1_source\nsources:\n  use_example1:\n    description: example1 source plugin\n    driver: example1\n    args: {}\n"
  },
  {
    "path": "intake/catalog/tests/catalog_non_dict.yml",
    "content": "- 1\n- 2\n- 3\n"
  },
  {
    "path": "intake/catalog/tests/catalog_search/example_packages/ep/__init__.py",
    "content": "class TestCatalog:\n    ...\n"
  },
  {
    "path": "intake/catalog/tests/catalog_search/example_packages/ep-0.1.dist-info/entry_points.txt",
    "content": "[intake.catalogs]\nep1 = ep:TestCatalog\n"
  },
  {
    "path": "intake/catalog/tests/catalog_search/yaml.yml",
    "content": "plugins:\n  source:\n    - module: intake.catalog.tests.example1_source\nsources:\n  use_example1:\n    description: example1 source plugin\n    driver: example1\n    args: {}\n"
  },
  {
    "path": "intake/catalog/tests/catalog_union_1.yml",
    "content": "plugins:\n  source:\n    - module: intake.catalog.tests.example1_source\nsources:\n  use_example1:\n    description: example1 source plugin\n    driver: example1\n    args: {}\n"
  },
  {
    "path": "intake/catalog/tests/catalog_union_2.yml",
    "content": "plugins:\n  source:\n    - module: intake.catalog.tests.example1_source\n    - module: intake.catalog.tests.example2_source\nsources:\n  entry1:\n    description: entry1 full\n    metadata:\n      foo: 'bar'\n      bar: [1, 2, 3]\n    driver: csv\n    # direct_access is \"forbid\" by default\n    args: # passed to the open() method\n      urlpath: '{{ CATALOG_DIR }}/entry1_*.csv'\n  entry1_part:\n    description: entry1 part\n    parameters: # User defined parameters\n      part:\n        description: part of filename\n        type: str\n        default: \"1\"\n        allowed: [\"1\", \"2\"]\n    metadata:\n      foo: 'baz'\n      bar: [2, 4, 6]\n    driver: csv\n    direct_access: \"allow\"\n    args: # passed to the open() method\n      urlpath: '{{ CATALOG_DIR }}/entry1_{{ part }}.csv'\n"
  },
  {
    "path": "intake/catalog/tests/conftest.py",
    "content": "import os.path\n\nimport pytest\n\nfrom intake import open_catalog\n\n\n@pytest.fixture\ndef catalog1():\n    path = os.path.dirname(__file__)\n    return open_catalog(os.path.join(path, \"catalog1.yml\"))\n"
  },
  {
    "path": "intake/catalog/tests/data_source_missing.yml",
    "content": "plugins:\n  source: []\n"
  },
  {
    "path": "intake/catalog/tests/data_source_name_non_string.yml",
    "content": "sources:\n  1: foo\n"
  },
  {
    "path": "intake/catalog/tests/data_source_non_dict.yml",
    "content": "sources: foo\n"
  },
  {
    "path": "intake/catalog/tests/data_source_value_non_dict.yml",
    "content": "sources:\n  foo: 1\n"
  },
  {
    "path": "intake/catalog/tests/dot-nest.yaml",
    "content": "sources:\n  self:\n    description: this cat\n    driver: yaml_file_cat\n    args:\n      path: \"{{CATALOG_DIR}}/dot-nest.yaml\"\n  selfdot.dot:\n    description: this cat\n    driver: yaml_file_cat\n    args:\n      path: \"{{CATALOG_DIR}}/dot-nest.yaml\"\n  self.dot:\n    description: this cat\n    driver: yaml_file_cat\n    args:\n      path: \"{{CATALOG_DIR}}/dot-nest.yaml\"\n  leaf:\n    description: leaf\n    driver: csv\n    args:\n      urlpath: \"\"\n  leafdot.dot:\n    description: leaf-dot\n    driver: csv\n    args:\n      urlpath: \"\"\n  leaf.dot:\n    description: leaf-dot\n    driver: csv\n    args:\n      urlpath: \"\"\n"
  },
  {
    "path": "intake/catalog/tests/entry1_1.csv",
    "content": "name,score,rank\nAlice1,100.5,1\nBob1,50.3,2\nCharlie1,25,3\nEve1,25,3\n"
  },
  {
    "path": "intake/catalog/tests/entry1_2.csv",
    "content": "name,score,rank\nAlice2,100.5,1\nBob2,50.3,2\nCharlie2,25,3\nEve2,25,3\n"
  },
  {
    "path": "intake/catalog/tests/example1_source.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom intake.source.base import DataSource\n\n\nclass ExampleSource(DataSource):\n    name = \"example1\"\n    version = \"0.1\"\n    container = \"dataframe\"\n    partition_access = True\n\n    def __init__(self, **kwargs):\n        self.kwargs = kwargs\n        super(ExampleSource, self).__init__()\n"
  },
  {
    "path": "intake/catalog/tests/example_plugin_dir/example2_source.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom intake.source.base import DataSource\n\n\nclass Ex2Plugin(DataSource):\n    name = \"example2\"\n    version = \"0.1\"\n    container = \"dataframe\"\n    partition_access = True\n\n    def __init__(self):\n        super(Ex2Plugin, self).__init__()\n"
  },
  {
    "path": "intake/catalog/tests/multi_plugins.yaml",
    "content": "sources:\n  tables0:\n    args:\n        urlpath: \"{{ CATALOG_DIR }}/files*\"\n    description: \"short form\"\n    driver:\n      - csv\n    metadata: {}\n  tables1:\n    args:\n        urlpath: \"{{ CATALOG_DIR }}/files*\"\n    description: \"long form\"\n    driver:\n      - intake.source.csv.CSVSource\n    metadata: {}\n  tables2:\n    args:\n        urlpath: \"{{ CATALOG_DIR }}/files*\"\n    description: \"same plugin twice\"\n    driver:\n      - csv\n      - intake.source.csv.CSVSource\n    metadata: {}\n  tables3:\n    args:\n        urlpath: \"{{ CATALOG_DIR }}/files*\"\n    description: \"user's choice with extra param\"\n    driver:\n      myplug:\n          class: intake.source.csv.CSVSource\n      myplug2:\n          class: intake.source.csv.CSVSource\n          args:\n              csv_kwargs: true\n    metadata: {}\n  tables4:\n    args:\n        urlpath: \"{{ CATALOG_DIR }}/files*\"\n    description: \"neither plugins exist\"\n    driver:\n      myplug:\n          class: doesnotexist\n      myplug2:\n          class: also.none.Class\n    metadata: {}\n  tables5:\n    args:\n        urlpath: \"{{ CATALOG_DIR }}/files*\"\n    description: \"only second plugin exists\"\n    driver:\n      myplug:\n          class: doesnotexist\n      myplug2:\n          class: csv\n    metadata: {}\n  tables6:\n    args:\n        urlpath: \"{{ CATALOG_DIR }}/files*\"\n    description: \"no valid plugin in list\"\n    driver:\n      myplug:\n          class: doesnotexist\n    metadata: {}\n  tables7:\n    args:\n        urlpath: \"{{ CATALOG_DIR }}/files*\"\n    description: \"no valid plugin\"\n    driver: doesnotexist\n    metadata: {}\n"
  },
  {
    "path": "intake/catalog/tests/multi_plugins2.yaml",
    "content": "sources:\n  tables6:\n    args:\n        urlpath: \"{{ CATALOG_DIR }}/files*\"\n    description: \"incompatible plugins\"\n    driver:\n      myplug:\n          class: csv\n      myplug2:\n          class: numpy\n    metadata: {}\n"
  },
  {
    "path": "intake/catalog/tests/obsolete_data_source_list.yml",
    "content": "sources:\n  - name: a\n    driver: csv\n"
  },
  {
    "path": "intake/catalog/tests/obsolete_params_list.yml",
    "content": "sources:\n  a:\n    driver: csv\n    parameters:\n      - name: b\n"
  },
  {
    "path": "intake/catalog/tests/params_missing_required.yml",
    "content": "sources:\n  a:\n    description: A\n"
  },
  {
    "path": "intake/catalog/tests/params_name_non_string.yml",
    "content": "sources:\n  a:\n    driver: csv\n    parameters:\n      1: {}\n"
  },
  {
    "path": "intake/catalog/tests/params_non_dict.yml",
    "content": "sources:\n  a:\n    driver: csv\n    parameters: b\n"
  },
  {
    "path": "intake/catalog/tests/params_value_bad_choice.yml",
    "content": "sources:\n  a:\n    driver: csv\n    parameters:\n      b:\n        description: B\n        type: string\n"
  },
  {
    "path": "intake/catalog/tests/params_value_bad_type.yml",
    "content": "sources:\n  a:\n    driver: csv\n    parameters:\n      b:\n        description: 1\n        type: str\n"
  },
  {
    "path": "intake/catalog/tests/params_value_non_dict.yml",
    "content": "sources:\n  a:\n    driver: csv\n    parameters:\n      b: 1\n"
  },
  {
    "path": "intake/catalog/tests/plugins_non_dict.yml",
    "content": "plugins: 0\nsources: {}\n"
  },
  {
    "path": "intake/catalog/tests/plugins_source_missing.yml",
    "content": "plugins:\n  s0urce: []\nsources: {}\n"
  },
  {
    "path": "intake/catalog/tests/plugins_source_missing_key.yml",
    "content": "plugins:\n  source:\n    - directory: /tmp\nsources: {}\n"
  },
  {
    "path": "intake/catalog/tests/plugins_source_non_dict.yml",
    "content": "plugins:\n  source:\n    - module\nsources: {}\n"
  },
  {
    "path": "intake/catalog/tests/plugins_source_non_list.yml",
    "content": "plugins:\n  source: module\nsources: {}\n"
  },
  {
    "path": "intake/catalog/tests/plugins_source_non_string.yml",
    "content": "plugins:\n  source:\n    - module: 0\nsources: {}\n"
  },
  {
    "path": "intake/catalog/tests/test_alias.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport os\n\nimport intake\n\nhere = os.path.abspath(os.path.dirname(__file__))\nfn = os.path.join(here, \"catalog_alias.yml\")\n\n\ndef test_simple():\n    cat = intake.open_catalog(fn)\n    s = cat.alias0()\n    assert s.container == \"other\"\n    out = str(s.discover())\n    assert s.container == \"dataframe\"\n    assert \"state\" in out\n\n\ndef test_mapping():\n    cat = intake.open_catalog(fn)\n    s = cat.alias1()\n    assert s.container == \"other\"\n    out = str(s.discover())\n    assert s.container == \"dataframe\"\n    assert \"state\" in out\n\n    s = cat.alias1(choice=\"second\")\n    assert s.container == \"other\"\n    out = str(s.discover())\n    assert s.container == \"ndarray\"\n    assert \"int64\" in out\n\n\ndef test_other_cat():\n    cat = intake.open_catalog(fn)\n    other = cat.alias_other_cat\n\n    assert other.source is None\n    assert other.cat.name == cat.name\n\n    _ = other.discover()\n\n    assert other.source is not None\n    assert other.source.cat.name == \"name_in_cat\"\n    assert other.source._csv_kwargs == {\"parse_dates\": True}\n    assert other.source.cat._captured_init_kwargs == {\"getenv\": True}\n\n\ndef test_alias():\n    \"\"\"make sure aliases are set up correctly\"\"\"\n\n    cat = intake.entry.Catalog()\n\n    filepath = os.path.join(here, \"entry1_1.csv\")\n    data = intake.readers.datatypes.CSV(filepath)\n    reader = intake.readers.readers.PandasCSV(data)\n\n    # modify reader\n    reader_modified = reader.name.lower()\n\n    # create catalog entry\n    cat[\"entry1_1\"] = reader_modified\n\n    # Make sure that only the key/alias/name is\n    # in the list of 
entries and aliases\n    assert list(cat) == [\"entry1_1\"]\n    assert cat.aliases == {\"entry1_1\": \"entry1_1\"}\n"
  },
  {
    "path": "intake/catalog/tests/test_catalog_save.py",
    "content": "\"\"\"\nTest saving catalogs.\n\"\"\"\nimport os\n\nimport intake\nfrom intake.catalog import Catalog\nfrom intake.catalog.local import LocalCatalogEntry\n\n\ndef test_catalog_description(tmpdir):\n    \"\"\"Make sure the description comes through the save.\"\"\"\n\n    tmpdir = str(tmpdir)\n    cat_path = os.path.join(tmpdir, \"desc_test.yaml\")\n\n    cat1 = Catalog.from_dict(\n        {\n            \"name\": LocalCatalogEntry(\n                \"name\",\n                description=\"description\",\n                driver=intake.catalog.local.YAMLFileCatalog,\n            )\n        },\n        name=\"overall catalog name\",\n        description=\"overall catalog description\",\n    )\n\n    cat1.save(cat_path)\n\n    cat2 = intake.open_catalog(cat_path)\n\n    assert cat2.description is not None\n"
  },
  {
    "path": "intake/catalog/tests/test_core.py",
    "content": "import pytest\n\nfrom intake.catalog.base import Catalog\n\n\ndef test_no_entry():\n    cat = Catalog()\n    cat2 = cat.configure_new()\n    assert isinstance(cat2, Catalog)\n    assert cat.auth is None\n    assert cat2.auth is None\n\n\ndef test_regression():\n    with pytest.raises(ValueError):\n        Catalog(\"URI\")\n"
  },
  {
    "path": "intake/catalog/tests/test_default.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport sys\nfrom pathlib import Path\n\nfrom intake.catalog import default\nfrom intake.catalog.base import Catalog\n\n\ndef test_which():\n    p = default.which(\"python\")\n    assert Path(p).resolve() == Path(sys.executable).resolve()\n\n\ndef test_load():\n    cat = default.load_user_catalog()\n    assert isinstance(cat, Catalog)\n    cat = default.load_global_catalog()\n    assert isinstance(cat, Catalog)\n"
  },
  {
    "path": "intake/catalog/tests/test_discovery.py",
    "content": "import os\nimport sys\n\nfrom ..local import EntrypointsCatalog, MergedCatalog, YAMLFilesCatalog\n\n\ndef test_catalog_discovery():\n    basedir = os.path.dirname(__file__)\n    yaml_glob = os.path.join(basedir, \"catalog_search\", \"*.yml\")\n    example_packages = os.path.join(basedir, \"catalog_search\", \"example_packages\")\n\n    test_catalog = MergedCatalog(\n        [\n            EntrypointsCatalog(paths=[example_packages]),\n            YAMLFilesCatalog(path=[yaml_glob]),\n        ]\n    )\n\n    assert \"use_example1\" in test_catalog\n    assert \"ep1\" in test_catalog\n\n\ndef test_deferred_import():\n    \"See https://github.com/intake/intake/pull/541\"\n    # We are going to mess with sys.modules here, so to be safe let's put it\n    # back the way it was at the end.\n    import intake.catalog\n\n    intake.catalog.builtin = None\n    mods = sys.modules.copy()\n    try:\n        sys.modules.pop(\"intake\")\n        sys.modules.pop(\"intake.catalog\")\n        intake.catalog.__dict__.pop(\"builtin\")\n        assert \"builtin\" not in intake.catalog.__dict__\n        assert intake.cat is not None\n    finally:\n        sys.modules.update(mods)\n"
  },
  {
    "path": "intake/catalog/tests/test_gui.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\nimport pytest\n\n\ndef panel_importable():\n    try:\n        import panel as pn  # noqa\n\n        return True\n    except:\n        return False\n\n\nEXPECTED_ERROR_TEXT = \"Please install panel to use the GUI\"\n\n\n@pytest.mark.skipif(panel_importable(), reason=\"panel is importable, so skip\")\ndef test_cat_no_panel_does_not_raise_errors(catalog1):\n    assert catalog1.name == \"name_in_cat\"\n\n\n@pytest.mark.skipif(panel_importable(), reason=\"panel is importable, so skip\")\ndef test_cat_no_panel_display_gui(catalog1):\n    with pytest.raises(RuntimeError, match=EXPECTED_ERROR_TEXT):\n        repr(catalog1.gui)\n\n\ndef test_cat_gui(catalog1):\n    pytest.importorskip(\"panel\")\n    assert repr(catalog1.gui).startswith(\"Intake\")\n\n\n@pytest.mark.skipif(panel_importable(), reason=\"panel is importable, so skip\")\ndef test_entry_no_panel_does_not_raise_errors(catalog1):\n    assert catalog1.entry1.name == \"entry1\"\n\n\n@pytest.mark.skipif(panel_importable(), reason=\"panel is importable, so skip\")\ndef test_entry_no_panel_display_gui(catalog1):\n    with pytest.raises(RuntimeError, match=EXPECTED_ERROR_TEXT):\n        repr(catalog1.entry1.gui)\n"
  },
  {
    "path": "intake/catalog/tests/test_local.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport datetime\nimport os.path\nimport shutil\nimport tempfile\nimport time\n\nimport pandas\nimport pytest\nfrom fsspec.implementations.local import make_path_posix\n\nfrom intake import open_catalog\nfrom intake.catalog import exceptions, local\nfrom intake.catalog.local import LocalCatalogEntry, UserParameter, get_dir\n\nfrom .util import assert_items_equal\n\n\ndef abspath(filename):\n    return make_path_posix(os.path.join(os.path.dirname(__file__), filename))\n\n\ndef test_local_catalog(catalog1):\n    assert_items_equal(\n        list(catalog1),\n        [\n            \"use_example1\",\n            \"nested\",\n            \"entry1\",\n            \"entry1_part\",\n            \"remote_env\",\n            \"local_env\",\n            \"text\",\n            \"arr\",\n            \"datetime\",\n        ],\n    )\n    assert len(catalog1) == 9\n    assert catalog1[\"entry1\"].describe() == {\n        \"name\": \"entry1\",\n        \"container\": \"dataframe\",\n        \"direct_access\": \"forbid\",\n        \"user_parameters\": [],\n        \"description\": \"entry1 full\",\n        \"args\": {\"urlpath\": \"{{ CATALOG_DIR }}/entry1_*.csv\"},\n        \"metadata\": {\"bar\": [1, 2, 3], \"foo\": \"bar\"},\n        \"plugin\": [\"csv\"],\n        \"driver\": [\"csv\"],\n    }\n    assert catalog1[\"entry1_part\"].describe() == {\n        \"name\": \"entry1_part\",\n        \"container\": \"dataframe\",\n        \"user_parameters\": [\n            {\n                \"name\": \"part\",\n                \"description\": \"part of filename\",\n                \"default\": \"1\",\n                \"type\": \"str\",\n    
            \"allowed\": [\"1\", \"2\"],\n            }\n        ],\n        \"description\": \"entry1 part\",\n        \"direct_access\": \"allow\",\n        \"args\": {\"urlpath\": \"{{ CATALOG_DIR }}/entry1_{{ part }}.csv\"},\n        \"metadata\": {\"foo\": \"baz\", \"bar\": [2, 4, 6]},\n        \"plugin\": [\"csv\"],\n        \"driver\": [\"csv\"],\n    }\n    assert catalog1[\"entry1\"].container == \"dataframe\"\n    md = catalog1[\"entry1\"].metadata\n    md.pop(\"catalog_dir\")\n    assert md[\"foo\"] == \"bar\"\n    assert md[\"bar\"] == [1, 2, 3]\n\n    # Use default parameters\n    assert catalog1[\"entry1_part\"].container == \"dataframe\"\n    # Specify parameters\n    assert catalog1[\"entry1_part\"].configure_new(part=\"2\").container == \"dataframe\"\n\n\ndef test_get_items(catalog1):\n    for key, entry in catalog1.items():\n        assert catalog1[key].describe() == entry.describe()\n\n\ndef test_nested(catalog1):\n    assert \"nested\" in catalog1\n    assert \"entry1\" in catalog1.nested.nested()\n    assert catalog1.entry1.read().equals(catalog1.nested.nested.entry1.read())\n    assert \"nested.nested\" not in catalog1.walk(depth=1)\n    assert \"nested.nested\" in catalog1.walk(depth=2)\n    assert catalog1.nested.cat == catalog1\n    assert catalog1.nested.nested.nested.cat.cat.cat is catalog1\n\n\ndef test_nested_gets_name_from_super(catalog1):\n    assert catalog1.name == \"name_in_cat\"\n    assert \"nested\" in catalog1\n    nested = catalog1.nested\n    assert nested.name == \"nested\"\n    assert nested().name == \"nested\"\n\n\ndef test_hash(catalog1):\n    assert catalog1.nested() == catalog1.nested.nested()\n\n\ndef test_getitem(catalog1):\n    assert list(catalog1) == list(catalog1[\"nested\"]())\n    assert list(catalog1) == list(catalog1[\"nested.nested\"]())\n    assert list(catalog1) == list(catalog1[\"nested\", \"nested\"]())\n\n\ndef test_source_plugin_config(catalog1):\n    from intake import registry\n\n    assert 
\"example1\" in registry\n    assert \"example2\" in registry\n\n\ndef test_metadata(catalog1):\n    assert hasattr(catalog1, \"metadata\")\n    assert catalog1.metadata[\"test\"] is True\n\n\ndef test_use_source_plugin_from_config(catalog1):\n    catalog1[\"use_example1\"]\n\n\ndef test_get_dir():\n    assert get_dir(\"file:///path/catalog.yml\") == \"file:///path\"\n    assert get_dir(\"https://example.com/catalog.yml\") == \"https://example.com\"\n    path = \"example/catalog.yml\"\n    out = get_dir(path)\n    assert os.path.isabs(out)\n    assert out.endswith(\"/example/\")\n    path = \"/example/catalog.yml\"\n    out = get_dir(path)\n    # it's ok if the first two chars indicate drive for win (C:)\n    assert \"/example/\" in [out, out[2:]]\n    path = \"example\"\n    out = get_dir(path)\n    assert os.path.isabs(out)\n    assert not out.endswith(\"/example\")\n    assert out.endswith(\"/\")\n\n\ndef test_entry_dir_function(catalog1):\n    assert \"nested\" in dir(catalog1.nested)\n\n\n@pytest.mark.parametrize(\n    \"dtype,expected\",\n    [\n        (\"bool\", False),\n        (\"datetime\", pandas.Timestamp(1970, 1, 1, 0, 0, 0)),\n        (\"float\", 0.0),\n        (\"int\", 0),\n        (\"list\", []),\n        (\"str\", \"\"),\n        (\"unicode\", \"\"),\n    ],\n)\ndef test_user_parameter_default_value(dtype, expected):\n    p = local.UserParameter(\"a\", \"a desc\", dtype)\n    assert p.validate(None) == expected\n\n\ndef test_user_parameter_repr():\n    p = local.UserParameter(\"a\", \"a desc\", \"str\")\n    expected = \"<UserParameter 'a'>\"\n    assert repr(p) == str(p) == expected\n\n\n@pytest.mark.parametrize(\n    \"dtype,given,expected\",\n    [\n        (\"bool\", \"true\", True),\n        (\"bool\", 0, False),\n        (\n            \"datetime\",\n            datetime.datetime(2018, 1, 1, 0, 34, 0),\n            pandas.Timestamp(2018, 1, 1, 0, 34, 0),\n        ),\n        (\"datetime\", \"2018-01-01 12:34AM\", pandas.Timestamp(2018, 1, 
1, 0, 34, 0)),\n        (\"datetime\", 1234567890000000000, pandas.Timestamp(2009, 2, 13, 23, 31, 30)),\n        (\"float\", \"3.14\", 3.14),\n        (\"int\", \"1\", 1),\n        (\"list\", (3, 4), [3, 4]),\n        (\"str\", 1, \"1\"),\n        (\"unicode\", \"foo\", \"foo\"),\n    ],\n)\ndef test_user_parameter_coerce_value(dtype, given, expected):\n    p = local.UserParameter(\"a\", \"a desc\", dtype, given)\n    assert p.validate(given) == expected\n\n\n@pytest.mark.parametrize(\"given\", [\"now\", \"today\"])\ndef test_user_parameter_coerce_special_datetime(given):\n    p = local.UserParameter(\"a\", \"a desc\", \"datetime\", given)\n    assert type(p.validate(given)) == pandas.Timestamp\n\n\n@pytest.mark.parametrize(\n    \"dtype,given,expected\",\n    [\n        (\"float\", \"100.0\", 100.0),\n        (\"int\", \"20\", 20),\n        (\"int\", 20.0, 20),\n    ],\n)\ndef test_user_parameter_coerce_min(dtype, given, expected):\n    p = local.UserParameter(\"a\", \"a desc\", dtype, expected, min=given)\n    assert p.min == expected\n\n\n@pytest.mark.parametrize(\n    \"dtype,given,expected\",\n    [\n        (\"float\", \"100.0\", 100.0),\n        (\"int\", \"20\", 20),\n        (\"int\", 20.0, 20),\n    ],\n)\ndef test_user_parameter_coerce_max(dtype, given, expected):\n    p = local.UserParameter(\"a\", \"a desc\", dtype, expected, max=given)\n    assert p.max == expected\n\n\n@pytest.mark.parametrize(\n    \"dtype,given,expected\",\n    [\n        (\"float\", [50, \"100.0\", 150.0], [50.0, 100.0, 150.0]),\n        (\"int\", [1, \"2\", 3.0], [1, 2, 3]),\n    ],\n)\ndef test_user_parameter_coerce_allowed(dtype, given, expected):\n    p = local.UserParameter(\"a\", \"a desc\", dtype, expected[0], allowed=given)\n    assert p.allowed == expected\n\n\ndef test_user_parameter_validation_range():\n    p = local.UserParameter(\"a\", \"a desc\", \"int\", 1, min=0, max=3)\n\n    with pytest.raises(ValueError) as except_info:\n        p.validate(-1)\n    assert \"less 
than\" in str(except_info.value)\n\n    assert p.validate(0) == 0\n    assert p.validate(1) == 1\n    assert p.validate(2) == 2\n    assert p.validate(3) == 3\n\n    with pytest.raises(ValueError) as except_info:\n        p.validate(4)\n    assert \"greater than\" in str(except_info.value)\n\n\ndef test_user_parameter_validation_allowed():\n    p = local.UserParameter(\"a\", \"a desc\", \"int\", 1, allowed=[1, 2])\n\n    with pytest.raises(ValueError) as except_info:\n        p.validate(0)\n    assert \"allowed\" in str(except_info.value)\n\n    assert p.validate(1) == 1\n    assert p.validate(2) == 2\n\n    with pytest.raises(ValueError) as except_info:\n        p.validate(3)\n    assert \"allowed\" in str(except_info.value)\n\n\ndef test_user_pars_list():\n    # first case: allowed are all lists, must choose exactly one of them\n    # NB: order must match\n    p = local.UserParameter(\"\", \"\", \"list\", allowed=[[], [\"one\"], [\"one\", \"two\"]])\n    with pytest.raises(TypeError):\n        p.validate(0)\n    with pytest.raises((TypeError, ValueError)):\n        # unfortunately, a string does coerce to a list\n        p.validate(\"one\")\n    with pytest.raises(ValueError, match=\"allowed\"):\n        p.validate([\"two\"])\n    with pytest.raises(ValueError, match=\"allowed\"):\n        p.validate([\"two\", \"one\"])\n    p.validate([\"one\"])\n    p.validate([\"one\", \"two\"])\n\n\ndef test_user_pars_mlist():\n    # second case: allowed are not lists, can choose any number of them\n    # NB: repeats are allowed\n    p = local.UserParameter(\"\", \"\", \"mlist\", allowed=[\"one\", \"two\", \"three\"])\n    with pytest.raises(TypeError):\n        p.validate(0)\n    with pytest.raises((TypeError, ValueError)):\n        # unfortunately, a string does coerce to a list\n        p.validate(\"one\")\n    with pytest.raises(ValueError, match=\"allowed\"):\n        p.validate([\"two\", \"other\"])\n    p.validate([\"two\"])\n    p.validate([\"two\", \"two\"])\n    
p.validate([\"two\", \"one\"])\n    p.validate([])\n\n\n@pytest.mark.parametrize(\n    \"filename\",\n    [\n        \"catalog_non_dict\",\n        \"data_source_missing\",\n        \"data_source_name_non_string\",\n        \"data_source_non_dict\",\n        \"data_source_value_non_dict\",\n        \"params_missing_required\",\n        \"params_name_non_string\",\n        \"params_non_dict\",\n        \"params_value_bad_choice\",\n        \"params_value_bad_type\",\n        \"params_value_non_dict\",\n        \"plugins_non_dict\",\n        \"plugins_source_missing\",\n        \"plugins_source_missing_key\",\n        \"plugins_source_non_dict\",\n        \"plugins_source_non_list\",\n    ],\n)\ndef test_parser_validation_error(filename):\n    with pytest.raises(exceptions.ValidationError):\n        list(open_catalog(abspath(filename + \".yml\")))\n\n\n@pytest.mark.parametrize(\n    \"filename\",\n    [\n        \"obsolete_data_source_list\",\n        \"obsolete_params_list\",\n    ],\n)\ndef test_parser_obsolete_error(filename):\n    with pytest.raises(exceptions.ObsoleteError):\n        open_catalog(abspath(filename + \".yml\"))\n\n\ndef test_union_catalog():\n    path = os.path.dirname(__file__)\n    uri1 = os.path.join(path, \"catalog_union_1.yml\")\n    uri2 = os.path.join(path, \"catalog_union_2.yml\")\n\n    union_cat = open_catalog([uri1, uri2])\n\n    assert_items_equal(list(union_cat), [\"entry1\", \"entry1_part\", \"use_example1\"])\n\n    expected = {\n        \"name\": \"entry1_part\",\n        \"container\": \"dataframe\",\n        \"user_parameters\": [\n            {\n                \"name\": \"part\",\n                \"description\": \"part of filename\",\n                \"default\": \"1\",\n                \"type\": \"str\",\n                \"allowed\": [\"1\", \"2\"],\n            }\n        ],\n        \"description\": \"entry1 part\",\n        \"direct_access\": \"allow\",\n    }\n    for k in expected:\n        assert 
union_cat.entry1_part.describe()[k] == expected[k]\n\n    # Implied creation of data source\n    assert union_cat.entry1.container == \"dataframe\"\n    md = union_cat.entry1.describe()[\"metadata\"]\n    assert md == dict(foo=\"bar\", bar=[1, 2, 3])\n\n    # Use default parameters in explict creation of data source\n    assert union_cat.entry1_part().container == \"dataframe\"\n    # Specify parameters in creation of data source\n    assert union_cat.entry1_part(part=\"2\").container == \"dataframe\"\n\n\ndef test_persist_local_cat(temp_cache):\n    # when persisted, multiple cat become one\n    from intake.catalog.local import YAMLFileCatalog\n\n    path = os.path.dirname(__file__)\n    uri1 = os.path.join(path, \"catalog_union_1.yml\")\n    uri2 = os.path.join(path, \"catalog_union_2.yml\")\n\n    s = open_catalog([uri1, uri2])\n    s2 = s.persist()\n    assert isinstance(s2, YAMLFileCatalog)\n    assert set(s) == set(s2)\n\n\ndef test_empty_catalog():\n    cat = open_catalog()\n    assert list(cat) == []\n\n\ndef test_nonexistent_error():\n    with pytest.raises(IOError):\n        local.YAMLFileCatalog(\"nonexistent\")\n\n\ndef test_duplicate_data_sources():\n    path = os.path.dirname(__file__)\n    uri = os.path.join(path, \"catalog_dup_sources.yml\")\n\n    with pytest.raises(exceptions.DuplicateKeyError):\n        open_catalog(uri)\n\n\ndef test_duplicate_parameters():\n    path = os.path.dirname(__file__)\n    uri = os.path.join(path, \"catalog_dup_parameters.yml\")\n\n    with pytest.raises(exceptions.DuplicateKeyError):\n        open_catalog(uri)\n\n\n@pytest.fixture\ndef temp_catalog_file():\n    path = tempfile.mkdtemp()\n    catalog_file = os.path.join(path, \"catalog.yaml\")\n    with open(catalog_file, \"w\") as f:\n        f.write(\n            \"\"\"\nsources:\n  a:\n    driver: csv\n    args:\n      urlpath: /not/a/file\n  b:\n    driver: csv\n    args:\n      urlpath: /not/a/file\n        \"\"\"\n        )\n\n    yield catalog_file\n\n    
shutil.rmtree(path)\n\n\ndef test_catalog_file_removal(temp_catalog_file):\n    cat_dir = os.path.dirname(temp_catalog_file)\n    cat = open_catalog(cat_dir + \"/*\", ttl=0.1)\n    assert set(cat) == {\"a\", \"b\"}\n\n    os.remove(temp_catalog_file)\n    time.sleep(0.5)  # wait for catalog refresh\n    assert set(cat) == set()\n\n\ndef test_flatten_duplicate_error():\n    path = tempfile.mkdtemp()\n    f1 = os.path.join(path, \"catalog.yaml\")\n    path = tempfile.mkdtemp()\n    f2 = os.path.join(path, \"catalog.yaml\")\n    for f in [f1, f2]:\n        with open(f, \"w\") as fo:\n            fo.write(\n                \"\"\"\n        sources:\n          a:\n            driver: csv\n            args:\n              urlpath: /not/a/file\n        \"\"\"\n            )\n    with pytest.raises(ValueError):\n        open_catalog([f1, f2])\n\n\ndef test_multi_cat_names():\n    fn = abspath(\"catalog_union*.yml\")\n    cat = open_catalog(fn)\n    assert cat.name == fn\n    assert fn in repr(cat)\n\n    fn1 = abspath(\"catalog_union_1.yml\")\n    fn2 = abspath(\"catalog_union_2.yml\")\n    cat = open_catalog([fn1, fn2])\n    assert cat.name == \"2 files\"\n    assert cat.description == \"Catalog generated from 2 files\"\n\n    cat = open_catalog([fn1, fn2], name=\"special_name\", description=\"Special description\")\n    assert cat.name == \"special_name\"\n    assert cat.description == \"Special description\"\n\n\ndef test_name_of_builtin():\n    import intake\n\n    assert intake.cat.name == \"builtin\"\n    assert intake.cat.description == \"Generated from data packages found on your intake search path\"\n\n\ndef test_cat_with_declared_name():\n    fn = abspath(\"catalog_named.yml\")\n    description = \"Description declared in the open function\"\n    cat = open_catalog(fn, name=\"name_in_func\", description=description)\n    assert cat.name == \"name_in_func\"\n    assert cat.description == description\n    cat._load()  # we don't get metadata until 
load/list/getitem\n    assert cat.metadata.get(\"some\") == \"thing\"\n\n    cat = open_catalog(fn)\n    assert cat.name == \"name_in_spec\"\n    assert cat.description == \"This is a catalog with a description in the yaml\"\n\n\ndef test_cat_with_no_declared_name_gets_name_from_dir_if_file_named_catalog():\n    fn = abspath(\"catalog.yml\")\n    cat = open_catalog(fn, name=\"name_in_func\", description=\"Description in func\")\n    assert cat.name == \"name_in_func\"\n    assert cat.description == \"Description in func\"\n\n    cat = open_catalog(fn)\n    assert cat.name == \"tests\"\n    assert cat.description is None\n\n\ndef test_default_expansions():\n    try:\n        os.environ[\"INTAKE_INT_TEST\"] = \"1\"\n        par = UserParameter(\"\", \"\", \"int\", default=\"env(INTAKE_INT_TEST)\")\n        par.expand_defaults()\n        assert par.expanded_default == 1\n    finally:\n        del os.environ[\"INTAKE_INT_TEST\"]\n\n    par = UserParameter(\"\", \"\", \"str\", default=\"env(USER)\")\n    par.expand_defaults(getenv=False)\n    assert par.expanded_default == \"env(USER)\"\n    par.expand_defaults()\n    assert par.expanded_default == os.getenv(\"USER\", \"\")\n\n    par = UserParameter(\"\", \"\", \"str\", default=\"client_env(USER)\")\n    par.expand_defaults()\n    assert par.expanded_default == \"client_env(USER)\"\n    par.expand_defaults(client=True)\n    assert par.expanded_default == os.getenv(\"USER\", \"\")\n\n    par = UserParameter(\"\", \"\", \"str\", default=\"shell(echo success)\")\n    par.expand_defaults(getshell=False)\n    assert par.expanded_default == \"shell(echo success)\"\n    par.expand_defaults()\n    assert par.expanded_default == \"success\"\n\n    par = UserParameter(\"\", \"\", \"str\", default=\"client_shell(echo success)\")\n    par.expand_defaults(client=True)\n    assert par.expanded_default == \"success\"\n\n    par = UserParameter(\"\", \"\", \"int\", default=1)\n    par.expand_defaults()  # no error from string 
ops\n\n\ndef test_remote_cat(http_server):\n    url = http_server + \"catalog1.yml\"\n    cat = open_catalog(url)\n    assert \"entry1\" in cat\n    assert cat.entry1.describe()\n\n\ndef test_multi_plugins():\n    from intake.source.csv import CSVSource\n\n    fn = abspath(\"multi_plugins.yaml\")\n    cat = open_catalog(fn)\n    s = cat.tables0()\n    assert isinstance(s, CSVSource)\n\n    s = cat.tables1()\n    assert isinstance(s, CSVSource)\n\n    s = cat.tables2()\n    assert isinstance(s, CSVSource)\n\n    s = cat.tables3()\n    assert isinstance(s, CSVSource)\n    assert s._csv_kwargs == {}\n\n    s = cat.tables3(plugin=\"myplug\")\n    assert isinstance(s, CSVSource)\n    assert s._csv_kwargs == {}\n\n    s = cat.tables3(plugin=\"myplug2\")\n    assert isinstance(s, CSVSource)\n    assert s._csv_kwargs is True\n\n    with pytest.raises(ValueError):\n        cat.tables4()\n    with pytest.raises(ValueError):\n        cat.tables4(plugin=\"myplug\")\n    with pytest.raises(ValueError):\n        cat.tables4(plugin=\"myplug2\")\n\n    s = cat.tables5()\n    assert isinstance(s, CSVSource)\n\n    with pytest.raises(ValueError):\n        cat.tables5(plugin=\"myplug\")\n\n    fn = abspath(\"multi_plugins2.yaml\")\n    with pytest.raises(ValueError):\n        open_catalog(fn)\n\n\ndef test_no_plugins():\n    fn = abspath(\"multi_plugins.yaml\")\n    cat = open_catalog(fn)\n    with pytest.raises(ValueError) as e:\n        cat.tables6\n    assert \"doesnotexist\" in str(e.value)\n    assert \"plugin-directory\" in str(e.value)\n    with pytest.raises(ValueError) as e:\n        cat.tables7\n    assert \"doesnotexist\" in str(e.value)\n\n\ndef test_explicit_entry_driver():\n    from intake.source.textfiles import TextFilesSource\n\n    e = LocalCatalogEntry(\"test\", \"desc\", TextFilesSource, args={\"urlpath\": None})\n    assert e.describe()[\"container\"] == \"python\"\n    assert isinstance(e(), TextFilesSource)\n\n    with pytest.raises(TypeError):\n        
LocalCatalogEntry(\"test\", \"desc\", None)\n\n\ndef test_getitem_and_getattr():\n    fn = abspath(\"multi_plugins.yaml\")\n    catalog = open_catalog(fn)\n    catalog[\"tables0\"]\n    with pytest.raises(KeyError):\n        catalog[\"doesnotexist\"]\n    with pytest.raises(KeyError):\n        catalog[\"_doesnotexist\"]\n    with pytest.raises(KeyError):\n        # This exists as an *attribute* but not as an item.\n        catalog[\"metadata\"]\n    catalog.tables0  # alias to catalog['tables0']\n    catalog.metadata  # a normal attribute\n    with pytest.raises(AttributeError):\n        catalog.doesnotexit\n    with pytest.raises(AttributeError):\n        catalog._doesnotexit\n    assert catalog.tables0 == catalog[\"tables0\"]\n    assert isinstance(catalog.metadata, (dict, type(None)))\n\n\ndef test_dot_names():\n    fn = abspath(\"dot-nest.yaml\")\n    cat = open_catalog(fn)\n    assert cat.self.leaf.description == \"leaf\"\n    assert cat.self[\"leafdot.dot\"].description == \"leaf-dot\"\n    assert cat[\"selfdot.dot\", \"leafdot.dot\"].description == \"leaf-dot\"\n\n    assert cat[\"self.selfdot.dot\", \"leafdot.dot\"].description == \"leaf-dot\"\n    assert cat[\"self.self.dot\", \"leafdot.dot\"].description == \"leaf-dot\"\n    assert cat[\"self.self.dot\", \"leaf\"].description == \"leaf\"\n    assert cat[\"self.self.dot\", \"leaf.dot\"].description == \"leaf-dot\"\n\n    assert cat[\"self.self.dot.leaf.dot\"].description == \"leaf-dot\"\n\n\ndef test_listing(catalog1):\n    assert list(catalog1) == list(catalog1.nested)\n    with pytest.raises(TypeError):\n        list(catalog1.arr)\n\n\ndef test_dict_save():\n    from intake.catalog.base import Catalog\n\n    fn = os.path.join(tempfile.mkdtemp(), \"mycat.yaml\")\n    entry = LocalCatalogEntry(\n        name=\"trial\", description=\"get this back\", driver=\"csv\", args=dict(urlpath=\"\")\n    )\n    cat = Catalog.from_dict({\"trial\": entry}, name=\"mycat\")\n    cat.save(fn)\n\n    cat2 = 
open_catalog(fn)\n    assert \"trial\" in cat2\n    assert cat2.name == \"mycat\"\n    assert \"CSV\" in cat2.trial.classname\n\n\ndef test_dict_save_complex():\n    from intake.catalog.base import Catalog\n\n    fn = os.path.join(tempfile.mkdtemp(), \"mycat.yaml\")\n    cat = Catalog()\n    entry = LocalCatalogEntry(\n        name=\"trial\",\n        description=\"get this back\",\n        driver=\"csv\",\n        cache=[],\n        catalog=cat,\n        parameters=[UserParameter(name=\"par1\", description=\"desc\", type=\"int\")],\n        args={\"urlpath\": \"none\"},\n    )\n\n    cat._entries = {\"trial\": entry}\n    cat.save(fn)\n\n    cat2 = open_catalog(fn)\n    assert \"trial\" in cat2\n    assert cat2.name == \"mycat\"\n    assert cat2.trial.describe()[\"plugin\"][0] == \"csv\"\n\n\ndef test_dict_adddel():\n    from intake.catalog.base import Catalog\n\n    entry = LocalCatalogEntry(\n        name=\"trial\", description=\"get this back\", driver=\"csv\", args=dict(urlpath=\"\")\n    )\n    cat = Catalog.from_dict({\"trial\": entry}, name=\"mycat\")\n    assert \"trial\" in cat\n    cat[\"trial2\"] = entry\n    assert list(cat) == [\"trial\", \"trial2\"]\n    cat.pop(\"trial\")\n    assert list(cat) == [\"trial2\"]\n    assert cat[\"trial2\"].describe() == entry.describe()\n\n\ndef test_filter():\n    from intake.catalog.base import Catalog\n\n    entry1 = LocalCatalogEntry(\n        name=\"trial\", description=\"get this back\", driver=\"csv\", args=dict(urlpath=\"\")\n    )\n    entry2 = LocalCatalogEntry(\n        name=\"trial\",\n        description=\"pass this through\",\n        driver=\"csv\",\n        args=dict(urlpath=\"\"),\n    )\n    cat = Catalog.from_dict({\"trial1\": entry1, \"trial2\": entry2}, name=\"mycat\")\n    cat2 = cat.filter(lambda e: \"pass\" in e._description)\n    assert list(cat2) == [\"trial2\"]\n    assert cat2.trial2 == entry2()\n\n\ndef test_from_dict_with_data_source():\n    \"Check that Catalog.from_dict accepts 
DataSources not wrapped in Entry.\"\n    from intake.catalog.base import Catalog\n\n    entry = LocalCatalogEntry(\n        name=\"trial\", description=\"get this back\", driver=\"csv\", args=dict(urlpath=\"\")\n    )\n    ds = entry()\n    Catalog.from_dict({\"trial\": ds}, name=\"mycat\")\n\n\ndef test_no_instance():\n    from intake.catalog.local import LocalCatalogEntry\n\n    e0 = LocalCatalogEntry(\"foo\", \"\", \"fake\")\n    e1 = LocalCatalogEntry(\"foo0\", \"\", \"fake\")\n\n    # this would error on instantiation with driver not found\n    assert e0 != e1\n\n\ndef test_fsspec_integration():\n    import fsspec\n    import pandas as pd\n\n    mem = fsspec.filesystem(\"memory\")\n    with mem.open(\"cat.yaml\", \"wt\") as f:\n        f.write(\n            \"\"\"\nsources:\n  implicit:\n    driver: csv\n    description: o\n    args:\n      urlpath: \"{{CATALOG_DIR}}/file.csv\"\n  explicit:\n    driver: csv\n    description: o\n    args:\n      urlpath: \"memory://file.csv\"\n  extra:\n    driver: csv\n    description: o\n    args:\n      urlpath: \"{{CATALOG_DIR}}/file.csv\"\n      storage_options: {other: option}\"\"\"\n        )\n    with mem.open(\"/file.csv\", \"wt\") as f:\n        f.write(\"a,b\\n0,1\")\n    expected = pd.DataFrame({\"a\": [0], \"b\": [1]})\n    cat = open_catalog(\"memory://cat.yaml\")\n    assert list(cat) == [\"implicit\", \"explicit\", \"extra\"]\n    assert cat.implicit.read().equals(expected)\n    assert cat.explicit.read().equals(expected)\n    s = cat.extra()\n    assert s._storage_options[\"other\"]\n\n\ndef test_cat_add(tmpdir):\n    tmpdir = str(tmpdir)\n    fn = os.path.join(tmpdir, \"cat.yaml\")\n    with open(fn, \"w\") as f:\n        f.write(\"sources: {}\")\n    cat = open_catalog(fn)\n    assert list(cat) == []\n\n    # was added in memory\n    cat.add(cat)\n    cat._load()  # this would happen automatically, but not immediately\n    assert list(cat) == [\"cat\"]\n\n    # was added to the file\n    cat = 
open_catalog(fn)\n    assert list(cat) == [\"cat\"]\n\n\ndef test_no_entries_items(catalog1):\n    from intake.catalog.entry import CatalogEntry\n    from intake.source.base import DataSource\n\n    for k, v in catalog1.items():\n        assert not isinstance(v, CatalogEntry)\n        assert isinstance(v, DataSource)\n\n    for k in catalog1:\n        v = catalog1[k]\n        assert not isinstance(v, CatalogEntry)\n        assert isinstance(v, DataSource)\n\n    for k in catalog1:\n        # we can't do attribute access on \"text\" because it\n        # collides with a property\n        if k == \"text\":\n            continue\n        v = getattr(catalog1, k)\n        assert not isinstance(v, CatalogEntry)\n        assert isinstance(v, DataSource)\n\n\ndef test_cat_dictlike(catalog1):\n    assert list(catalog1) == list(catalog1.keys())\n    assert len(list(catalog1)) == len(catalog1)\n    assert list(catalog1.items()) == list(zip(catalog1.keys(), catalog1.values()))\n\n\ndef test_inherit_params(inherit_params_cat):\n    assert inherit_params_cat.param._urlpath == \"s3://test_bucket/file.parquet\"\n\n\ndef test_runtime_overwrite_params(inherit_params_cat):\n    assert (\n        inherit_params_cat.param(bucket=\"runtime_overwrite\")._urlpath\n        == \"s3://runtime_overwrite/file.parquet\"\n    )\n\n\ndef test_local_param_overwrites(inherit_params_cat):\n    assert inherit_params_cat.local_param_overwrites._urlpath == \"s3://local_param/file.parquet\"\n\n\ndef test_local_and_global_params(inherit_params_cat):\n    assert (\n        inherit_params_cat.local_and_global_params._urlpath\n        == \"s3://test_bucket/local_filename.parquet\"\n    )\n\n\ndef test_search_inherit_params(inherit_params_cat):\n    assert (\n        inherit_params_cat.search(\"local_and_global\").local_and_global_params._urlpath\n        == \"s3://test_bucket/local_filename.parquet\"\n    )\n\n\ndef test_multiple_cats_params(inherit_params_multiple_cats):\n    assert (\n        
inherit_params_multiple_cats.local_and_global_params._urlpath\n        == \"s3://test_bucket/local_filename.parquet\"\n    )\n"
  },
  {
    "path": "intake/catalog/tests/test_parameters.py",
    "content": "import os\n\nimport pytest\n\nimport intake\nfrom intake.catalog.local import LocalCatalogEntry, UserParameter\nfrom intake.source.base import DataSource\n\n\nclass NoSource(DataSource):\n    def __init__(self, **kwargs):\n        self.metadata = kwargs.pop(\"metadata\", {})\n        self.kwargs = kwargs\n\n\ndriver = \"intake.catalog.tests.test_parameters.NoSource\"\n\n\ndef test_simplest():\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": 1})\n    s = e()\n    assert s.kwargs[\"arg1\"] == 1\n\n\ndef test_cache_default_source():\n    # If the user provides parameters, don't allow default caching\n    up = UserParameter(\"name\", default=\"oi\")\n    e = LocalCatalogEntry(\"\", \"\", driver, getenv=False, parameters=[up])\n    s1 = e(name=\"oioi\")\n    s2 = e()\n    assert s1 is not s2\n    s1 = e()\n    s2 = e(name=\"oioi\")\n    assert s1 is not s2\n    # Otherwise, we can cache the default source\n    e = LocalCatalogEntry(\"\", \"\", driver, getenv=False)\n    s1 = e()\n    s2 = e()\n    assert s1 is s2\n\n\ndef test_parameter_default():\n    up = UserParameter(\"name\", default=\"oi\")\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"{{name}}\"}, parameters=[up])\n    s = e()\n    assert s.kwargs[\"arg1\"] == \"oi\"\n\n\ndef test_maybe_default_from_env():\n    # maybe fill in parameter default from the env, depending on getenv\n    up = UserParameter(\"name\", default=\"env(INTAKE_TEST_VAR)\")\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"{{name}}\"}, parameters=[up], getenv=False)\n    s = e()\n    assert s.kwargs[\"arg1\"] == \"env(INTAKE_TEST_VAR)\"\n\n    os.environ[\"INTAKE_TEST_VAR\"] = \"oi\"\n    # Clear the cached source so we can (not) pick up the changed environment variable.\n    e.clear_cached_default_source()\n\n    s = e()\n    assert s.kwargs[\"arg1\"] == \"env(INTAKE_TEST_VAR)\"\n\n    up = UserParameter(\"name\", default=\"env(INTAKE_TEST_VAR)\")\n    e = 
LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"{{name}}\"}, parameters=[up], getenv=True)\n    s = e()\n    assert s.kwargs[\"arg1\"] == \"oi\"\n\n    del os.environ[\"INTAKE_TEST_VAR\"]\n    # Clear the cached source so we can pick up the changed environment variable.\n    e.clear_cached_default_source()\n\n    s = e()\n    assert s.kwargs[\"arg1\"] == \"\"\n\n\ndef test_up_override_and_render():\n    up = UserParameter(\"name\", default=\"env(INTAKE_TEST_VAR)\")\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"{{name}}\"}, parameters=[up], getenv=False)\n    s = e(name=\"other\")\n    assert s.kwargs[\"arg1\"] == \"other\"\n\n\ndef test_user_explicit_override():\n    up = UserParameter(\"name\", default=\"env(INTAKE_TEST_VAR)\")\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"{{name}}\"}, parameters=[up], getenv=False)\n    # override wins over up\n    s = e(arg1=\"other\")\n    assert s.kwargs[\"arg1\"] == \"other\"\n\n\ndef test_auto_env_expansion():\n    os.environ[\"INTAKE_TEST_VAR\"] = \"oi\"\n    e = LocalCatalogEntry(\n        \"\",\n        \"\",\n        driver,\n        args={\"arg1\": \"{{env(INTAKE_TEST_VAR)}}\"},\n        parameters=[],\n        getenv=False,\n    )\n    s = e()\n\n    # when getenv is False, you pass through the text\n    assert s.kwargs[\"arg1\"] == \"{{env(INTAKE_TEST_VAR)}}\"\n\n    e = LocalCatalogEntry(\n        \"\",\n        \"\",\n        driver,\n        args={\"arg1\": \"{{env(INTAKE_TEST_VAR)}}\"},\n        parameters=[],\n        getenv=True,\n    )\n    s = e()\n    assert s.kwargs[\"arg1\"] == \"oi\"\n\n    # same, but with quoted environment name\n    e = LocalCatalogEntry(\n        \"\",\n        \"\",\n        driver,\n        args={\"arg1\": '{{env(\"INTAKE_TEST_VAR\")}}'},\n        parameters=[],\n        getenv=True,\n    )\n    s = e()\n    assert s.kwargs[\"arg1\"] == \"oi\"\n\n    del os.environ[\"INTAKE_TEST_VAR\"]\n    # Clear the cached source so we can pick up the 
changed environment variable.\n    e.clear_cached_default_source()\n\n    s = e()\n    assert s.kwargs[\"arg1\"] == \"\"\n\n    e = LocalCatalogEntry(\n        \"\",\n        \"\",\n        driver,\n        args={\"arg1\": \"{{env(INTAKE_TEST_VAR)}}\"},\n        parameters=[],\n        getenv=False,\n    )\n    s = e()\n    assert s.kwargs[\"arg1\"] == \"{{env(INTAKE_TEST_VAR)}}\"\n\n\ndef test_validate_up():\n    up = UserParameter(\"name\", default=1, type=\"int\")\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"{{name}}\"}, parameters=[up], getenv=False)\n    s = e()  # OK\n    assert s.kwargs[\"arg1\"] == \"1\"\n    with pytest.raises(ValueError):\n        e(name=\"oi\")\n\n    up = UserParameter(\"name\", type=\"int\")\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"{{name}}\"}, parameters=[up], getenv=False)\n    s = e()  # OK\n    # arg1 is a string: real int gets rendered by jinja\n    assert s.kwargs[\"arg1\"] == \"0\"  # default default for int\n    s = e(arg1=\"something\")\n    assert s.kwargs[\"arg1\"] == \"something\"\n\n\ndef test_validate_par():\n    up = UserParameter(\"arg1\", type=\"int\")\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"oi\"}, parameters=[up], getenv=False)\n    with pytest.raises(ValueError):\n        e()\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": 1}, parameters=[up], getenv=False)\n    e()  # OK\n\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"1\"}, parameters=[up], getenv=False)\n    s = e()  # OK\n    assert s.kwargs[\"arg1\"] == 1  # a number, not str\n\n\ndef test_mlist_parameter():\n    up = UserParameter(\"\", \"\", \"mlist\", allowed=[\"a\", \"b\"])\n    up.validate([])\n    up.validate([\"b\"])\n    up.validate([\"b\", \"a\"])\n    with pytest.raises(ValueError):\n        up.validate([\"c\"])\n    with pytest.raises(ValueError):\n        up.validate([\"a\", \"c\"])\n    with pytest.raises(ValueError):\n        
up.validate(\"hello\")\n\n\ndef test_explicit_overrides():\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"oi\"})\n    s = e(arg1=\"hi\")\n    assert s.kwargs[\"arg1\"] == \"hi\"\n\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"{{name}}\"})\n    s = e(name=\"hi\")\n    assert s.kwargs[\"arg1\"] == \"hi\"\n\n    os.environ[\"INTAKE_TEST_VAR\"] = \"another\"\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"oi\"}, getenv=True)\n    s = e(arg1=\"{{env(INTAKE_TEST_VAR)}}\")\n    assert s.kwargs[\"arg1\"] == \"another\"\n\n    up = UserParameter(\"arg1\", type=\"int\")\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": 1}, parameters=[up])\n    with pytest.raises(ValueError):\n        e(arg1=\"oi\")\n\n    s = e(arg1=\"1\")\n    assert s.kwargs[\"arg1\"] == 1\n\n\ndef test_extra_arg():\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"oi\"})\n    s = e(arg2=\"extra\")\n    assert s.kwargs[\"arg2\"] == \"extra\"\n\n\ndef test_unknown():\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"{{name}}\"})\n    s = e()\n    assert s.kwargs[\"arg1\"] == \"\"\n\n    # parameter has no default\n    up = UserParameter(\"name\")\n    e = LocalCatalogEntry(\"\", \"\", driver, args={\"arg1\": \"{{name}}\"}, parameters=[up])\n    s = e()\n    assert s.kwargs[\"arg1\"] == \"\"\n\n\ndef test_catalog_passthrough():\n    root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))\n    cat = intake.open_catalog(os.path.join(root, \"tests/catalog_inherit_params.yml\"))\n    assert set(cat.subcat.user_parameters) == {\"bucket\", \"inner\"}\n    url = cat.subcat.ex2.urlpath\n    assert url == \"test_bucket/test_name\"\n    url = cat.subcat.ex2(bucket=\"hi\", inner=\"ho\").urlpath\n    assert url == \"hi/ho\"\n\n    # test clone\n    cat2 = cat(bucket=\"yet\")\n    url = cat2.subcat(inner=\"another\").ex2.urlpath\n    assert url == \"yet/another\"\n    url = 
cat2.subcat(inner=\"another\").ex2(bucket=\"hi\").urlpath\n    assert url == \"hi/another\"\n"
  },
  {
    "path": "intake/catalog/tests/test_reload_integration.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport os\nimport os.path\nimport shutil\nimport tempfile\nimport time\n\nimport pytest\n\nfrom intake import open_catalog\n\nfrom .util import assert_items_equal\n\nTMP_DIR = tempfile.mkdtemp()\nTEST_CATALOG_PATH = [TMP_DIR]\n\nYAML_FILENAME = \"intake_test_catalog.yml\"\nMISSING_PATH = os.path.join(TMP_DIR, \"a\")\n\n\ndef teardown_module(module):\n    try:\n        shutil.rmtree(TMP_DIR)\n    except:\n        pass\n\n\n@pytest.fixture\ndef intake_server_with_config(intake_server):\n    fullname = os.path.join(TMP_DIR, YAML_FILENAME)\n\n    with open(fullname, \"w\") as f:\n        f.write(\n            \"\"\"\nplugins:\n  source:\n    - module: intake.catalog.tests.example1_source\n    - module: intake.catalog.tests.example_plugin_dir.example2_source\nsources:\n  use_example1:\n    description: example1 source plugin\n    driver: example1\n    args: {}\n\"\"\"\n        )\n\n    time.sleep(1)\n\n    yield intake_server\n    os.remove(fullname)\n\n\ndef test_reload_updated_config(intake_server_with_config):\n    catalog = open_catalog(intake_server_with_config, ttl=0.1)\n\n    entries = list(catalog)\n    assert entries == [\"use_example1\"]\n\n    with open(os.path.join(TMP_DIR, YAML_FILENAME), \"a\") as f:\n        f.write(\n            \"\"\"\n  use_example1_1:\n    description: example1 other\n    driver: example1\n    args: {}\n\"\"\"\n        )\n\n    time.sleep(1.2)\n\n    assert_items_equal(list(catalog), [\"use_example1\", \"use_example1_1\"])\n\n\ndef test_reload_updated_directory(intake_server_with_config):\n    catalog = open_catalog(intake_server_with_config, ttl=0.1)\n\n    orig_entries = list(catalog)\n  
  assert \"example2\" not in orig_entries\n\n    filename = os.path.join(TMP_DIR, \"intake_test_catalog2.yml\")\n    with open(filename, \"w\") as f:\n        f.write(\n            \"\"\"\nsources:\n  example2:\n    description: source 2\n    driver: csv\n    args:\n        urlpath: none\n        \"\"\"\n        )\n\n    time.sleep(1.2)\n\n    assert_items_equal(list(catalog), [\"example2\"] + orig_entries)\n\n\ndef test_reload_missing_remote_directory(intake_server):\n    try:\n        shutil.rmtree(TMP_DIR)\n    except:\n        pass\n\n    time.sleep(1)\n    catalog = open_catalog(intake_server, ttl=0.1)\n    assert_items_equal(list(catalog), [])\n\n    os.mkdir(TMP_DIR)\n    with open(os.path.join(TMP_DIR, YAML_FILENAME), \"w\") as f:\n        f.write(\n            \"\"\"\nplugins:\n  source:\n    - module: intake.catalog.tests.example1_source\n    - module: intake.catalog.tests.example_plugin_dir.example2_source\nsources:\n  use_example1:\n    description: example1 source plugin\n    driver: example1\n    args: {}\n        \"\"\"\n        )\n    time.sleep(1.2)\n\n    assert_items_equal(list(catalog), [\"use_example1\"])\n    try:\n        shutil.rmtree(TMP_DIR)\n    except:\n        pass\n\n\ndef test_reload_missing_local_directory(tempdir):\n    catalog = open_catalog(tempdir + \"/*\", ttl=0.1)\n    assert_items_equal(list(catalog), [])\n\n    with open(os.path.join(tempdir, YAML_FILENAME), \"w\") as f:\n        f.write(\n            \"\"\"\nplugins:\n  source:\n    - module: intake.catalog.tests.example1_source\n    - module: intake.catalog.tests.example_plugin_dir.example2_source\nsources:\n  use_example1:\n    description: example1 source plugin\n    driver: example1\n    args: {}\n        \"\"\"\n        )\n\n    time.sleep(1.2)\n    assert \"use_example1\" in catalog\n"
  },
  {
    "path": "intake/catalog/tests/test_utils.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\nimport pandas as pd\nimport pytest\n\nimport intake.catalog.utils as utils\n\n\ndef test_expand_templates():\n    pars = {\"a\": \"{{par}} hi\"}\n    context = {\"b\": 1, \"par\": \"ho\"}\n    assert utils.expand_templates(pars, context)[\"a\"] == \"ho hi\"\n    assert utils.expand_templates(pars, context, True)[1] == {\"b\"}\n\n\ndef test_expand_nested_template():\n    pars = {\"a\": [\"{{par}} hi\"]}\n    context = {\"b\": 1, \"par\": \"ho\"}\n    assert utils.expand_templates(pars, context)[\"a\"] == [\"ho hi\"]\n    assert utils.expand_templates(pars, context, True)[1] == {\"b\"}\n\n    pars = {\"a\": {\"k\": {(\"{{par}} hi\",)}}}\n    context = {\"b\": 1, \"par\": \"ho\"}\n    assert utils.expand_templates(pars, context)[\"a\"] == {\"k\": {(\"ho hi\",)}}\n    assert utils.expand_templates(pars, context, True)[1] == {\"b\"}\n\n\n@pytest.mark.parametrize(\n    \"test_input,expected\",\n    [\n        (None, pd.Timestamp(\"1970-01-01 00:00:00\")),\n        (1, pd.Timestamp(\"1970-01-01 00:00:00.000000001\")),\n        (\"1988-02-24T13:37+0100\", pd.Timestamp(\"1988-02-24 13:37+0100\")),\n        (\n            {\"__datetime__\": True, \"as_str\": \"1988-02-24T13:37+0100\"},\n            pd.Timestamp(\"1988-02-24T13:37+0100\"),\n        ),\n    ],\n)\ndef test_coerce_datetime(test_input, expected):\n    assert utils.coerce_datetime(test_input) == expected\n\n\ndef test_flatten():\n    assert list(utils.flatten([[\"hi\"], [\"oi\"]])) == [\"hi\", \"oi\"]\n\n\n@pytest.mark.parametrize(\n    \"value,dtype,expected\",\n    [\n        (1, \"int\", 1),\n        (\"1\", \"int\", 1),\n        (1, \"str\", \"1\"),\n        ((), 
\"list\", []),\n        ((1,), \"list\", [1]),\n        ((1,), \"list[str]\", [\"1\"]),\n        (\"{'a': 1}\", \"dict\", {\"a\": 1}),\n        (\"{'a': '1'}\", \"dict\", {\"a\": \"1\"}),\n        (\"[1, 2]\", \"list\", [1, 2]),\n        (\"['1', '2']\", \"list\", [\"1\", \"2\"]),\n        (\"[{'a': '1'}]\", \"list\", [{\"a\": \"1\"}]),\n        (\"{'a': [1, 2]}\", \"dict\", {\"a\": [1, 2]}),\n        (\"{'a': ['1', '2']}\", \"dict\", {\"a\": [\"1\", \"2\"]}),\n        (\"1988-02-24T13:37+0100\", \"datetime\", pd.Timestamp(\"1988-02-24 13:37+0100\")),\n    ],\n)\ndef test_coerce(value, dtype, expected):\n    out = utils.coerce(dtype, value)\n    assert out == expected\n    assert type(out) == type(expected)\n"
  },
  {
    "path": "intake/catalog/tests/test_zarr.py",
    "content": "import os\nimport shutil\nimport tempfile\n\nimport pytest\n\nfrom intake import open_catalog\nfrom intake.catalog.zarr import ZarrGroupCatalog\nfrom intake.source.zarr import ZarrArraySource\n\nfrom .util import assert_items_equal\n\nzarr = pytest.importorskip(\"zarr\")\n\n\n@pytest.fixture\ndef temp_zarr():\n    zarr_path = tempfile.mkdtemp()\n\n    # setup a zarr hierarchy stored on local file system\n    store = zarr.DirectoryStore(zarr_path)\n    root = zarr.open_group(store=store, mode=\"w\")\n    root.attrs[\"title\"] = \"root group\"\n    root.attrs[\"description\"] = \"a test hierarchy\"\n    root.create_group(\"foo\")\n    root[\"foo\"].attrs[\"title\"] = \"foo group\"\n    root[\"foo\"].attrs[\"description\"] = \"a test group\"\n    root.create_group(\"bar\")\n    root.create_dataset(\"baz\", shape=100, dtype=\"i4\")\n    root[\"baz\"].attrs[\"title\"] = \"baz array\"\n    root[\"baz\"].attrs[\"description\"] = \"a test array\"\n    root[\"bar\"].create_group(\"spam\")\n    root[\"bar\"].create_dataset(\"eggs\", shape=10, dtype=\"f4\")\n    zarr.consolidate_metadata(store=store)\n\n    # setup a local YAML file catalog including entries pointing to the zarr\n    # hierarchy\n    yaml_path = tempfile.mkdtemp()\n    catalog_file = os.path.join(yaml_path, \"catalog.yml\")\n    with open(catalog_file, \"w\") as f:\n        f.write(\n            \"\"\"\nsources:\n  root:\n    driver: zarr_cat\n    args:\n      urlpath: {zarr_path}\n      consolidated: True\n  bar:\n    driver: zarr_cat\n    args:\n      urlpath: {zarr_path}\n      component: bar\n  eggs:\n    driver: ndzarr\n    args:\n      urlpath: {zarr_path}\n      component: bar/eggs\n        \"\"\".format(\n                zarr_path=zarr_path\n            )\n        )\n\n    yield zarr_path, store, root, catalog_file\n\n    shutil.rmtree(zarr_path)\n    shutil.rmtree(yaml_path)\n\n\n@pytest.mark.parametrize(\"consolidated\", [False, True])\ndef test_zarr_catalog(temp_zarr, 
consolidated):\n    import dask.array as da\n\n    path, store, root, _ = temp_zarr\n\n    # test zarr catalog opened directly, with different urlpath argument types\n    for urlpath in path, store, root:\n        # open catalog\n        cat = ZarrGroupCatalog(urlpath=urlpath, consolidated=consolidated)\n        assert isinstance(cat, ZarrGroupCatalog)\n        assert \"catalog\" == cat.container\n\n        # check entries\n        assert_items_equal([\"foo\", \"bar\", \"baz\"], list(cat))\n        assert isinstance(cat[\"foo\"], ZarrGroupCatalog)\n        assert \"catalog\" == cat[\"foo\"].describe()[\"container\"]\n        assert isinstance(cat[\"bar\"], ZarrGroupCatalog)\n        assert \"catalog\" == cat[\"bar\"].describe()[\"container\"]\n        assert isinstance(cat[\"baz\"], ZarrArraySource)\n        assert \"ndarray\" == cat[\"baz\"].describe()[\"container\"]\n\n        # check metadata from attributes\n        assert \"root group\" == cat.metadata[\"title\"]\n        assert \"a test hierarchy\" == cat.metadata[\"description\"]\n        assert \"foo group\" == cat[\"foo\"].metadata[\"title\"]\n        assert \"a test group\" == cat[\"foo\"].metadata[\"description\"]\n        # no attributes\n        assert \"title\" not in cat[\"bar\"].metadata\n        assert \"description\" not in cat[\"bar\"].metadata\n\n        # check nested catalogs\n        assert_items_equal([\"spam\", \"eggs\"], list(cat[\"bar\"]))\n        assert isinstance(cat[\"bar\"][\"spam\"], ZarrGroupCatalog)\n        assert \"catalog\" == cat[\"bar\"][\"spam\"].describe()[\"container\"]\n        assert isinstance(cat[\"bar\"][\"eggs\"], ZarrArraySource)\n        assert \"ndarray\" == cat[\"bar\"][\"eggs\"].describe()[\"container\"]\n\n        # check obtain zarr groups\n        assert isinstance(cat.to_zarr(), zarr.hierarchy.Group)\n        assert isinstance(cat[\"foo\"].to_zarr(), zarr.hierarchy.Group)\n        assert isinstance(cat[\"bar\"].to_zarr(), zarr.hierarchy.Group)\n        
assert isinstance(cat[\"bar\"][\"spam\"].to_zarr(), zarr.hierarchy.Group)\n\n        # check obtain dask arrays\n        assert isinstance(cat[\"baz\"].to_dask(), da.Array)\n        assert isinstance(cat[\"bar\"][\"eggs\"].to_dask(), da.Array)\n\n        # open catalog directly from subgroup via `component` arg\n        cat = ZarrGroupCatalog(urlpath, component=\"bar\")\n        assert_items_equal([\"spam\", \"eggs\"], list(cat))\n        assert isinstance(cat[\"spam\"], ZarrGroupCatalog)\n        assert \"catalog\" == cat[\"spam\"].describe()[\"container\"]\n        assert isinstance(cat[\"eggs\"], ZarrArraySource)\n        assert \"ndarray\" == cat[\"eggs\"].describe()[\"container\"]\n\n\ndef test_zarr_entries_in_yaml_catalog(temp_zarr):\n    import dask.array as da\n\n    # open YAML catalog file\n    _, _, _, catalog_file = temp_zarr\n    cat = open_catalog(catalog_file)\n\n    # test entries\n    assert_items_equal([\"root\", \"bar\", \"eggs\"], list(cat))\n\n    # entry pointing to zarr root group\n    assert isinstance(cat[\"root\"], ZarrGroupCatalog)\n    assert_items_equal([\"foo\", \"bar\", \"baz\"], list(cat[\"root\"]))\n    assert \"catalog\" == cat[\"root\"].describe()[\"container\"]\n    assert isinstance(cat[\"root\"].to_zarr(), zarr.hierarchy.Group)\n\n    # entry pointing to zarr sub-group\n    assert isinstance(cat[\"bar\"], ZarrGroupCatalog)\n    assert_items_equal([\"spam\", \"eggs\"], list(cat[\"bar\"]))\n    assert \"catalog\" == cat[\"bar\"].describe()[\"container\"]\n    assert isinstance(cat[\"bar\"].to_zarr(), zarr.hierarchy.Group)\n\n    # entry pointing to zarr array\n    assert isinstance(cat[\"eggs\"], ZarrArraySource)\n    assert \"ndarray\" == cat[\"eggs\"].describe()[\"container\"]\n    assert isinstance(cat[\"eggs\"].to_dask(), da.Array)\n"
  },
  {
    "path": "intake/catalog/tests/util.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom intake.source import base, registry\n\n\ndef assert_items_equal(a, b):\n    assert len(a) == len(b) and sorted(a) == sorted(b)\n\n\nclass TestingSource(base.DataSource):\n    \"\"\"A source that gives back whatever parameters were passed to it\"\"\"\n\n    name = \"test\"\n    version = \"0.0.1\"\n    container = \"python\"\n    partition_access = False\n\n    def __init__(self, *args, **kwargs):\n        self.args = args\n        self.kwargs = kwargs\n        super(TestingSource, self).__init__(\"python\")\n        self.npartitions = 1\n\n    def _load_metadata(self):\n        pass\n\n    def _get_partition(self, _):\n        return self.args, self.kwargs\n\n\ndef register():\n    registry[\"test\"] = TestingSource\n"
  },
  {
    "path": "intake/catalog/utils.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport ast\nimport functools\nimport itertools\nimport os\nimport re\nimport shlex\nimport subprocess\nimport sys\nimport warnings\n\n\ndef flatten(iterable):\n    \"\"\"Flatten an arbitrarily deep list\"\"\"\n    # likely not used\n    iterable = iter(iterable)\n    while True:\n        try:\n            item = next(iterable)\n        except StopIteration:\n            break\n\n        if isinstance(item, str):\n            yield item\n            continue\n\n        try:\n            data = iter(item)\n            iterable = itertools.chain(data, iterable)\n        except Exception:\n            yield item\n\n\ndef reload_on_change(f):\n    @functools.wraps(f)\n    def wrapper(self, *args, **kwargs):\n        self.reload()\n        return f(self, *args, **kwargs)\n\n    return wrapper\n\n\ndef clamp(value, lower=0, upper=sys.maxsize):\n    \"\"\"Clamp float between given range\"\"\"\n    return max(lower, min(upper, value))\n\n\ndef _j_getenv(x, default=\"\"):\n    from jinja2 import Undefined\n\n    if isinstance(x, Undefined):\n        x = x._undefined_name\n    if isinstance(default, Undefined):\n        default = default._undefined_name\n    return os.getenv(x, default)\n\n\ndef _j_getshell(x):\n    from jinja2 import Undefined\n\n    if isinstance(x, Undefined):\n        x = x._undefined_name\n    try:\n        return subprocess.check_output(x).decode()\n    except (IOError, OSError):\n        return \"\"\n\n\ndef _j_passthrough(x, funcname):\n    from jinja2 import Undefined\n\n    if isinstance(x, Undefined):\n        x = x._undefined_name\n    return \"{{%s(%s)}}\" % (funcname, x)\n\n\ndef _expand(p, context, 
all_vars, client, getenv, getshell):\n    from jinja2 import Environment, meta\n\n    if isinstance(p, dict):\n        return {k: _expand(v, context, all_vars, client, getenv, getshell) for k, v in p.items()}\n    elif isinstance(p, (list, tuple, set)):\n        return type(p)(_expand(v, context, all_vars, client, getenv, getshell) for v in p)\n    elif isinstance(p, str):\n        jinja = Environment()\n        if getenv and not client:\n            jinja.globals[\"env\"] = _j_getenv\n        else:\n            jinja.globals[\"env\"] = lambda x: _j_passthrough(x, funcname=\"env\")\n        if getenv and client:\n            jinja.globals[\"client_env\"] = _j_getenv\n        else:\n            jinja.globals[\"client_env\"] = lambda x: _j_passthrough(x, funcname=\"client_env\")\n        if getshell and not client:\n            jinja.globals[\"shell\"] = _j_getshell\n        else:\n            jinja.globals[\"shell\"] = lambda x: _j_passthrough(x, funcname=\"shell\")\n        if getshell and client:\n            jinja.globals[\"client_shell\"] = _j_getshell\n        else:\n            jinja.globals[\"client_shell\"] = lambda x: _j_passthrough(x, funcname=\"client_shell\")\n        ast = jinja.parse(p)\n        all_vars -= meta.find_undeclared_variables(ast)\n        return jinja.from_string(p).render(context)\n    else:\n        # no expansion\n        return p\n\n\ndef expand_templates(pars, context, return_left=False, client=False, getenv=True, getshell=False):\n    \"\"\"\n    Render variables in context into the set of parameters with jinja2.\n\n    For variables that are not strings, nothing happens.\n\n    Parameters\n    ----------\n    pars: dict\n        values are strings containing some jinja2 controls\n    context: dict\n        values to use while rendering\n    return_left: bool\n        whether to return the set of variables in context that were not used\n        in rendering parameters\n\n    Returns\n    -------\n    dict with the same keys as 
``pars``, but updated values; optionally also\n    return set of unused parameter names.\n    \"\"\"\n    all_vars = set(context)\n    out = _expand(pars, context, all_vars, client, getenv, getshell)\n    if return_left:\n        return out, all_vars\n    return out\n\n\ndef expand_defaults(default, client=False, getenv=True, getshell=False):\n    \"\"\"Compile env, client_env, shell and client_shell commands\n\n    Execution rules:\n    - env() and shell() execute where the cat is loaded, if getenv and getshell\n      are True, respectively\n    - client_env() and client_shell() execute only if client is True and\n      getenv/getshell are also True.\n\n    If both getenv and getshell are False, this method does nothing.\n\n    If the environment variable is missing or the shell command fails, the\n    output is an empty string.\n    \"\"\"\n    r = re.match(r\"env\\((.*),?(.*)\\)\", default)\n    if r and not client and getenv:\n        gs = r.groups()\n        default = os.environ.get(gs[0], gs[1] if len(gs) > 1 else \"\")\n    r = re.match(r\"client_env\\((.*)\\)\", default)\n    if r and client and getenv:\n        default = os.environ.get(r.groups()[0], \"\")\n    r = re.match(r\"shell\\((.*)\\)\", default)\n    if r and not client and getshell:\n        try:\n            cmd = shlex.split(r.groups()[0])\n            default = subprocess.check_output(cmd).rstrip().decode(\"utf8\")\n        except (subprocess.CalledProcessError, OSError):\n            default = \"\"\n    else:\n        warnings.warn(\"Shell command not executed due to getshell=False\")\n    r = re.match(r\"client_shell\\((.*)\\)\", default)\n    if r and client and getshell:\n        try:\n            cmd = shlex.split(r.groups()[0])\n            default = subprocess.check_output(cmd).rstrip().decode(\"utf8\")\n        except (subprocess.CalledProcessError, OSError):\n            default = \"\"\n    else:\n        warnings.warn(\"Shell command not executed due to getshell=False\")\n    return 
default\n\n\ndef merge_pars(params, user_inputs, spec_pars, client=False, getenv=True, getshell=False):\n    \"\"\"Produce open arguments by merging various inputs\n\n    This function is called in the context of a catalog entry, when finalising\n    the arguments for instantiating the corresponding data source.\n\n    The three sets of inputs to be considered are:\n    - the arguments section of the original spec (params)\n    - UserParameters associated with the entry (spec_pars)\n    - explicit arguments provided at instantiation time, like entry(arg=value)\n      (user_inputs)\n\n    Both spec_pars and user_inputs can be considered as template variables and\n    used in expanding string values in params.\n\n    The default value of a spec_par, if given, may have embedded env and shell\n    functions, which will be evaluated before use, if the default is used and\n    the corresponding getenv/getsgell are set. Similarly, string value params\n    will also have access to these functions within jinja template groups,\n    as well as full jinja processing.\n\n    Where a key exists in both the spec_pars and the user_inputs, the\n    user_input wins. Where user_inputs contains keys not seen elsewhere, they\n    are regarded as extra kwargs to pass to the data source.\n\n    Where spec pars have the same name as keys in params, their type, max/min\n    and allowed fields are used to validate the final values of the\n    corresponding arguments.\n\n    Parameters\n    ----------\n    params : dict\n        From the entry's original spec\n    user_inputs : dict\n        Provided by the user/calling function\n    spec_pars : list of UserParameters\n        Default and validation instances\n    client : bool\n        Whether this is all running on a client to a remote server - sets\n        which of the env/shell functions are in operation.\n    getenv : bool\n        Whether to allow pulling environment variables. 
If False, the\n        template blocks will pass through unevaluated\n    getshell : bool\n        Whether or not to allow executing of shell commands. If False, the\n        template blocks will pass through unevaluated\n\n    Returns\n    -------\n    Final parameter dict\n    \"\"\"\n    context = params.copy()\n    for par in spec_pars:\n        val = user_inputs.get(par.name, par.default)\n        if val is not None:\n            if isinstance(val, str):\n                val = expand_defaults(val, getenv=getenv, getshell=getshell, client=client)\n            context[par.name] = par.validate(val)\n    context.update({k: v for k, v in user_inputs.items() if k not in context})\n    out, left = expand_templates(params, context, True, client, getenv, getshell)\n    context = {k: v for k, v in context.items() if k in left}\n    for par in spec_pars:\n        if par.name in context:\n            # coerces to type\n            context[par.name] = par.validate(context[par.name])\n            left.remove(par.name)\n\n    params.update(out)\n    user_inputs = expand_templates(user_inputs, context, False, client, getenv, getshell)\n    params.update({k: v for k, v in user_inputs.items() if k in left})\n    params.pop(\"CATALOG_DIR\", None)\n    for k, v in params.copy().items():\n        # final validation/coersion\n        for sp in [p for p in spec_pars if p.name == k]:\n            params[k] = sp.validate(params[k])\n\n    return params\n\n\ndef coerce_datetime(v=None):\n    import pandas\n\n    if not v:\n        v = 0\n\n    try:\n        iter(v)\n    except TypeError:  # not iterable\n        pass\n    else:\n        if \"__datetime__\" in v:\n            v = v[\"as_str\"]\n\n    return pandas.to_datetime(v)\n\n\ndef with_str_parse(value, rule):\n    import ast\n\n    if isinstance(value, str) and rule not in [str, coerce_datetime]:\n        try:\n            value = ast.literal_eval(value)\n        except (ValueError, TypeError, RuntimeError):\n            pass\n\n 
   return rule(value)\n\n\nCOERCION_RULES = {\n    \"bool\": bool,\n    \"datetime\": coerce_datetime,\n    \"dict\": dict,\n    \"float\": float,\n    \"tuple\": tuple,\n    \"mlist\": list,\n    \"int\": int,\n    \"list\": list,\n    \"str\": str,\n    \"unicode\": str,\n    \"other\": lambda x: x,\n}\n\n\ndef coerce(dtype, value):\n    \"\"\"\n    Convert a value to a specific type.\n\n    If the value is already the given type, then the original value is\n    returned. If the value is None, then the default value given by the\n    type constructor is returned. Otherwise, the type constructor converts\n    and returns the value.\n    \"\"\"\n    if \"[\" in dtype:\n        dtype, inner = dtype.split(\"[\")\n        inner = inner.rstrip(\"]\")\n    else:\n        inner = None\n    if dtype is None:\n        return value\n    if type(value).__name__ == dtype:\n        return value\n    if dtype == \"mlist\":\n        if isinstance(value, (tuple, set, dict)):\n            return list(value)\n        if isinstance(value, str):\n            try:\n                value = ast.literal_eval(value)\n                return list(value)\n            except ValueError as e:\n                raise ValueError(\"Failed to coerce string to list\") from e\n        return value\n    op = COERCION_RULES[dtype]\n    out = op() if value is None else with_str_parse(value, op)\n    if isinstance(out, dict) and inner is not None:\n        # TODO: recurse into coerce here, to allow list[list[str]] and such?\n        out = {k: COERCION_RULES[inner](v) for k, v in out.items()}\n    if isinstance(out, (tuple, list, set)) and inner is not None:\n        out = op(COERCION_RULES[inner](v) for v in out)\n    return out\n\n\nclass RemoteCatalogError(Exception):\n    pass\n\n\ndef _has_catalog_dir(args):\n    \"\"\"Check is any value in args dict needs CATALOG_DIR variable\"\"\"\n    from jinja2 import Environment, meta\n\n    env = Environment()\n    for k, arg in args.items():\n        
parsed_content = env.parse(arg)\n        vars = meta.find_undeclared_variables(parsed_content)\n        if \"CATALOG_DIR\" in vars:\n            return True\n    return False\n"
  },
  {
    "path": "intake/catalog/zarr.py",
    "content": "from .base import Catalog\nfrom .local import LocalCatalogEntry\n\n\nclass ZarrGroupCatalog(Catalog):\n    \"\"\"A catalog of the members of a Zarr group.\"\"\"\n\n    version = \"0.0.1\"\n    container = \"catalog\"\n    partition_access = None\n    name = \"zarr_cat\"\n\n    def __init__(\n        self,\n        urlpath,\n        storage_options=None,\n        component=None,\n        metadata=None,\n        consolidated=False,\n        name=None,\n    ):\n        \"\"\"\n\n        Parameters\n        ----------\n        urlpath : str\n            Location of data file(s), possibly including protocol information\n        storage_options : dict, optional\n            Passed on to storage backend for remote files\n        component : str, optional\n            If None, build a catalog from the root group. If given, build the\n            catalog from the group at this location in the hierarchy.\n        metadata : dict, optional\n            Catalog metadata. If not provided, will be populated from Zarr\n            group attributes.\n        consolidated : bool, optional\n            If True, assume Zarr metadata has been consolidated.\n        \"\"\"\n        self._urlpath = urlpath\n        self._storage_options = storage_options or {}\n        self._component = component\n        self._consolidated = consolidated\n        self._grp = None\n        self.name = name\n        super().__init__(metadata=metadata)\n\n    def _load(self):\n        import zarr\n\n        if self._grp is None:\n            # obtain the zarr root group\n            if isinstance(self._urlpath, zarr.hierarchy.Group):\n                # use already-opened group, allows support for nested groups\n                # as catalogs\n                root = self._urlpath\n\n            else:\n                # obtain store\n                if isinstance(self._urlpath, str):\n                    # open store from url\n                    from fsspec import get_mapper\n\n              
      store = get_mapper(self._urlpath, **self._storage_options)\n                else:\n                    # assume store passed directly\n                    store = self._urlpath\n\n                # open root group\n                if self._consolidated:\n                    # use consolidated metadata\n                    root = zarr.open_consolidated(store=store, mode=\"r\")\n                else:\n                    root = zarr.open_group(store=store, mode=\"r\")\n\n            # deal with component path\n            if self._component is None:\n                self._grp = root\n            else:\n                self._grp = root[self._component]\n\n            # use zarr attributes as metadata\n            self.metadata.update(self._grp.attrs.asdict())\n\n        # build catalog entries\n        entries = {}\n        for k, v in self._grp.items():\n            if isinstance(v, zarr.core.Array):\n                entry = LocalCatalogEntry(\n                    name=k,\n                    description=\"\",\n                    driver=\"ndzarr\",\n                    args=dict(urlpath=v),\n                    catalog=self,\n                )\n            else:\n                entry = LocalCatalogEntry(\n                    name=k, description=\"\", driver=\"zarr_cat\", args=dict(urlpath=v)\n                )\n            entries[k] = entry\n        self._entries = entries\n\n    def to_zarr(self):\n        return self._grp\n"
  },
  {
    "path": "intake/config.py",
    "content": "\"\"\"Intake config manipulations and persistence\"\"\"\n\n# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport ast\nimport contextlib\nimport copy\nimport logging\nimport os\nimport posixpath\nimport warnings\nfrom os.path import expanduser\n\nimport yaml\nfrom fsspec.implementations.local import make_path_posix\n\nfrom intake.utils import yaml_load\n\nlogger = logging.getLogger(\"intake\")\n\nconfdir = make_path_posix(os.getenv(\"INTAKE_CONF_DIR\", os.path.join(expanduser(\"~\"), \".intake\")))\n\n\ndefaults = {\n    \"logging\": \"INFO\",\n    \"catalog_path\": [],\n    \"allow_import\": True,\n    \"allow_templates\": True,\n    \"allow_pickle\": False,\n    \"import_on_startup\": True,\n    \"extra_imports\": [],\n    \"import_block_list\": [],\n    \"reader_avoid\": [],\n    \"environment_conf_parse\": \"ignore\",  # \"error\", \"warn\", \"ignore\"\n}\n\n\ndef cfile():\n    return make_path_posix(os.getenv(\"INTAKE_CONF_FILE\", posixpath.join(confdir, \"conf.yaml\")))\n\n\nclass Config(dict):\n    \"\"\"Intake's dict-like config system\n\n    Instance ``intake.conf`` is globally used throughout the package\n\n    Attributes:\n        environment_conf_parse : str\n                                 \"ignore\" (default), \"warn\" or raise an \"error\"\n                                 when parsing local environment variables as strings.\n    \"\"\"\n\n    def __init__(self, filename=None, **kwargs):\n        self.filename = filename if filename is not None else cfile()\n        self.reload_all()\n        self.temp = None\n        super().__init__(**kwargs)\n        logger.setLevel(self[\"logging\"])\n\n    def reset(self):\n        \"\"\"Set conf values back to 
defaults\"\"\"\n        self.clear()\n        self.update(defaults)\n\n    def save(self, fn=None):\n        \"\"\"Save current configuration to file as YAML\n\n        Uses ``self.filename`` for target location\n        \"\"\"\n        # TODO: fsspec?\n        fn = fn or self.filename\n        if fn is False:\n            return\n        try:\n            os.makedirs(os.path.dirname(fn))\n        except (OSError, IOError):\n            pass\n        with open(fn, \"w\") as f:\n            yaml.dump(dict(self), f)\n\n    @contextlib.contextmanager\n    def _unset(self, temp):\n        yield\n        self.clear()\n        self.update(temp)\n\n    def set(self, update_dict=None, **kw):\n        \"\"\"Change config values within a context or for the session\n\n        values: dict\n            This can be deeply nested to set only leaf values\n\n        See also: ``intake.readers.utils.nested_keys_to_dict``\n\n        Examples\n        --------\n\n        Value resets after context ends\n\n        >>> with intake.conf.set(mybval=5):\n        ...     
...\n\n        Set for whole session\n\n        >>> intake.conf.set(myval=5)\n\n        Set only a single leaf value within a nested dict\n\n        >>> intake.conf.set(intake.readers.utils.nested_keys_to_dict({\"deep.2.key\": True})\n        \"\"\"\n        temp = copy.deepcopy(self)\n        if update_dict:\n            kw.update(update_dict)\n        from intake.readers.utils import merge_dicts\n\n        self.update(merge_dicts(self, kw))\n        return self._unset(temp)\n\n    def __getitem__(self, item):\n        if item in self:\n            return super().__getitem__(item)\n        elif item in defaults:\n            return defaults[item]\n        else:\n            raise KeyError(item)\n\n    def get(self, key, default=None):\n        if key in self:\n            return super().__getitem__(key)\n        return default\n\n    def reload_all(self):\n        self.reset()\n        self.load()\n        self.load_env()\n\n    def load(self, fn=None):\n        \"\"\"Update global config from YAML file\n\n        If fn is None, looks in global config directory, which is either defined\n        by the INTAKE_CONF_DIR env-var or is ~/.intake/ .\n        \"\"\"\n        # TODO: if fn is not None, set self.filename\n        # TODO: fsspec?\n        fn = fn or self.filename\n\n        if os.path.isfile(fn):\n            with open(fn) as f:\n                try:\n                    self.update(yaml_load(f))\n                except Exception as e:\n                    logger.warning('Failure to load config file \"{fn}\": {e}' \"\".format(fn=fn, e=e))\n\n    def load_env(self):\n        \"\"\"Analyse environment variables and update conf accordingly\"\"\"\n        # environment variables take precedence over conf file\n        for k, v in os.environ.items():\n            if k.startswith(\"INTAKE_\"):\n                k2 = k[7:].lower()\n                try:\n                    val = ast.literal_eval(v)\n                except (ValueError, SyntaxError) as e:\n           
         if self[\"environment_conf_parse\"] == \"error\":\n                        logger.info(\n                            f\"Environment variable '{v}' cannot be parsed by ast.literal_eval()\"\n                        )\n                        raise e\n                    elif self[\"environment_conf_parse\"] == \"warn\":\n                        warnings.warn(\n                            f\"Failed to parse environment variable '{k}' (value: '{v}') using ast.literal_eval().\"\n                            f\"\\nParsing as string.\"\n                            f\"\\nError was: '{type(e).__name__}: {e}'\"\n                        )\n                    val = str(v)  # make sure it is a string\n                self[k2] = val\n\n\ndef intake_path_dirs(path):\n    \"\"\"Return a list of directories from the intake path.\n\n    If a string, perhaps taken from an environment variable, then the\n    list of paths will be split on the character \":\" for posix or \";\" for\n    windows. Protocol indicators (\"protocol://\") will be ignored.\n    \"\"\"\n    if isinstance(path, (list, tuple)):\n        return path\n    import re\n\n    pattern = re.compile(\";\" if os.name == \"nt\" else r\"(?<!:):(?![:/])\")\n    return pattern.split(path)\n\n\nconf = Config()\nconf.reload_all()\nsave_conf = conf.save\nload_cond = conf.load\n"
  },
  {
    "path": "intake/conftest.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport os\nimport posixpath\nimport tempfile\n\nimport pytest\n\nfrom intake import config, open_catalog, register_driver\nfrom intake.source.base import DataSource, Schema\nfrom intake.tests.test_utils import copy_test_file\n\nhere = os.path.dirname(__file__)\n\n\nclass TestSource(DataSource):\n    name = \"test\"\n    container = \"python\"\n\n    def __init__(self, **kwargs):\n        self.test_kwargs = kwargs\n        super().__init__()\n\n    def _get_schema(self):\n        return Schema()\n\n\nregister_driver(\"test\", TestSource)\n\n\n@pytest.fixture\ndef tmp_config_path(tmp_path):\n    from fsspec.implementations.local import make_path_posix\n\n    key = \"INTAKE_CONF_FILE\"\n    original = os.getenv(key)\n    temp_config_path = make_path_posix(os.path.join(tmp_path, \"test_config.yml\"))\n    os.environ[key] = temp_config_path\n    assert config.cfile() == temp_config_path\n    yield temp_config_path\n    config.conf.reset()\n    if original:\n        os.environ[key] = original\n    else:\n        del os.environ[key]\n    assert config.cfile() != temp_config_path\n\n\n@pytest.fixture(scope=\"function\")\ndef env(temp_cache, tempdir):\n    import intake\n\n    env = os.environ.copy()\n    env[\"INTAKE_CONF_DIR\"] = intake.config.confdir\n    env[\"INTAKE_CACHE_DIR\"] = intake.config.conf[\"cache_dir\"]\n    return env\n\n\n@pytest.fixture\ndef inherit_params_cat():\n    with tempfile.TemporaryDirectory() as tmp_dir:\n        tmp_path = posixpath.join(tmp_dir, \"intake\")\n        target_catalog = copy_test_file(\"catalog_inherit_params.yml\", tmp_path)\n        return 
open_catalog(target_catalog)\n\n\n@pytest.fixture\ndef inherit_params_multiple_cats():\n    with tempfile.TemporaryDirectory() as tmp_dir:\n        tmp_path = posixpath.join(tmp_dir, \"intake\")\n        copy_test_file(\"catalog_inherit_params.yml\", tmp_path)\n        copy_test_file(\"catalog_nested_sub.yml\", tmp_path)\n        return open_catalog(tmp_path + \"/*.yml\")\n\n\n@pytest.fixture\ndef inherit_params_subcat():\n    with tempfile.TemporaryDirectory() as tmp_dir:\n        tmp_path = posixpath.join(tmp_dir, \"intake\")\n        target_catalog = copy_test_file(\"catalog_inherit_params.yml\", tmp_path)\n        copy_test_file(\"catalog_nested_sub.yml\", tmp_path)\n        return open_catalog(target_catalog)\n"
  },
  {
    "path": "intake/container/__init__.py",
    "content": "def register_container(*_, **__):\n    ...\n"
  },
  {
    "path": "intake/container/base.py",
    "content": "class RemoteSource:\n    ...\n\n\ndef get_partition(*_, **__):\n    pass\n"
  },
  {
    "path": "intake/interface/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\nfrom packaging.version import Version\n\ngl = globals()\n\n\ndef do_import():\n    try:\n        import hvplot\n        import panel as pn\n\n        error = Version(pn.__version__) < Version(\"1\") or Version(hvplot.__version__) < Version(\n            \"0.8.1\"\n        )\n    except ImportError:\n        error = True\n\n    if error:\n        raise RuntimeError(\n            \"Please install panel and hvplot to use the GUI\\n\"\n            \"`conda install -c conda-forge 'panel>=1' 'hvplot>=0.8.1'`\"\n        )\n\n    from .gui import GUI\n\n    css = \"\"\"\n    .scrolling {\n      overflow: scroll;\n    }\n    \"\"\"\n    pn.config.raw_css.append(css)  # add scrolling class from css (panel GH#383, GH#384)\n    ex = pn.extension(\"codeeditor\", template=\"fast\")\n    gl[\"instance\"] = GUI()\n    return ex\n\n\ndef __getattr__(attr):\n    if attr in {\"instance\", \"gui\"}:\n        do_import()\n    return gl[attr]\n\n\ndef output_notebook(*_, **__):\n    \"\"\"\n    Load the notebook extension\n    \"\"\"\n    return do_import()\n"
  },
  {
    "path": "intake/interface/base.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\nimport os\nfrom collections import OrderedDict\n\nimport panel as pn\n\nMAX_WIDTH = 5000\n\nhere = os.path.abspath(os.path.dirname(__file__))\nICONS = {\n    \"logo\": os.path.join(here, \"icons\", \"logo.png\"),\n    \"error\": os.path.join(here, \"icons\", \"baseline-error-24px.svg\"),\n}\n\n\ndef enable_widget(widget, enable=True):\n    \"\"\"Set disabled on widget\"\"\"\n    widget.disabled = not enable\n\n\ndef coerce_to_list(items, preprocess=None):\n    \"\"\"Given an instance or list, coerce to list.\n\n    With optional preprocessing.\n    \"\"\"\n    if not isinstance(items, list):\n        items = [items]\n    if preprocess:\n        items = list(map(preprocess, items))\n    return items\n\n\nclass Base(object):\n    \"\"\"\n    Base class for composable panel objects that make up the GUI.\n\n    Parameters\n    ----------\n    children: list of panel objects\n        children that will be used to populate the panel when visible\n    panel: panel layout object\n        instance of a panel layout (row or column) that contains children\n        when visible\n    watchers: list of param watchers\n        watchers that are set on children - cleaned up when visible\n        is set to false.\n    visible: bool\n        whether or not the instance should be visible. When not visible\n        ``panel`` is empty.\n    logo : bool, opt\n        whether to show the intake logo in a panel to the left of the main\n        panel. 
Default is False\n    \"\"\"\n\n    children = None\n    panel = None\n    watchers = None\n    visible_callback = None\n    logo_panel = pn.Column(\n        pn.pane.PNG(ICONS[\"logo\"], align=\"center\"), margin=(25, 0, 0, 0), width=50\n    )\n    logo = False\n\n    def __init__(self, visible=True, visible_callback=None, logo=False):\n        self.visible = visible\n        self.visible_callback = visible_callback\n        self.logo = logo\n\n    @property\n    def panel(self):\n        if not self.logo:\n            return self._panel\n        return pn.Row(self.logo_panel, self._panel, margin=0)\n\n    @panel.setter\n    def panel(self, panel):\n        self._panel = panel\n\n    def servable(self, *args, **kwargs):\n        return self.panel.servable(*args, **kwargs)\n\n    def show(self, *args, **kwargs):\n        return self.panel.show(*args, **kwargs)\n\n    def __repr__(self):\n        \"\"\"Print output\"\"\"\n        return self.panel.__repr__()\n\n    def _repr_mimebundle_(self, *args, **kwargs):\n        \"\"\"Display in a notebook or a server\"\"\"\n        try:\n            return self.panel._repr_mimebundle_(*args, **kwargs)\n        except Exception:\n            raise NotImplementedError(\"Panel does not seem to be set \" \"up properly\")\n\n    def setup(self):\n        \"\"\"Should instantiate widgets, set ``children``, and set watchers\"\"\"\n        raise NotImplementedError\n\n    @property\n    def visible(self):\n        \"\"\"Whether or not the instance should be visible.\"\"\"\n        return self._visible\n\n    @visible.setter\n    def visible(self, visible):\n        \"\"\"When visible changed, do setup or unwatch and call visible_callback\"\"\"\n        self._visible = visible\n        pan = getattr(self._panel, \"_layout\", self._panel)\n        if visible and len(pan.objects) == 0:\n            self.setup()\n            self._panel.extend(self.children)\n        elif not visible and len(pan.objects) > 0:\n            
self.unwatch()\n            pan.clear()\n        if self.visible_callback:\n            self.visible_callback(visible)\n\n    def unwatch(self):\n        \"\"\"Get rid of any lingering watchers and remove from list\"\"\"\n        if self.watchers is not None:\n            unwatched = []\n            for watcher in self.watchers:\n                watcher.inst.param.unwatch(watcher)\n                unwatched.append(watcher)\n            self.watchers = [w for w in self.watchers if w not in unwatched]\n\n    def __getstate__(self):\n        \"\"\"Serialize the current state of the object\"\"\"\n        return {\"visible\": self.visible}\n\n    def __setstate__(self, state):\n        \"\"\"Set the current state of the object from the serialized version.\n        Works inplace. See ``__getstate__`` to get serialized version and\n        ``from_state`` to create a new object.\"\"\"\n        self.visible = state.get(\"visible\", True)\n        return self\n\n    @classmethod\n    def from_state(cls, state):\n        \"\"\"Create a new object from a serialized exising object.\n\n        Examples\n        --------\n        original = cls()\n        copy = cls.from_state(original.__getstate__())\n        \"\"\"\n        return cls().__setstate__(state)\n\n\nclass BaseSelector(Base):\n    \"\"\"Base class for capturing selector logic.\n\n    Parameters\n    ----------\n    preprocess: function\n        run on every input value when creating options\n    widget: panel widget\n        selector widget which this class keeps uptodate with class properties\n    \"\"\"\n\n    preprocess = None\n    widget = None\n\n    @property\n    def labels(self):\n        \"\"\"Labels of items in widget\"\"\"\n        return self.widget.labels\n\n    @property\n    def items(self):\n        \"\"\"Available items to select from\"\"\"\n        return self.widget.values\n\n    @items.setter\n    def items(self, items):\n        \"\"\"When setting items make sure widget options are 
uptodate\"\"\"\n        if items is not None:\n            self.options = items\n\n    def _create_options(self, items):\n        \"\"\"Helper method to create options from list, or instance.\n\n        Applies preprocess method if available to create a uniform\n        output\n        \"\"\"\n        return OrderedDict(map(lambda x: (x.name, x), coerce_to_list(items, self.preprocess)))\n\n    @property\n    def options(self):\n        \"\"\"Options available on the widget\"\"\"\n        return self.widget.options\n\n    @options.setter\n    def options(self, new):\n        \"\"\"Set options from list, or instance of named item\n\n        Over-writes old options\n        \"\"\"\n        options = self._create_options(new)\n        if self.widget.value:\n            self.widget.set_param(options=options, value=list(options.values())[:1])\n        else:\n            self.widget.options = options\n            self.widget.value = list(options.values())[:1]\n\n    def add(self, items):\n        \"\"\"Add items to options\"\"\"\n        options = self._create_options(items)\n        for k, v in options.items():\n            if k in self.labels and v not in self.items:\n                options.pop(k)\n                count = 0\n                while f\"{k}_{count}\" in self.labels:\n                    count += 1\n                options[f\"{k}_{count}\"] = v\n        self.widget.options.update(options)\n        self.widget.param.trigger(\"options\")\n        self.widget.value = list(options.values())[:1]\n\n    def remove(self, items):\n        \"\"\"Remove items from options\"\"\"\n        items = coerce_to_list(items)\n        new_options = {k: v for k, v in self.options.items() if v not in items}\n        self.widget.options = new_options\n        self.widget.param.trigger(\"options\")\n\n    @property\n    def selected(self):\n        \"\"\"Value selected on the widget\"\"\"\n        ops = self.widget.options\n        return [\n            val\n            for val in 
self.widget.value\n            if (val in ops.values() if isinstance(ops, dict) else val in ops)\n        ]\n\n    @selected.setter\n    def selected(self, new):\n        \"\"\"Set selected from list or instance of object or name.\n\n        Over-writes existing selection\n        \"\"\"\n\n        def preprocess(item):\n            if isinstance(item, str):\n                return self.options[item]\n            return item\n\n        items = coerce_to_list(new, preprocess)\n        self.widget.value = items\n\n\nclass BaseView(Base):\n    def __getstate__(self, include_source=True):\n        \"\"\"Serialize the current state of the object. Set include_source\n        to False when using with another panel that will include source.\"\"\"\n        if include_source:\n            return {\n                \"visible\": self.visible,\n                \"label\": self.source._name,\n                \"source\": self.source.__getstate__(),\n            }\n        else:\n            return {\"visible\": self.visible}\n\n    def __setstate__(self, state):\n        \"\"\"Set the current state of the object from the serialized version.\n        Works inplace. See ``__getstate__`` to get serialized version and\n        ``from_state`` to create a new object.\"\"\"\n        if \"source\" in state:\n            self.source = state[\"source\"]\n        self.visible = state.get(\"visible\", True)\n\n    @property\n    def source(self):\n        return self._source\n\n    @source.setter\n    def source(self, source):\n        \"\"\"When the source gets updated, update the select widget\"\"\"\n        if isinstance(source, list):\n            # if source is a list, get first item or None\n            source = source[0] if len(source) > 0 else None\n        self._source = source\n"
  },
  {
    "path": "intake/interface/catalog/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n"
  },
  {
    "path": "intake/interface/catalog/add.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport ast\nimport os\nfrom functools import partial\n\nimport fsspec\nimport panel as pn\nfrom fsspec.registry import known_implementations\n\nimport intake\n\nfrom ..base import MAX_WIDTH, Base, enable_widget\n\n\nclass FileSelector(Base):\n    \"\"\"\n    Panel interface for picking files\n\n    The current path is stored in .path and the current selection is stored in\n    .url.\n\n    Parameters\n    ----------\n    filters: list of string\n        extentions that are included in the list of files - correspond to\n        catalog extensions.\n    done_callback: func, opt\n        called when the object's main job has completed. In this case,\n        selecting a file.\n\n    Attributes\n    ----------\n    url: str\n        path to local catalog file\n    children: list of panel objects\n        children that will be used to populate the panel when visible\n    panel: panel layout object\n        instance of a panel layout (row or column) that contains children\n        when visible\n    watchers: list of param watchers\n        watchers that are set on children - cleaned up when visible\n        is set to false.\n    \"\"\"\n\n    def __init__(self, filters=[\"yaml\", \"yml\"], done_callback=None, **kwargs):\n        self.filters = filters\n        self.panel = pn.Column(name=\"Local\", width_policy=\"max\", margin=0)\n        self.done_callback = done_callback\n        self.fs = fsspec.filesystem(\"file\")\n        super().__init__(**kwargs)\n\n    def setup(self):\n        self.path_text = pn.widgets.TextInput(value=os.getcwd() + \"/\", width_policy=\"max\")\n        self.protocol = pn.widgets.Select(\n          
  options=list(sorted(known_implementations)), value=\"file\", name=\"protocol\"\n        )\n        self.storage_options = pn.widgets.TextInput(name=\"kwargs\", value=\"{}\")\n        self.go = pn.widgets.Button(name=\"⇨\")\n        self.validator = pn.pane.SVG(None, width=25)\n        self.main = pn.widgets.MultiSelect(size=15, width_policy=\"max\")\n        self.home = pn.widgets.Button(name=\"🏠\", width=40, height=30)\n        self.up = pn.widgets.Button(name=\"‹\", width=30, height=30)\n\n        self.make_options()\n\n        self.watchers = [\n            self.go.param.watch(self.go_clicked, \"clicks\"),\n            self.protocol.param.watch(self.protocol_changed, \"value\"),\n            # self.path_text.param.watch(self.validate, ['value']),\n            # self.path_text.param.watch(self.make_options, ['value']),\n            self.home.param.watch(self.go_home, \"clicks\"),\n            self.up.param.watch(self.move_up, \"clicks\"),\n            self.main.param.watch(self.move_down, [\"value\"]),\n        ]\n\n        self.children = [\n            pn.Row(self.protocol, self.storage_options),\n            pn.Row(self.home, self.up, self.path_text, self.go, margin=0),\n            self.main,\n        ]\n\n    def protocol_changed(self, *_):\n        self.path_text.value = \"\"\n        self.main.options = []\n        self.main.value = []\n\n    def go_clicked(self, *_):\n        self.fs = fsspec.filesystem(\n            self.protocol.value, **ast.literal_eval(self.storage_options.value)\n        )\n        self.make_options()\n\n    @property\n    def path(self):\n        return self.path_text.value\n\n    @property\n    def url(self):\n        \"\"\"Path to local catalog file\"\"\"\n        return self.protocol.value + \"://\" + os.path.join(self.path, self.main.value[0])\n\n    def move_up(self, arg=None):\n        self.path_text.value = self.fs._parent(self.path_text.value)\n        self.make_options()\n\n    def go_home(self, arg=None):\n        
self.protocol.value = \"file\"\n        self.path_text.value = os.getcwd() + os.path.sep\n\n    def make_options(self, arg=None):\n        if self.done_callback:\n            self.done_callback(False)\n        out = []\n        try:\n            for f in self.fs.ls(self.path, True):\n                bn = os.path.basename(f[\"name\"].rstrip(\"/\"))\n                if bn.startswith(\".\"):\n                    continue\n                elif f[\"type\"] == \"directory\":\n                    out.append(bn + \"/\")\n                elif not self.filters or any(bn.endswith(ext) for ext in self.filters):\n                    out.append(bn)\n        except Exception as e:\n            print(e)\n\n        self.main.value = []\n        self.main.options = sorted(out)\n\n    def move_down(self, *events):\n        for event in events:\n            if event.name == \"value\" and len(event.new) > 0:\n                fn = event.new[0]\n                if fn.endswith(\"/\"):\n                    if self.path_text.value:\n                        self.path_text.value = os.path.join(self.path_text.value, fn)\n                    else:\n                        self.path_text.value = fn\n                    self.make_options()\n                elif self.done_callback:\n                    self.done_callback(True)\n\n    def __getstate__(self):\n        \"\"\"Serialize the current state of the object.\"\"\"\n        return {\"path\": self.path, \"selected\": self.main.value}\n\n    def __setstate__(self, state):\n        \"\"\"Set the current state of the object from the serialized version.\n        Works inplace. 
See ``__getstate__`` to get serialized version and\n        ``from_state`` to create a new object.\"\"\"\n        self.path_text.value = state[\"path\"]\n        self.main.value = state[\"selected\"]\n        return self\n\n\nclass URLSelector(Base):\n    \"\"\"\n    Panel interface for inputting a URL to a remote catalog\n\n    The inputted URL is stored in .url.\n\n    Attributes\n    ----------\n    url: str\n        url to remote files (including protocol)\n    children: list of panel objects\n        children that will be used to populate the panel when visible\n    panel: panel layout object\n        instance of a panel layout (row or column) that contains children\n        when visible\n    watchers: list of param watchers\n        watchers that are set on children - cleaned up when visible\n        is set to false.\n    \"\"\"\n\n    def __init__(self, **kwargs):\n        self.panel = pn.Row(name=\"URL\", width_policy=\"max\", margin=0)\n        super().__init__(**kwargs)\n\n    def setup(self):\n        self.main = pn.widgets.TextInput(placeholder=\"Full URL with protocol\", width_policy=\"max\")\n        self.children = [\"URL:\", self.main]\n\n    @property\n    def url(self):\n        \"\"\"URL to remote files (including protocol)\"\"\"\n        return self.main.value\n\n    def __getstate__(self):\n        \"\"\"Serialize the current state of the object.\"\"\"\n        return {\"url\": self.url}\n\n    def __setstate__(self, state):\n        \"\"\"Set the current state of the object from the serialized version.\n        Works inplace. 
See ``__getstate__`` to get serialized version and\n        ``from_state`` to create a new object.\"\"\"\n        self.main.value = state[\"url\"]\n        return self\n\n\nclass CatAdder(Base):\n    \"\"\"Panel for adding new cats from local file or remote\n\n    Parameters\n    ----------\n    done_callback: function with cat as input\n        function that is called when the \"Add Catalog\" button is clicked.\n\n    Attributes\n    ----------\n    cat_url: str\n        url to remote files or path to local files. Depends on active tab\n    cat: catalog\n        catalog object initialized from from cat_url\n    children: list of panel objects\n        children that will be used to populate the panel when visible\n    panel: panel layout object\n        instance of a panel layout (row or column) that contains children\n        when visible\n    watchers: list of param watchers\n        watchers that are set on children - cleaned up when visible\n        is set to false.\n    \"\"\"\n\n    tabs = None\n\n    def __init__(self, done_callback=None, **kwargs):\n        self.done_callback = done_callback\n        self.panel = pn.Column(\n            name=\"Add Catalog\", width_policy=\"max\", max_width=MAX_WIDTH, margin=0\n        )\n        self.widget = pn.widgets.Button(name=\"Add Catalog\", disabled=True, width_policy=\"min\")\n        self.fs = FileSelector(done_callback=partial(enable_widget, self.widget))\n        self.url = URLSelector()\n        super().__init__(**kwargs)\n\n    def setup(self):\n        self.selectors = [self.fs, self.url]\n        self.tabs = pn.Tabs(*map(lambda x: x.panel, self.selectors))\n\n        self.watchers = [\n            self.widget.param.watch(self.add_cat, \"clicks\"),\n        ]\n\n        self.children = [self.tabs, self.widget]\n\n    @property\n    def cat_url(self):\n        \"\"\"URL to remote files or path to local files. 
Depends on active tab.\"\"\"\n        url = self.selectors[self.tabs.active].url\n        if self.selectors[self.tabs.active] is self.fs:\n            fs = self.fs.fs\n        else:\n            fs = None\n        return url, fs\n\n    @property\n    def cat(self):\n        \"\"\"Catalog object initialized from cat_url\"\"\"\n        # might want to do some validation in here\n        url, fs = self.cat_url\n        if fs:\n            return intake.open_catalog(url, fs=fs)\n        else:\n            return intake.open_catalog(url)\n\n    def add_cat(self, arg=None):\n        \"\"\"Add cat and close panel\"\"\"\n        try:\n            self.done_callback(self.cat)\n            self.panel.visible = False\n        except Exception as e:\n            raise e\n"
  },
  {
    "path": "intake/interface/catalog/search.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\nimport panel as pn\n\n\nclass Search:\n    \"\"\"Search panel for searching a list of catalogs\n\n    Parameters\n    ----------\n    done_callback: function with cats as input\n        function that is called when new cats have been generated\n        via the search functionality\n    \"\"\"\n\n    def __init__(self, done_callback: callable):\n        self.done_callback = done_callback\n        self.tinput = pn.widgets.TextInput()\n        ok = pn.widgets.Button(name=\"OK\")\n        ok.on_click(self.go)\n        label = pn.widgets.StaticText(value=\"Search\")\n        self.panel = pn.Row(label, self.tinput, ok)\n\n    def go(self, *_):\n        if self.tinput.value:\n            self.done_callback(self.tinput.value)\n"
  },
  {
    "path": "intake/interface/gui.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\nimport panel as pn\n\nimport intake\nfrom intake.interface.base import ICONS\nfrom intake.interface.catalog.add import CatAdder\nfrom intake.interface.catalog.search import Search\nfrom intake.interface.source import defined_plots\n\n\nclass GUI:\n    \"\"\"\n    Top level GUI panel\n\n    This class is responsible for coordinating the inputs and outputs\n    of various sup-panels and their effects on each other.\n\n    Parameters\n    ----------\n    cats: dict of catalogs\n        catalogs used to initalize the cat panel, {display_name: cat_object}\n\n    \"\"\"\n\n    def __init__(self, cats=None):\n        # state\n        self._children = {}  # cat name in the selector to child catalogs' names: cat objects\n        # mapping of name in the selector to catalog object\n        self._cats = cats or {\"builtin\": intake.cat}\n        self._sources = {}  # source name: source instance\n\n        # layout\n        col0 = pn.Column(pn.pane.PNG(ICONS[\"logo\"], align=\"center\"), margin=(25, 0, 0, 0), width=50)\n        self.catsel = pn.widgets.MultiSelect(\n            name=\"Catalogs\",\n            options=list(self._cats),\n            value=[],\n            size=13,\n            styles={\"width\": \"25%\"},\n        )\n        self.catsel.param.watch(self.cat_selected, \"value\")\n        add = pn.widgets.Button(name=\"+\")\n        sub = pn.widgets.Button(name=\"-\")\n        search = pn.widgets.Button(name=\"🔍\")\n        col1 = pn.Column(self.catsel, pn.Row(add, sub, search))\n        add.on_click(self.add_clicked)\n        sub.on_click(self.sub_clicked)\n        search.on_click(self.search_clicked)\n\n        
self.sourcesel = pn.widgets.MultiSelect(name=\"Sources\", size=13, styles={\"width\": \"25%\"})\n        plot = pn.widgets.Button(name=\"📊\")\n        plot.on_click(self.plot_clicked)\n        self.sourcesel.param.watch(self.source_selected, \"value\")\n        col2 = pn.Column(self.sourcesel, plot)\n\n        self.sourceinf = pn.widgets.CodeEditor(\n            readonly=True, language=\"yaml\", print_margin=False, annotations=[]\n        )\n        col3 = pn.Column(self.sourceinf)\n\n        row0 = pn.Row(col0, col1, col2, col3, styles={\"width\": \"100%\"})\n\n        self.plots = defined_plots.Plots()\n        self.plots.panel.visible = False\n        self.add = CatAdder(done_callback=self.add_catalog)\n        self.add.panel.visible = False\n        self.search = Search(done_callback=self.searched)\n        self.search.panel.visible = False\n        self.row1 = pn.Row(self.plots.panel, self.add.panel, self.search.panel)\n\n        self.main = pn.Column(row0, self.row1)\n        self.cat_selected(None)\n\n    def _repr_mimebundle_(self, *args, **kwargs):\n        \"\"\"Display in a notebook or a server\"\"\"\n        return self.main._repr_mimebundle_(*args, **kwargs)\n\n    def show(self, *args, **kwargs):\n        return self.main.show(*args, **kwargs)\n\n    def __repr__(self):\n        return \"Intake GUI\"\n\n    def cat_selected(self, *_):\n        right = \"└─>\"\n\n        cat = self.catsel.value\n        if not cat:\n            return\n        else:\n            catname = cat[0]\n            cat = self._cats[catname]\n        catsel_needs_update = False\n        self._sources.clear()\n        indent = len(catname) - len(catname.lstrip(\" \")) + 2\n        for entry in cat:\n            name = \" \" * indent + right + entry\n            source = cat[entry]\n            if isinstance(source, intake.catalog.Catalog):\n                if name not in self._cats:\n                    self._cats[name] = source\n                    
self._children.setdefault(catname, []).append(name)\n                    catsel_needs_update = True\n            elif \"Catalog\" in getattr(source, \"output_instance\", \"\"):\n                if name not in self._cats:\n                    cat = source.read()\n                    self._cats[name] = cat\n                    self._children.setdefault(catname, []).append(name)\n                    catsel_needs_update = True\n            else:\n                self._sources[entry] = source\n        if catsel_needs_update:\n            self.update_catsel()\n        self.sourcesel.param.update(options=list(self._sources))\n\n    def update_catsel(self):\n        self.catsel.param.update(options=get_catlist(self._cats, self._children))\n\n    def add_catalog(self, cat, name=None, **_):\n        if hasattr(cat, \"token\"):\n            if \"CATALOG_PATH\" in cat.user_parameters:\n                par = cat.user_parameters[\"CATALOG_PATH\"]\n                name = getattr(par, \"default\", str(par))\n            else:\n                name = cat.token\n        else:\n            name = name or getattr(cat, \"token\", cat.name)\n        self._cats[name] = cat\n        self.update_catsel()\n\n    def source_selected(self, *_):\n        from intake import BaseReader\n        import yaml\n\n        source = self.sourcesel.value\n        if not source:\n            return\n        else:\n            source = self._sources[source[0]]\n        if isinstance(source, BaseReader):\n            # could have reverted to ReaderDescription, but this version will include any\n            # other readers/data, not just references.\n            d = {\"cls\": source.qname()}\n            d.update(source.to_dict())\n            txt = yaml.dump(d, default_flow_style=False)\n        else:\n            txt = yaml.dump(source._yaml()[\"sources\"], default_flow_style=False)\n        self.sourceinf.param.update(value=txt)\n\n    def plot_clicked(self, *_):\n        if self.plots.panel.visible:\n   
         self.plots.panel.visible = False\n        elif self.sources:\n            self.plots.source = self.sources[0]\n            self.add.panel.visible = False\n            self.plots.panel.visible = True\n            self.search.panel.visible = False\n\n    def searched(self, searchstring: str):\n        if self.cats:\n            cat = self.cats[0]\n            cat2 = cat.search(searchstring)\n            self.add_catalog(cat2, name=f\"search <{searchstring[:10]}>\")\n\n    def add_clicked(self, *_):\n        if self.add.panel.visible:\n            self.add.panel.visible = False\n        else:\n            self.add.panel.visible = True\n            self.plots.panel.visible = False\n            self.search.panel.visible = False\n\n    def sub_clicked(self, *_):\n        for catname in self.catsel.value:\n            self.remove_cat(catname)\n\n    def remove_cat(self, catname, done=True):\n        self._cats.pop(catname, None)  # remake \"builtin\" if accidentally removed?\n        for cat in self._children.get(catname, []):\n            self.remove_cat(cat, done=False)\n        if done:\n            self.catsel.param.update(options=list(self._cats))\n\n    def search_clicked(self, *_):\n        if self.search.panel.visible:\n            self.search.panel.visible = False\n        else:\n            self.add.panel.visible = False\n            self.plots.panel.visible = False\n            self.search.panel.visible = True\n\n    @property\n    def cats(self):\n        \"\"\"Cats that have been selected from the cat sub-panel\"\"\"\n        return [self._cats[k] for k in self.catsel.value]\n\n    @property\n    def sources(self):\n        \"\"\"Sources that have been selected from the source sub-panel\"\"\"\n        return [self._sources[k] for k in self.sourcesel.value]\n\n    @property\n    def source_instance(self):\n        return self.sources[0] if self.sourcesel.values else None\n\n\ndef get_catlist(catnames, children, outlist=None, seen=None):\n    outlist = 
outlist or []\n    seen = seen or set()\n    for name in sorted(catnames):\n        if name in seen:\n            continue\n        seen.add(name)\n        outlist.append(name)\n        if name in children:\n            get_catlist(children[name], children, outlist, seen)\n    return outlist\n"
  },
  {
    "path": "intake/interface/source/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2019, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n"
  },
  {
    "path": "intake/interface/source/defined_plots.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2022, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\nimport hvplot\nfrom packaging.version import Version\n\ntry:\n    import xrviz\n    from xrviz.dashboard import Dashboard as XRViz\n\n    assert Version(xrviz.__version__) >= Version(\"0.1.1\")\nexcept ImportError:\n    xrviz = False\n\nimport panel as pn\n\nfrom ...catalog.local import LocalCatalogEntry, YAMLFileCatalog\nfrom ..base import BaseView\n\n\nclass Event:\n    def __init__(self, plot, **kwargs):\n        self.object = plot\n        for key, val in kwargs.items():\n            setattr(self, key, val)\n\n\nclass Plots(BaseView):\n    \"\"\"\n    Panel for displaying pre-defined plots from catalog.\n\n    Parameters\n    ----------\n    source: intake catalog entry, or list of same\n        source to describe in this object\n    edit_callback: callback to alert that plot has been edited\n\n    Attributes\n    ----------\n    has_plots: bool\n        whether the source has plots defined\n    options: list\n        plots options defined on the source\n    selected: str\n        name of selected plot\n    children: list of panel objects\n        children that will be used to populate the panel when visible\n    panel: panel layout object\n        instance of a panel layout (row or column) that contains children\n        when visible\n    watchers: list of param watchers\n        watchers that are set on children - cleaned up when visible\n        is set to false.\n    \"\"\"\n\n    select = None\n\n    def __init__(self, source=None, **kwargs):\n        self.custom = pn.widgets.Button(name=\"Create\", width=70, align=\"center\")\n        self.source = source\n        self.panel = pn.Column(name=\"Plot\", 
width_policy=\"max\", margin=0)\n        self._behavior_callback = None\n        self._callbacks = []\n        super().__init__(**kwargs)\n\n    def setup(self):\n        self.instructions = pn.pane.Markdown(\"**Select Plot**\", align=\"center\", width=70)\n        self.select = pn.widgets.Select(\n            options=self.options, height=30, align=\"center\", min_width=200\n        )\n        # Add spaces in front of each option to make it render correctly.\n        # This is a bit of a hack to make a Select which only displays the down arrow.\n        self.edit_options = pn.widgets.Select(\n            options=[\"  Edit\", \"  Clone\", \"  Rename\", \"  Delete\"],\n            align=\"center\",\n            width=25,\n            margin=(5, -10),\n        )\n        self.pane = pn.pane.HoloViews(self._plot_object(self.selected), name=\"Plot\")\n\n        self.interact_label = pn.pane.Markdown(\n            \"Name\", align=\"center\", width_policy=\"max\", min_width=100\n        )\n        self.interact_name = pn.widgets.TextInput(placeholder=\"Name of new plot...\")\n        self.interact_cancel = pn.widgets.Button(name=\"Cancel\", width=100)\n        self.interact_save = pn.widgets.Button(name=\"Save\", button_type=\"primary\", width=100)\n\n        self.watchers = [\n            self.select.param.watch(self.plot_selected, [\"value\"]),\n            self.custom.param.watch(self.interact, [\"clicks\"]),\n            self.edit_options.param.watch(self.interact, [\"value\"]),\n            self.interact_name.param.watch(self.name_changed, [\"value_input\"]),\n            self.interact_cancel.param.watch(self.cancel, [\"clicks\"]),\n            self.interact_save.param.watch(self.interact_action, [\"clicks\"]),\n        ]\n        self.alert = pn.pane.Alert(alert_type=\"danger\")\n        self.out = pn.Row(self.pane, name=\"Plot\")\n\n        self.row_select_plots = pn.Row(\n            self.instructions,\n            self.select,\n            self.custom,\n          
  self.edit_options,\n        )\n        self.row_dialog_buttons = pn.Row(\n            self.interact_label,\n            self.interact_name,\n            self.interact_cancel,\n            self.interact_save,\n        )\n        self.children = [\n            pn.Column(\n                self.row_select_plots,\n                self.row_dialog_buttons,\n                pn.Row(self.alert),\n                self.out,\n            )\n        ]\n        # Set initial visibility\n        self.row_select_plots[-1].visible = False  # edit_options dropdown\n        self.row_dialog_buttons.visible = False\n        self.alert.visible = False\n\n    @BaseView.source.setter\n    def source(self, source):\n        \"\"\"When the source gets updated, update the options in\n        the selector\"\"\"\n        if source and isinstance(source, list):\n            source = source[0]\n        if isinstance(source, LocalCatalogEntry):\n            source = source()\n        BaseView.source.fset(self, source)\n        if self.select:\n            self.select.options = self.options\n        if source and source.container == \"dataframe\":\n            self.custom.disabled = False\n        elif source and xrviz and source.container in [\"xarray\", \"ndarray\", \"numpy\"]:\n            self.custom.disabled = False\n        else:\n            self.custom.disabled = True\n\n    @property\n    def has_plots(self):\n        \"\"\"Whether the source has plots defined\"\"\"\n        return self.source is not None and len(self._source.plots) > 0\n\n    @property\n    def options(self):\n        \"\"\"Plots options defined on the source\"\"\"\n        return ([\"None\"] + list(self.source.metadata[\"plots\"])) if self.source is not None else []\n\n    @property\n    def selected(self):\n        \"\"\"Name of selected plot\"\"\"\n        return self.select.value if self.select is not None else None\n\n    @selected.setter\n    def selected(self, selected):\n        \"\"\"When plot is selected set, 
make sure widget stays upto date\"\"\"\n        self.select.value = selected\n\n    def watch(self, callback):\n        self._callbacks.append(callback)\n\n    def plot_selected(self, *events):\n        for event in events:\n            if event.name == \"value\":\n                self.pane.object = self._plot_object(event.new)\n            self.custom.name = \"Create\" if str(self.select.value) == \"None\" else \"Edit\"\n            self.edit_options.visible = self.custom.name == \"Edit\"\n\n    def name_changed(self, *events):\n        for event in events:\n            if event.name == \"value_input\":\n                self.alert.visible = False\n                # Empty name not allowed and name cannot already be in use\n                if len(event.new) == 0:\n                    self.interact_save.disabled = True\n                elif event.new in self.options:\n                    self.interact_save.disabled = True\n                    self.alert.object = f'Name \"{event.new}\" already exists'\n                    self.alert.visible = True\n                else:\n                    self.interact_save.disabled = False\n\n    def cancel(self, _):\n        self.pane.object = self._plot_object(self.selected)\n        self.out[0] = self.pane\n        self.row_select_plots.visible = True\n        self.row_dialog_buttons.visible = False\n        self._behavior_callback = None\n\n    def interact_action(self, _):\n        # Stop catalog from autoreloading\n        self.source.cat.ttl = None\n        if self._behavior_callback is not None:\n            # Perform action\n            event = self._behavior_callback()\n            # Save catalog (if YAMLFileCatalog)\n            if isinstance(self.source.cat, YAMLFileCatalog):\n                self.source.cat.add(self.source)\n            # Callbacks\n            for cb in self._callbacks:\n                cb(event)\n        # Reset custom behavior callback\n        self._behavior_callback = None\n        # End action\n  
      self.cancel(None)\n\n    def interact(self, _):\n        # Create/Edit/Clone/Rename/Delete was selected\n        if self._behavior_callback is not None:\n            # This is a short-circuit to allow edit options reset\n            # without causing an infinite loop\n            return\n        if self.selected == \"None\":\n            # Create\n            self._behavior_callback = self._create\n            self.interact_save.name = \"Save\"\n            self.interact_save.disabled = True\n            self.interact_label.object = \"Name \"\n            self.interact_name.value = \"\"\n            self.interact_name.visible = True\n            viz = self.draw()\n            self.out[0] = viz\n        elif self.edit_options.value == \"  Clone\":\n            self._behavior_callback = self._clone\n            self.interact_save.name = \"Clone\"\n            self.interact_save.disabled = True\n            self.interact_label.object = f'Clone \"**{self.selected}**\" as '\n            self.interact_name.value = \"\"\n            self.interact_name.visible = True\n        elif self.edit_options.value == \"  Rename\":\n            self._behavior_callback = self._rename\n            self.interact_save.name = \"Rename\"\n            self.interact_save.disabled = True\n            self.interact_label.object = f'Rename \"**{self.selected}**\" to '\n            self.interact_name.value = \"\"\n            self.interact_name.visible = True\n        elif self.edit_options.value == \"  Delete\":\n            self._behavior_callback = self._delete\n            self.interact_save.name = \"Delete\"\n            self.interact_save.disabled = False\n            self.interact_label.object = f'Really delete \"**{self.selected}**\" ?'\n            self.interact_name.visible = False\n        elif self.edit_options.value == \"  Edit\":\n            # Edit\n            self._behavior_callback = self._edit\n            self.interact_save.name = \"Save\"\n            
self.interact_save.disabled = False\n            self.interact_label.object = f'Editing \"**{self.selected}**\"'\n            self.interact_name.visible = False\n            viz = self.draw()\n            self.out[0] = viz\n        else:\n            raise ValueError(self.edit_options.value)\n        # Update visibility of components\n        self.row_dialog_buttons.visible = True\n        self.row_select_plots.visible = False\n        # Reset edit options selection (won't trigger\n        # edit action because _behavior_callback is set)\n        self.edit_options.value = \"Edit\"\n\n    def draw(self):\n        if self.selected == \"None\":\n            kwargs = {\"y\": []}\n        else:\n            kwargs = self.source.metadata[\"plots\"][self.selected]\n        if self.source.container == \"dataframe\":\n            df = self.source.to_dask()\n            if df.npartitions == 1:\n                df = df.compute()\n            self._viz = viz = hvplot.explorer(df, **kwargs)\n        elif self.source.container in [\"xarray\", \"ndarray\", \"numpy\"]:\n            import xarray\n\n            try:\n                data = self.source.to_dask()\n            except NotImplementedError:\n                data = self.source.read()\n            if not isinstance(data, (xarray.DataArray, xarray.Dataset)):\n                data = xarray.DataArray(data)\n            self._viz = XRViz(data, **kwargs)\n            viz = self._viz.panel\n        else:\n            raise ValueError(f\"Unhandled container type {self.source.container}\")\n        return viz\n\n    def _plot_object(self, selected):\n        from hvplot import hvPlot\n\n        if selected and str(selected) != \"None\":\n            args = self.source.metadata[\"plots\"][str(selected)]\n            plot_method = hvPlot(self.source)(**args)\n            self.out[0] = self.pane\n            return plot_method\n\n    def _create(self):\n        plot_name = self.interact_name.value\n        # Add plot metadata to both 
DataSource and CatalogEntry\n        self.source.metadata.setdefault(\"plots\", {})[plot_name] = self._viz.settings()\n        self.source.entry._metadata.setdefault(\"plots\", {})[plot_name] = self._viz.settings()\n        # Add new plot name to self.options\n        self.select.options = self.options\n        # Select new graph\n        self.selected = plot_name\n        return Event(self, kind=\"create\", plot_name=plot_name)\n\n    def _edit(self):\n        # Update plot metadata for both DataSource and CatalogEntry\n        self.source.metadata[\"plots\"][self.selected] = self._viz.settings()\n        self.source.entry._metadata[\"plots\"][self.selected] = self._viz.settings()\n        return Event(self, kind=\"edit\", plot_name=self.selected)\n\n    def _clone(self):\n        plot_name = self.interact_name.value\n        # Clone plot metadata for both DataSource and CatalogEntry\n        self.source.metadata[\"plots\"][plot_name] = self.source.metadata[\"plots\"][\n            self.selected\n        ].copy()\n        self.source.entry._metadata[\"plots\"][plot_name] = self.source.entry._metadata[\"plots\"][\n            self.selected\n        ].copy()\n        # Add new plot name to self.options\n        self.select.options = self.options\n        # Select new graph\n        self.selected = plot_name\n        return Event(self, kind=\"clone\", plot_name=plot_name, cloned_from=self.selected)\n\n    def _rename(self):\n        plot_name = self.interact_name.value\n        # Rename plot metadata for both DataSource and CatalogEntry\n        self.source.metadata[\"plots\"][plot_name] = self.source.metadata[\"plots\"].pop(self.selected)\n        self.source.entry._metadata[\"plots\"][plot_name] = self.source.entry._metadata[\"plots\"].pop(\n            self.selected\n        )\n        # Update available options in dropdown\n        self.select.options = self.options\n        # Select renamed graph\n        self.selected = plot_name\n        return Event(self, 
kind=\"rename\", plot_name=plot_name, prev_name=self.selected)\n\n    def _delete(self):\n        # Delete plot metadata from both DataSource and CatalogEntry\n        del self.source.metadata[\"plots\"][self.selected]\n        del self.source.entry._metadata[\"plots\"][self.selected]\n        # Update available options in dropdown\n        self.select.options = self.options\n        self.selected = \"None\"\n        return Event(self, kind=\"delete\", plot_name=self.selected)\n\n    def __getstate__(self, include_source=True):\n        \"\"\"Serialize the current state of the object. Set include_source\n        to False when using with another panel that will include source.\"\"\"\n        state = super().__getstate__(include_source)\n        state.update(\n            {\n                \"selected\": self.selected,\n            }\n        )\n        return state\n\n    def __setstate__(self, state):\n        \"\"\"Set the current state of the object from the serialized version.\n        Works inplace. See ``__getstate__`` to get serialized version and\n        ``from_state`` to create a new object.\"\"\"\n        super().__setstate__(state)\n        if self.visible:\n            self.selected = state.get(\"selected\")\n        return self\n"
  },
  {
    "path": "intake/readers/__init__.py",
    "content": "from intake.readers.datatypes import *  # noqa: F403\nfrom intake.readers.readers import *  # noqa: F403\nfrom intake.readers.convert import BaseConverter, Pipeline, auto_pipeline, path\nfrom intake.readers.entry import DataDescription, ReaderDescription\nfrom intake.readers.user_parameters import BaseUserParameter, SimpleUserParameter\nimport intake.readers.user_parameters\nimport intake.readers.transform\nimport intake.readers.output\nimport intake.readers.catalogs\nimport intake.readers.examples\nfrom intake.readers.metadata import metadata_fields\n\n\ndef recommend(data, *args, reader=False, **kwargs):\n    \"\"\"Recommend datatypes or readers depending on what is passed\n\n    Calls intake.readers.datatypes.recommend or intake.readers.readers.recommend\n    Please see their respective docstrings; or, better, explicitly call the one\n    which you intend to use.\n\n    Parameters\n    ----------\n    reader: bool (False)\n        If the \"data\" is a URL or other base string that can be used\n        to recommend a datatype, setting this flag True will take the first\n        guess and return the readers that can handle it.\n    \"\"\"\n    if isinstance(data, intake.readers.datatypes.BaseData):\n        return intake.readers.readers.recommend(data, *args, **kwargs)\n    datat = intake.readers.datatypes.recommend(data, *args, **kwargs)\n    if reader is False:\n        return datat\n    return datat[0](data).possible_outputs\n"
  },
  {
    "path": "intake/readers/catalogs.py",
    "content": "\"\"\"Data readers which create Catalog objects\"\"\"\n\nfrom __future__ import annotations\n\nimport itertools\nimport json\n\nimport fsspec\n\nfrom intake.readers import datatypes\nfrom intake.readers.entry import Catalog, DataDescription, ReaderDescription\nfrom intake.readers.readers import BaseReader\nfrom intake.readers.utils import LazyDict\n\n\nclass TiledLazyEntries(LazyDict):\n    \"\"\"A dictionary-like, which only loads a key's value from Tiled on demand\"\"\"\n\n    def __init__(self, client):\n        self.client = client\n\n    def __getitem__(self, item: str) -> ReaderDescription:\n        from intake.readers.readers import TiledClient\n\n        client = self.client[item]\n        data = datatypes.TiledService(url=client.uri, metadata=client.item)\n        if type(client).__name__ == \"Node\":\n            reader = TiledCatalogReader(data=data)\n        else:\n            reader = TiledClient(\n                data,\n                output_instance=f\"{type(client).__module__}:{type(client).__name__}\",\n            )\n        return reader.to_entry()\n\n    def __len__(self):\n        return len(self.client)\n\n    def __iter__(self):\n        return iter(self.client)\n\n    def __repr__(self):\n        return f\"TiledEntries {sorted(set(self))}\"\n\n\nclass TiledCatalogReader(BaseReader):\n    \"\"\"Creates a catalog of Tiled datasets from a root URL\n\n    The generated catalog is lazy, only the list of entries is got eagerly, but they are\n    fetched only on demand.\n    \"\"\"\n\n    implements = {datatypes.TiledService}\n    output_instance = \"intake.readers.entry:Catalog\"\n    imports = {\"tiled\"}\n\n    def _read(self, data, **kwargs):\n        from tiled.client import from_uri\n\n        opts = data.options.copy()\n        opts.update(kwargs)\n        client = from_uri(data.url, **opts)\n        entries = TiledLazyEntries(client)\n        return Catalog(\n            entries=entries,\n            aliases={k: k for k in 
sorted(client)},\n            metadata=client.item,\n        )\n\n\nclass SQLAlchemyCatalog(BaseReader):\n    \"\"\"Uses SQLAlchemy to get the list of tables at some SQL URL\n\n    These tables are presented as data entries in a catalog, but could then be loaded by\n    any reader that implements SQLQuery.\n    \"\"\"\n\n    implements = {datatypes.Service}\n    imports = {\"sqlalchemy\"}\n    output_instance = \"intake.readers.entry:Catalog\"\n\n    def _read(self, data, views=True, schema=None, **kwargs):\n        import sqlalchemy\n\n        engine = sqlalchemy.create_engine(data.url)\n        meta = sqlalchemy.MetaData()\n        meta.reflect(bind=engine, views=views, schema=schema)\n        tables = list(meta.tables)\n        entries = {\n            name: DataDescription(\n                \"intake.readers.datatypes:SQLQuery\",\n                kwargs={\"conn\": data.url, \"query\": name},\n            )\n            for name in tables\n        }\n        return Catalog(data=entries)\n\n\nclass StacCatalogReader(BaseReader):\n    \"\"\"Create a Catalog from a STAC endpoint or file\n\n    https://stacspec.org/en\n    \"\"\"\n\n    # STAC organisation: Catalog->Item->Asset. 
Catalogs can reference Catalogs.\n    # also have ItemCollection (from searching a Catalog) and CombinedAsset (multi-layer data)\n    # Asset and CombinedAsset are datasets, the rest are Catalogs\n\n    implements = {datatypes.STACJSON, datatypes.Literal}\n    imports = {\"pystac\"}\n    output_instance = \"intake.readers.entry:Catalog\"\n\n    def _read(\n        self,\n        data,\n        cls: str = \"Catalog\",\n        signer: callable | None = None,\n        prefer: tuple[str] | str | None = (\"xarray\", \"numpy\"),\n        **kwargs,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        data:\n            JSON-like STAC response, or the actual URL to fetch this from\n        cls:\n            the class of STAC object this should be, Catalog, Item, Asset, ItemCollection, Feature\n        signer:\n            if given, apply this function to assets and derived items, to add signature/\n            auth information to HTTP calls\n        prefer:\n            if given, select readers matching this spec from those that might apply to\n            a given asset; e.g., \"xarray\" for those data that can be read into xarray.\n        \"\"\"\n        import pystac\n\n        cls = getattr(pystac, cls)\n        if isinstance(data, datatypes.JSONFile):\n            self._stac = cls.from_file(data.url)\n        else:\n            self._stac = cls.from_dict(data.data)\n        metadata = self._stac.to_dict()\n        metadata.pop(\"links\", None)\n        self.metadata.update(metadata)\n        cat = Catalog(metadata=self.metadata)\n        items = []\n\n        # the following can be slow and could be deferred to lazy entries, if we can get\n        # the names without details cheaply\n        if isinstance(self._stac, pystac.Catalog):\n            items = itertools.chain(self._stac.get_children(), self._stac.get_items())\n        elif isinstance(self._stac, pystac.ItemCollection):\n            items = self._stac.items\n        if 
hasattr(self._stac, \"assets\"):\n            for key, value in self._stac.assets.items():\n                if signer:\n                    signer(value)\n                try:\n                    reader = self._get_reader(\n                        value, signer=signer, prefer=prefer, metadata=self.metadata\n                    ).to_entry()\n                    cat[key] = reader\n                except (ValueError, TypeError, StopIteration):\n                    pass\n\n        for subcatalog in items:\n            subcls = type(subcatalog).__name__\n\n            cat[subcatalog.id] = ReaderDescription(\n                reader=self.qname(),\n                kwargs=dict(\n                    {\n                        \"cls\": subcls,\n                        \"data\": datatypes.Literal(subcatalog.to_dict()),\n                        \"signer\": signer,\n                        \"prefer\": prefer,\n                    },\n                    **kwargs,\n                ),\n                metadata=self.metadata,\n            )\n        return cat\n\n    @staticmethod\n    def _get_reader(asset, signer=None, prefer=None, metadata=None):\n        \"\"\"\n        Assign intake driver for data I/O\n\n        prefer: str | list[str]\n            passed to .to_reader to inform what class of reader would be preferable, if any\n        \"\"\"\n        url = asset.href\n        mime = asset.media_type\n\n        if mime in [\"\", \"null\"]:\n            mime = None\n\n        # if mimetype not registered try rasterio driver\n        storage_options = asset.extra_fields.get(\"table:storage_options\", {})\n        if \"credential\" in storage_options:\n            # MS-ABFS specific argument; look for MS identifier?\n            storage_options[\"sas_token\"] = storage_options[\"credential\"]\n        cls = datatypes.recommend(url, mime=mime, storage_options=storage_options, head=False)\n        meta = asset.to_dict()\n        if cls:\n            data = cls[0](url=url, 
metadata=meta, storage_options=storage_options)\n        else:\n            raise ValueError\n        if \"stac-items\" in meta.get(\"roles\", []) or [] and isinstance(data, datatypes.Parquet):\n            from intake.readers.readers import DaskGeoParquet\n\n            data.metadata[\"signer\"] = signer\n            data.metadata[\"prefer\"] = prefer\n            return DaskGeoParquet(data)\n        else:\n            rr = None\n        return data.to_reader(reader=rr, outtype=prefer, metadata=metadata)\n\n\nclass StackBands(BaseReader):\n    \"\"\"Reimplementation of \"StackBandsSource\" from intake-stac\n\n    Takes the subitems of a given collection and concatenates them using xarray\n    on the given dimension.\n    \"\"\"\n\n    implements = {datatypes.STACJSON, datatypes.Literal}\n    imports = {\"pystac\", \"xarray\"}\n    output_instance = \"xarray:Dataset\"\n\n    def _read(self, data, bands: list[str], concat_dim: str = \"band\", signer=None, **kw):\n        \"\"\"\n        Parameters\n        ----------\n        data:\n            STAC endpoint, file, or dict literal\n        bands:\n            band_id, name or common_name to select from contained items\n        concat_dim:\n            concat dimansion in the xarray sense\n\n        Returns\n        -------\n        Configured reader, so that you can persist it. 
Call .read() again to execute\n        the read and concat operation.\n        \"\"\"\n        # this should be a separate reader for STACJSON,\n        import pystac\n        from pystac.extensions.eo import EOExtension\n\n        cls = pystac.Item\n        if isinstance(data, datatypes.JSONFile):\n            self._stac = cls.from_file(data.url)\n        else:\n            self._stac = cls.from_dict(data.data)\n        if signer:\n            signer(self._stac)\n\n        band_info = [band.to_dict() for band in EOExtension.ext(self._stac).bands]\n        metadatas = {}\n        titles = []\n        hrefs = []\n        types = []\n        assets = self._stac.assets\n        for band in bands:\n            # band can be band id, name or common_name\n            if band in assets:\n                info = next(\n                    (\n                        b\n                        for b in band_info\n                        if band in [b.get(_) for _ in [\"common_name\", \"name\", \"id\"]]\n                    ),\n                    None,\n                )\n            else:\n                info = next(\n                    (b for b in band_info if band in [b.get(_) for _ in [\"common_name\", \"name\"]]),\n                    None,\n                )\n                if info is not None:\n                    band = info.get(\"id\", info.get(\"name\"))\n\n            if band not in assets or info is None:\n                valid_band_names = []\n                for b in band_info:\n                    valid_band_names.append(b.get(\"id\", b.get(\"name\")))\n                    valid_band_names.append(b.get(\"common_name\"))\n                raise ValueError(\n                    f\"{band} not found in list of eo:bands in collection.\"\n                    f\"Valid values: {sorted(list(set(valid_band_names)))}\"\n                )\n            asset = assets.get(band)\n            metadatas[band] = asset.to_dict()\n            titles.append(band)\n            
types.append(asset.media_type)\n            hrefs.append(asset.href)\n\n        unique_types = set(types)\n        if len(unique_types) != 1:\n            raise ValueError(\n                f\"Stacking failed: bands must have same type, multiple found: {unique_types}\"\n            )\n        reader = StacCatalogReader._get_reader(asset, signer=signer)\n        reader.kwargs[\"dim\"] = concat_dim\n        reader.kwargs[\"data\"].url = hrefs\n        reader.kwargs.update(kw)\n        return reader.read()\n\n\nclass StacSearch(BaseReader):\n    \"\"\"\n    Get stac objects matching a search spec from a STAC endpoint\n    \"\"\"\n\n    implements = {datatypes.STACJSON}\n    imports = {\"pystac\"}\n    output_instance = \"intake.readers.entry:Catalog\"\n\n    def __init__(self, metadata=None, **kwargs):\n        super().__init__(metadata=metadata, **kwargs)\n\n    def _read(self, data, query=None, **kwargs):\n        \"\"\"\n        Parameters\n        ----------\n        query:\n            See https://pystac-client.readthedocs.io/en/latest/api.html#item-search\n            for a description of the available fields\n        kwargs:\n            passed to the resulting readers, e.g., for auth information\n        \"\"\"\n        import requests\n        from pystac import ItemCollection\n\n        req = requests.post(data.url + \"/search\", json=query)\n        out = req.json()\n        cat = Catalog(metadata=self.metadata)\n        items = ItemCollection.from_dict(out).items\n        for subcatalog in items:\n            subcls = type(subcatalog).__name__\n            cat[subcatalog.id] = ReaderDescription(\n                reader=StacCatalogReader.qname(),\n                kwargs=dict(\n                    **{\"cls\": subcls, \"data\": datatypes.Literal(subcatalog.to_dict())},\n                    **kwargs,\n                ),\n            )\n        return cat\n\n\nclass STACIndex(BaseReader):\n    \"\"\"Searches stacindex.org for known public STAC data 
sources\"\"\"\n\n    implements = {datatypes.Service}\n    output_instance = Catalog.qname()\n    imports = {\"pystac\"}\n\n    def _read(self, *args, **kwargs):\n        with fsspec.open(\"https://stacindex.org/api/catalogs\") as f:\n            data = json.load(f)\n        cat = Catalog()\n        for entry in data:\n            if entry[\"isPrivate\"]:\n                continue\n            if entry[\"isApi\"]:\n                cat[entry[\"slug\"]] = StacSearch(\n                    data=datatypes.STACJSON(entry[\"url\"]),\n                    metadata={\n                        \"title\": entry[\"title\"],\n                        \"description\": entry[\"summary\"],\n                        \"created\": entry[\"created\"],\n                        \"updated\": entry[\"updated\"],\n                    },\n                )\n            else:\n                cat[entry[\"slug\"]] = StacCatalogReader(\n                    data=datatypes.STACJSON(entry[\"url\"]),\n                    metadata={\n                        \"title\": entry[\"title\"],\n                        \"description\": entry[\"summary\"],\n                        \"created\": entry[\"created\"],\n                        \"updated\": entry[\"updated\"],\n                    },\n                )\n        return cat\n\n\nclass THREDDSCatalog(Catalog):\n    \"\"\"A catalog provided by a THREDDS service\n\n    This subclass exists, just so we can indicate the possibility of using the server's\n    search endpoint and xarray collection\n    \"\"\"\n\n\nclass THREDDSCatalogReader(BaseReader):\n    \"\"\"\n    Read from THREDDS endpoint\n\n    https://www.unidata.ucar.edu/software/tds/\n    \"\"\"\n\n    implements = {datatypes.THREDDSCatalog}\n    output_instance = THREDDSCatalog.qname()\n    imports = {\"siphon\", \"xarray\"}\n\n    def _read(self, data, make=\"both\", **kwargs):\n        \"\"\"\n        Parameters\n        ----------\n        data:\n            Service URL endpoint\n        make: 
\"both\" | \"dap\" | \"cdf\"\n            For each iterm, make an openDAP reader, netCDF reader, or both.\n            If both, the entries will have the same name, with _DAP and\n            _CDF appended.\n        \"\"\"\n        from siphon.catalog import TDSCatalog\n\n        from intake.readers.readers import XArrayDatasetReader\n\n        thr = TDSCatalog(data.url)\n        cat = THREDDSCatalog(metadata=thr.metadata)\n        for r in thr.catalog_refs.values():\n            cat[r.title] = THREDDSCatalogReader(datatypes.THREDDSCatalog(url=r.href))\n        for ds in thr.datasets.values():\n            if make == \"both\":\n                cat[ds.name + \"_DAP\"] = XArrayDatasetReader(\n                    datatypes.Service(ds.access_urls[\"OpenDAP\"]), engine=\"pydap\"\n                )\n                cat[ds.name + \"_CDF\"] = XArrayDatasetReader(\n                    datatypes.HDF5(ds.access_urls[\"HTTPServer\"]), engine=\"h5netcdf\"\n                )\n            elif make == \"dap\":\n                cat[ds.name] = XArrayDatasetReader(\n                    datatypes.Service(ds.access_urls[\"OpenDAP\"]), engine=\"pydap\"\n                )\n            elif make == \"cdf\":\n                cat[ds.name] = XArrayDatasetReader(\n                    datatypes.HDF5(ds.access_urls[\"HTTPServer\"]), engine=\"h5netcdf\"\n                )\n            else:\n                raise ValueError(\"parameter `make` must be one of both|dap|cdf, but got '%s'\", make)\n\n        return cat\n\n\nclass HuggingfaceHubCatalog(BaseReader):\n    \"\"\"\n    Datasets from HuggingfaceHub\n\n    To actually access datasets which are not public, you will need to supply\n    and appropriate token. 
By default, you will be able to read any public/example\n    data.\n\n    Examples\n    --------\n    >>> hf = intake.readers.catalogs.HuggingfaceHubCatalog().read()\n    >>> hf['acronym_identification'].read()\n    DatasetDict({\n        train: Dataset({\n            features: ['id', 'tokens', 'labels'],\n            num_rows: 14006\n        })\n        validation: Dataset({\n            features: ['id', 'tokens', 'labels'],\n            num_rows: 1717\n        })\n        test: Dataset({\n            features: ['id', 'tokens', 'labels'],\n            num_rows: 1750\n        })\n    })\n\n    \"\"\"\n\n    output_instance = \"intake.readers.entry:Catalog\"\n    imports = {\"datasets\"}\n    func = \"huggingface_hub:list_datasets\"\n\n    def _read(self, *args, with_community_datasets: bool = False, **kwargs):\n        \"\"\"\n        Parameters\n        ----------\n        with_community_datasets:\n            If False, only includes official public data, and retrieves fewer entries.\n        \"\"\"\n        from intake.readers.datatypes import HuggingfaceDataset\n        from intake.readers.readers import HuggingfaceReader\n        import huggingface_hub\n\n        datasets = huggingface_hub.list_datasets(full=False)\n        if not with_community_datasets:\n            datasets = [dataset for dataset in datasets if \"/\" not in dataset.id]\n        entries = [\n            HuggingfaceReader(data=HuggingfaceDataset(d.id, metadata=d.__dict__)) for d in datasets\n        ]\n        cat = Catalog(entries=entries)\n        cat.aliases = {d.id: e for d, e in zip(datasets, cat.entries)}\n        return cat\n\n\nclass SKLearnExamplesCatalog(BaseReader):\n    \"\"\"\n    Example datasets from sklearn.datasets\n\n    https://scikit-learn.org/stable/datasets/toy_dataset.html\n\n    Each entry has some specific parameters, please read the linked page. 
Note that the metadata\n    is only available in the final dataset after .read(), not before.\n\n    Examples\n    --------\n    >>> import intake.readers.catalogs\n    >>> cat = intake.readers.catalogs.SKLearnExamplesCatalog().read()\n    >>> list(cat)\n    ['breast_cancer',\n     'diabetes',\n     'digits',\n     ...]\n    >>> cat.olivetti_faces.read()\n    downloading Olivetti faces from https://ndownloader.figshare.com/files/5976027 to TMP\n    {'data': array([[0.30991736, 0.3677686 , 0.41735536, ..., 0.15289256, 0.16115703,\n             0.1570248 ],\n            [0.45454547, 0.47107437, 0.5123967 , ..., 0.15289256, 0.15289256,\n             0.15289256],\n            [0.3181818 , 0.40082645, 0.49173555, ..., 0.14049587, 0.14876033,\n            ...\n    \"\"\"\n\n    output_instance = \"intake.readers.entry:Catalog\"\n    imports = {\"sklearn\"}\n\n    def _read(self, **kw):\n        from intake.readers.readers import SKLearnExampleReader\n        import sklearn.datasets\n\n        names = [funcname[5:] for funcname in dir(sklearn.datasets) if funcname.startswith(\"load_\")]\n        names.extend(\n            [funcname[6:] for funcname in dir(sklearn.datasets) if funcname.startswith(\"fetch_\")]\n        )\n        entries = [SKLearnExampleReader(name=name) for name in names]\n        cat = Catalog(entries=entries)\n        cat.aliases = {name: e for name, e in zip(names, cat.entries)}\n        return cat\n\n\nclass TorchDatasetsCatalog(BaseReader):\n    \"\"\"\n    Standard example PyTorch datasets\n\n    Includes all the entries in packages torchvision, torchaudio, torchtext . 
The\n    types of these data when read are all torch.utils.data:Dataset, but the contents\n    are quite different.\n\n    Examples\n    --------\n    >>> cat = intake.readers.catalogs.TorchDatasetsCatalog(rootdir=\"here\").read()\n    >>> cat.RTE.read()\n    (ShardingFilterIterDataPipe,\n     ShardingFilterIterDataPipe,\n     ShardingFilterIterDataPipe)\n    \"\"\"\n\n    # TODO: the load function can be used for a wide variety of local files\n    output_instance = \"intake.readers.entry:Catalog\"\n    imports = {\"datasets\"}\n\n    def _read(self, rootdir: str, *args, **kwargs):\n        \"\"\"\n        Parameters\n        ----------\n        rootdir:\n            A local directory to store cached files\n\n        Some datasets require further kwargs, such as subset (e.g., \"train\") or other\n        selector.\n        \"\"\"\n        from intake.readers.readers import TorchDataset\n        import importlib\n\n        cat = Catalog()\n        for name in (\"vision\", \"audio\", \"text\"):\n            try:\n                mod = importlib.import_module(f\"torch{name}\")\n                for func in mod.datasets.__all__:\n                    f = getattr(mod.datasets, func)\n                    metadata = (\n                        {\"description\": f.__doc__.split(\"\\n\", 1)[0], \"text\": f.__doc__}\n                        if f.__doc__\n                        else {}\n                    )\n                    metadata[\"section\"] = name\n                    cat[func] = TorchDataset(\n                        modname=name, funcname=func, rootdir=rootdir, metadata=metadata\n                    )\n            except (ImportError, ModuleNotFoundError):\n                pass\n        return cat\n\n\nclass TensorFlowDatasetsCatalog(BaseReader):\n    \"\"\"\n    Datasets from the TensorFlow public registry\n\n    See https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets for\n    full decriptions. 
Data will be cached locally on load, and metadata is only fetched\n    at that time.\n\n    Examples\n    --------\n    >>> tf = intake.readers.catalogs.TensorFlowDatasetsCatalog().read()\n    >>> tf.xnli.read()\n    Downloading and preparin ...\n    Dataset xnli downloaded and prepared to <>>. Subsequent calls will reuse this data.\n    Out[13]:\n    ({Split('test'): <PrefetchDataset element_spec={'hypothesis': ...\n    \"\"\"\n\n    output_instance = \"intake.readers.entry:Catalog\"\n    imports = {\"tensorflow_datasets\"}\n\n    def _read(self, *args, **kwargs):\n        from intake.readers.readers import TFPublicDataset\n        from tensorflow_datasets.core import community\n\n        cat = Catalog()\n        for name in community.registry.registered._DATASET_REGISTRY:\n            cat[name] = TFPublicDataset(name=name)\n        return cat\n\n\nclass EarthdataReader(BaseReader):\n    \"\"\"Read particular earthdata dataset by ID and parameter bounds\n\n    Requires registration at https://urs.earthdata.nasa.gov/ and calling\n    earthaccess.login() before access. For some specific providers, you also\n    must allow access within the online portal before loading data.\n\n    Will attempt to read all data\n    with xarray as netCDF4/HDF5 files.\n    \"\"\"\n\n    output_instance = \"xarray:Dataset\"\n    imports = {\"earthdata\", \"xarray\"}\n    func = \"earthaccess:search_data\"\n    func_doc = \"earthaccess:open\"\n\n    def _read(self, concept, **kwargs):\n        import earthaccess\n        import xarray as xr\n\n        granules = self._func(concept_id=concept, **kwargs)\n        files = earthaccess.open(granules)\n        return xr.open_mfdataset(files, engine=\"h5netcdf\")\n\n\nclass EarthdataCatalogReader(BaseReader):\n    \"\"\"Finds the earthdata datasets that contain some data in the given query bounds\n\n    Each entry returns an ``EarthdataReader`` - see further documentation there. 
The\n    keys are the \"concept-id\" of each dataset, a unique string.\n\n    Examples\n    --------\n\n    >>> import intake.readers.catalogs\n    >>> cat = intake.readers.catalogs.EarthdataCatalogReader(temporal=(\"2002-01-01\", \"2002-01-02\")).read()\n    >>> reader = cat['C2723754864-GES_DISC']\n    >>> reader.read()\n    <xarray.Dataset>\n    Dimensions:                         (time: 2, lon: 3600, lat: 1800, nv: 2)\n    Coordinates:\n      * lon                             (lon) float32 -179.9 -179.9 ... 179.9 179.9\n      * lat                             (lat) float64 -89.95 -89.85 ... 89.85 89.95\n      * time                            (time) datetime64[ns] 2002-01-01 2002-01-02\n    Dimensions without coordinates: nv\n    Data variables:\n        precipitation                   (time, lon, lat) float32 dask.array<chunksize=(1, 3600, 900), meta=np.ndarray>\n        precipitation_cnt               (time, lon, lat) int8 dask.array<chunksize=(1, 3600, 900), meta=np.ndarray>\n    ...\n    \"\"\"\n\n    output_instance = \"intake.readers.entry:Catalog\"\n    imports = {\"earthdata\", \"xarray\"}\n    func = \"earthaccess:search_datasets\"\n\n    def _read(self, temporal=(\"1980-01-01\", \"2023-11-10\"), **kwargs):\n        cat = Catalog()\n        dss = self._func(temporal=temporal, cloud_hosted=True, **kwargs)\n        for ds in dss:\n            cat[ds[\"meta\"][\"concept-id\"]] = ReaderDescription(\n                reader=\"intake.readers.catalogs:EarthdataReader\",\n                metadata=dict(ds),\n                kwargs=dict(\n                    concept=ds[\"meta\"][\"concept-id\"], temporal=temporal, cloud_hosted=True, **kwargs\n                ),\n            )\n        return cat\n"
  },
  {
    "path": "intake/readers/convert.py",
    "content": "\"\"\"Convert between python representations of data\n\nBy convention, functions here do not change the data, just how it is held.\n\"\"\"\nfrom __future__ import annotations\n\nimport copy\nimport re\nfrom urllib.parse import urljoin\nfrom functools import lru_cache\nfrom itertools import chain\n\nfrom intake import import_name, conf\nfrom intake.readers.datatypes import OpenAIService\nfrom intake.readers import BaseData, BaseReader, readers, LlamaServerReader, OpenAIReader\nfrom intake.readers.utils import all_to_one, subclasses, safe_dict\n\n\nclass ImportsProperty:\n    \"\"\"Builds the .imports attribute from classes in the .instances class attribute\"\"\"\n\n    def __get__(self, obj, cls):\n        # this asserts that ALL in and out types should be importable\n        cls.imports = set(\n            _.split(\":\", 1)[0].split(\".\", 1)[0]\n            for _ in chain(cls.instances, cls.instances.values())\n            if _ is not SameType\n        )\n        return cls.imports\n\n\nclass BaseConverter(BaseReader):\n    \"\"\"Converts from one object type to another\n\n    Most often, subclasses call a single function on the data, but arbitrary complex transforms\n    are possible. 
This is designed to be one step in a Pipeline.\n\n    .run() will be called on the output object from the previous stage, subclasses will wither\n    override that, or just provide a func=.\n    \"\"\"\n\n    instances: dict[str, str] = {}  #: mapping from input types to output types\n\n    def run(self, x, *args, **kwargs):\n        \"\"\"Execute a conversion stage on the output object from another stage\n\n        Subclasses may override this\n        \"\"\"\n        func = import_name(self.func)\n        return func(x, *args, **kwargs)\n\n    def _read(self, *args, data=None, **kwargs):\n        \"\"\"Read the data\n\n        Subclasses may override this if they wish to interact with the upstream reader/pipeline.\n        \"\"\"\n        if data is None:\n            data = args[0]\n            args = args[1:]\n        if isinstance(data, BaseReader):\n            data = data.read()\n        return self.run(data, *args, **kwargs)\n\n\nclass GenericFunc(BaseConverter):\n    \"\"\"Call given arbitrary function\n\n    This could be a transform or anything; the caller should specify what the\n    output_instance will be, since the class doesn't know.\n    \"\"\"\n\n    def _read(self, *args, data=None, func=None, data_kwarg=None, **kwargs):\n        if data is not None and isinstance(data, BaseReader):\n            data = data.read()\n        if data is not None:\n            if data_kwarg is None:\n                return func(data, *args, **kwargs)\n            else:\n                kwargs[data_kwarg] = data\n                return func(*args, **kwargs)\n        return func(*args, **kwargs)\n\n\nclass SameType:\n    \"\"\"Used to indicate that the output of a transform is the same as the input, which is arbitrary\"\"\"\n\n\nclass DuckToPandas(BaseConverter):\n    instances = {\"duckdb:DuckDBPyRelation\": \"pandas:DataFrame\"}\n    func = \"duckdb:DuckDBPyConnection.df\"\n\n    def run(self, x, *args, with_arrow=True, **kwargs):\n        if with_arrow:\n            
import pandas as pd\n\n            table = x.to_arrow_table()\n            data = {\n                col: pd.Series(pd.arrays.ArrowExtensionArray(val))\n                for col, val in zip(table.column_names, table.columns)\n            }\n            return pd.DataFrame(data)\n        else:\n            return x.df()\n\n\nclass PandasToDuck(BaseConverter):\n    \"\"\"Save content of pandas dataframe to Duck internal storage\n\n    Value of ``table`` can be used to point to attached database or\n    output file.\n    \"\"\"\n\n    instances = {\"pandas:DataFrame\": \"duckdb:DuckDBPyRelation\"}\n    func = \"duckdb:df\"\n\n    def run(\n        self,\n        x,\n        table: str,\n        conn: dict | str | None = None,\n        *args,\n        comment: str = None,\n        overwrite=True,\n        **kwargs,\n    ):\n        # TODO: more options like metadata\n        import duckdb\n        from intake.readers.datatypes import SQLQuery\n\n        duck = readers.DuckSQL._duck(None, conn=conn)\n        out = duckdb.df(x, connection=duck)\n        duck.register(view_name=\"temp_view\", python_object=out)\n        duck.sql(\n            f\"CREATE {'OR REPLACE' if overwrite else ''} \"\n            f\"TABLE '{table}' AS SELECT * FROM 'temp_view';\"\n        )\n        if comment is not None:\n            # https://duckdb.org/docs/stable/sql/data_types/\n            #   literal_types.html#escape-string-literals\n            comment = str(comment).replace(\"'\", \"''\")\n            duck.sql(f\"COMMENT ON TABLE '{table}' IS '{comment}';\")\n        out = readers.DuckSQL(SQLQuery(conn=conn, query=f\"SELECT * FROM '{table}'\"))\n        return out\n\n\nclass DaskDFToPandas(BaseConverter):\n    instances = {\n        \"dask.dataframe:DataFrame\": \"pandas:DataFrame\",\n        \"dask_geopandas.core:GeoDataFrame\": \"geopandas:GeoDataFrame\",\n        \"dask.array:Array\": \"numpy:ndarray\",\n    }\n    func = \"dask:compute\"\n\n    def run(self, x, *args, **kwargs):\n     
   return self._func(x)[0]\n\n\nclass PandasToGeopandas(BaseConverter):\n    instances = {\"pandas:DataFrame\": \"geopandas:GeoDataFrame\"}\n    func = \"geopandas:GeoDataFrame\"\n\n\nclass XarrayToPandas(BaseConverter):\n    instances = {\"xarray:Dataset\": \"pandas:DataFrame\"}\n    func = \"xarray:Dataset.to_dataframe\"\n\n\nclass PandasToXarray(BaseConverter):\n    instances = {\"pandas:DataFrame\": \"xarray:Dataset\"}\n    func = \"xarray:Dataset.from_dataframe\"\n\n\nclass ToHvPlot(BaseConverter):\n    instances = all_to_one(\n        {\n            \"pandas:DataFrame\",\n            \"dask.dataframe:DataFrame\",\n            \"xarray:Dataset\",\n            \"xarray:DataArray\",\n        },\n        \"holoviews.core.layout:Composable\",\n    )\n    func = \"hvplot:hvPlot\"\n\n    def run(self, data, explorer: bool = False, **kw):\n        \"\"\"For tabular data only, pass explorer=True to get an interactive GUI\"\"\"\n        import hvplot\n\n        if explorer:\n            # this is actually a hvplot.ui:hvPlotExplorer and only allows tabular data\n            return hvplot.explorer(data, **kw)\n        return hvplot.hvPlot(data, **kw)()\n\n\nclass RayToPandas(BaseConverter):\n    instances = {\"ray.data:Dataset\": \"pandas:DataFrame\"}\n    func = \"ray.data:Dataset.to_pandas\"\n\n\nclass PandasToRay(BaseConverter):\n    instances = {\"pandas:DataFrame\": \"ray.data:Dataset\"}\n    func = \"ray.data:from_pandas\"\n\n\nclass RayToDask(BaseConverter):\n    instances = {\"ray.data:Dataset\": \"dask.dataframe:DataFrame\"}\n    func = \"ray.data:Dataset.to_dask\"\n\n\nclass DaskToRay(BaseConverter):\n    instances = {\"dask.dataframe:DataFrame\": \"ray.data:Dataset\"}\n    func = \"ray.data:from_dask\"\n\n\nclass HuggingfaceToRay(BaseConverter):\n    instances = {\"datasets.arrow_dataset:Dataset\": \"ray.data:Dataset\"}\n    func = \"ray.data:from_huggingface\"\n\n\nclass TorchToRay(BaseConverter):\n    instances = {\"torch.utils.data:Dataset\": 
\"ray.data:Dataset\"}\n    func = \"ray.data:from_torch\"\n\n\nclass SparkDFToRay(BaseConverter):\n    instances = {\"pyspark.sql:DataFrame\": \"ray.data:Dataset\"}\n    func = \"ray.data:from_spark\"\n\n\nclass RayToSpark(BaseConverter):\n    instances = {\"ray.data:Dataset\": \"pyspark.sql:DataFrame\"}\n    func = \"ray.data:Dataset.to_spark\"\n\n\nclass TiledNodeToCatalog(BaseConverter):\n    instances = {\"tiled.client.node:Node\": \"intake.readers.entry:Catalog\"}\n\n    def run(self, x, **kw):\n        # eager creation of entries from a node\n        from intake.readers.datatypes import TiledDataset, TiledService\n        from intake.readers.entry import Catalog\n        from intake.readers.readers import TiledClient, TiledNode\n\n        cat = Catalog()\n        for k, client in x.items():\n            if type(client).__name__ == \"Node\":\n                data = TiledService(url=client.uri)\n                reader = TiledNode(data=data, metadata=client.item)\n                cat[k] = reader\n            else:\n                data = TiledDataset(url=client.uri)\n                reader = TiledClient(\n                    data,\n                    output_instance=f\"{type(client).__module__}:{type(client).__name__}\",\n                    metadata=client.item,\n                )\n                cat[k] = reader\n        return cat\n\n\nclass TiledSearch(BaseConverter):\n    \"\"\"See https://blueskyproject.io/tiled/tutorials/search.html\"\"\"\n\n    instances = {\"tiled.client.node:Node\": \"tiled.client.node:Node\"}\n\n    def run(self, x, *arg, **kw):\n        # TODO: expects instances of classes in tiled.queries, which must be pickled, but\n        #  could allow (name, args) or something else\n        return x.search(*arg, **kw)\n\n\nclass TileDBToNumpy(BaseConverter):\n    instances = {\"tiledb.libtiledb:Array\": \"numpy:ndarray\"}\n\n    def run(self, x, *args, **kwargs):\n        # allow attribute selection here for when it wasn't included at read 
time?\n        return x[:]\n\n\nclass TileDBToPandas(BaseConverter):\n    \"\"\"Implemented only if an attribute was not already chosen.\"\"\"\n\n    instances = {\"tiledb.libtiledb:Array\": \"pandas:DataFrame\"}\n    func = \"tiledb.libtiledb:Array.df\"\n\n    def run(self, x, *args, **kwargs):\n        return x.df[:]\n\n\nclass DaskArrayToTileDB(BaseConverter):\n    # this is like output, and could return a datatypes.TileDB instead\n    instances = {\"dask.array:Array\": \"tiledb.libtiledb:Array\"}\n    func = \"dask.array:to_tiledb\"\n\n    def run(self, x, uri, **kwargs):\n        return self._func(x, uri, return_stored=True, **kwargs)\n\n\nclass NumpyToTileDB(BaseConverter):\n    # this could be considered an output converter, giving a datatypes.TileDB\n    # instead of the array instance\n    # alternatively, a datatypes.TileDB could be the *input* to the function\n    instances = {\"numpy:ndarray\": \"tiledb.libtiledb:Array\"}\n    func = \"tiledb:from_numpy\"\n\n    def run(self, x, uri, **kwargs):\n        return self._func(uri, x, **kwargs)\n\n\nclass DeltaQueryToDask(BaseConverter):\n    instances = {\"deltalake:DeltaTable\": \"dask.dataframe:DataFrame\"}\n    func = \"deltalake:DeltaTable.file_uris\"\n\n    def _read(self, reader, query, *args, **kwargs):\n        import dask.dataframe as dd\n\n        file_uris = reader.read().file_uris(query)\n\n        return dd.read_parquet(file_uris, storage_options=reader.kwargs[\"data\"].storage_options)\n\n\nclass DeltaQueryToDaskGeopandas(BaseConverter):\n    instances = {\"deltalake:DeltaTable\": \"dask_geopandas:GeoDataFrame\"}\n    func = \"deltalake:DeltaTable.file_uris\"\n\n    def _read(self, reader, query, *args, **kwargs):\n        import dask_geopandas\n\n        file_uris = reader.read().file_uris(query)\n\n        return dask_geopandas.read_parquet(\n            file_uris, storage_options=reader.kwargs[\"data\"].storage_options\n        )\n\n\nclass GeoDataFrameToSTACCatalog(BaseConverter):\n    
instances = {\"geopandas:GeoDataFrame\": \"intake.readers.entry:Catalog\"}\n    func = \"intake.readers.catalogs:StacCatalogReader\"\n\n    @classmethod\n    def _un_arr(cls, data):\n        # clean up dataframe\n        import numpy as np\n\n        if isinstance(data, dict):\n            data = {k: cls._un_arr(v) for k, v in data.items()}\n        elif isinstance(data, (list, np.ndarray)):\n            data = [cls._un_arr(_) for _ in data]\n        return data\n\n    def read(self, data, *args, **kwargs):\n        from intake.readers import Literal\n        from intake.readers.catalogs import StacCatalogReader\n        import stac_geoparquet\n\n        # clean up numpy arrays->list and any assets that are just None\n        data[\"assets\"] = data.assets.apply(\n            lambda x: {k: v for k, v in self._un_arr(x).items() if v}\n        )\n        stac = stac_geoparquet.stac_geoparquet.to_item_collection(data)\n        lit = Literal(stac.to_dict())\n        return StacCatalogReader(\n            lit,\n            signer=self.metadata.get(\"signer\"),\n            prefer=self.metadata.get(\"prefer\"),\n            cls=\"ItemCollection\",\n            metadata=self.metadata,\n        ).read()\n\n\nclass PandasToMetagraph(BaseConverter):\n    instances = {\"pd:DataFrame\": \"metagraph.wrappers.EdgeSet:PandasEdgeSet\"}\n    func = \"metagraph.wrappers.EdgeSet:PandasEdgeSet\"\n\n\nclass NibabelToNumpy(BaseConverter):\n    instances = {\"nibabel.spatialimages:SpatialImage\": \"numpy:ndimage\"}\n    func = \"nibabel.spatialimages:SpatialImage.get_fdata\"\n\n\nclass DicomToNumpy(BaseConverter):\n    instances = {\"pydicom.dataset:FileDataset\": \"numpy:ndarray\"}\n    func = \"pydicom.dataset:FileDataset.pixel_array\"\n\n    def run(self, x, *args, **kwargs):\n        return x.pixel_array\n\n\nclass FITSToNumpy(BaseConverter):\n    instances = {\"astropy.io.fits:HDUList\": \"numpy:ndarray\"}\n    func = \"astropy.io.fits:FitsHDU.data\"\n\n    def run(self, x, 
extension=None):\n        \"\"\"Get the array data of one FITS extension\n\n        If extension is None, find the first extension containing data.\n        \"\"\"\n        if extension is None:\n            found = False\n            for extension, hdu in enumerate(x):\n                if hdu.header.get(\"NAXIS\", 0) > 0:\n                    found = True\n                    break\n            if not found:\n                raise ValueError(\"No data extensions\")\n        return x[extension].data\n\n\nclass ASDFToNumpy(BaseConverter):\n    instances = {\"asdf:AsdfFile\": \"numpy:ndarray\"}\n    func = \"asdf:AsdfFile.\"\n\n    def run(self, x, tree_path: str | list[str], **kwargs):\n        if isinstance(tree_path, str):\n            tree_path = tree_path.split(\".\")\n        for p in tree_path:\n            x = x[p]\n        return x[:]\n\n\nclass PolarsLazy(BaseConverter):\n    instances = {\"polars:DataFrame\": \"polars:LazyFrame\"}\n    func = \"polars:DataFrame.lazy\"\n\n\nclass PolarsEager(BaseConverter):\n    instances = {\"polars:LazyFrame\": \"polars:DataFrame\"}\n    func = \"polars:LazyFrame.collect\"  # collect_async() ?\n\n\nclass PolarsToPandas(BaseConverter):\n    instances = {\"polars:DataFrame\": \"pandas:DataFrame\"}\n    func = \"polars:DataFrame.to_pandas\"\n\n    def run(self, x, *args, **kwargs):\n        return x.to_pandas(*args, **kwargs)\n\n\nclass PandasToPolars(BaseConverter):\n    instances = {\"pandas:DataFrame\": \"polars:DataFrame\"}\n    func = \"polars:from_pandas\"\n\n\nclass DataFrameToMetadata(BaseConverter):\n    instances = all_to_one(\n        [\"pandas:DataFrame\", \"dask.dataframe:DataFrame\", \"polars:DataFrame\"], \"builtins:dict\"\n    )\n\n    def run(self, x, *args, **kwargs):\n        out = {\"repr\": repr(x), \"shape\": x.shape}  # cf Repr, the output converter\n        t = str(type(x)).lower()\n        # TODO: perhaps can split this class into several\n        # TODO: implement spark, daft, modin, ibis ...\n        # Note 
that FileSizeReader can give file size on disk (if origin is files)\n        if \"pandas\" in t:\n            out[\"memory\"] = x.memory_usage(deep=True).sum()\n            out[\"schema\"] = x.dtypes if hasattr(x, \"dtypes\") else x.dtype\n            out[\"shape\"] = x.shape\n        elif \"polars\" in t:\n            out[\"memory\"] = x.estimated_size()\n            out[\"shape\"] = x.shape\n            out[\"schema\"] = x.schema\n        elif \"ray\" in t:\n            out[\"memory\"] = x.size_bytes()\n            out[\"shape\"] = [x.count(), len(x.columns)]\n            out[\"schema\"] = safe_dict(x.schema)\n        return safe_dict(out)\n\n\nclass GGUFToLlamaCPPService(BaseConverter):\n    instances = {\"intake.readers.datatypes:GGUF\": \"intake.readers.datatypes:LlamaCPPService\"}\n\n    def run(self, x, **kwargs):\n        return LlamaServerReader(x).read(**kwargs)\n\n\nclass LLamaCPPServiceToOpenAIService(BaseConverter):\n    instances = {\n        \"intake.readers.datatypes:LlamaCPPService\": \"intake.readers.datatypes:OpenAIService\"\n    }\n\n    def run(self, x, options=None):\n        url = urljoin(x.url, \"/v1\")\n        service = OpenAIService(url=url, key=\"none\", options=options)\n        return service\n\n\nclass OpenAIServiceToOpenAIClient(BaseConverter):\n    instances = {\"intake.readers.datatypes:OpenAIService\": \"openai:OpenAI\"}\n\n    def run(self, x):\n        return OpenAIReader(x).read()\n\n\ndef convert_class(data, out_type: str):\n    \"\"\"Get conversion class from given data to out_type\n\n    This works on concrete data, not a datatype or reader instance. It returns the\n    first match. 
out_type will match on regex, e.g., \"pandas\" would match \"pandas:DataFrame\"\n    \"\"\"\n    package = type(data).__module__.split(\".\", 1)[0]\n    for cls in subclasses(BaseConverter):\n        for intype, outtype in cls.instances.items():\n            if not re.findall(out_type, outtype):\n                continue\n            if intype.split(\".\", 1)[0] != package:\n                continue\n            thing = readers.import_name(intype)\n            if isinstance(data, thing):\n                return cls\n    raise ValueError(\"Converter not found\")\n\n\ndef convert_classes(in_type: str):\n    \"\"\"Get available conversion classes for input type\"\"\"\n    out_dict = []\n    package = in_type.split(\":\", 1)[0].split(\".\", 1)[0]\n    for cls in subclasses(BaseConverter):\n        for intype, outtype in cls.instances.items():\n            if \"*\" not in intype and intype.split(\":\", 1)[0].split(\".\", 1)[0] != package:\n                continue\n            if re.findall(intype.lower(), in_type.lower()) or re.findall(\n                in_type.lower(), intype.lower()\n            ):\n                if outtype == SameType:\n                    outtype = intype\n                out_dict.append((outtype, cls))\n    return out_dict\n\n\nclass Pipeline(readers.BaseReader):\n    \"\"\"Holds a list of transforms/conversions to be enacted in sequence\n\n    A transform on a pipeline makes a new pipeline with that transform added to the sequence\n    of operations.\n    \"\"\"\n\n    from intake.readers.readers import BaseReader\n\n    def __init__(\n        self,\n        steps: list[tuple[BaseReader, tuple, dict]],\n        out_instances: list[str],\n        output_instance=None,\n        metadata=None,\n        **kwargs,\n    ):\n        self.output_instances = []\n        prev = out_instances[0]\n        for inst in out_instances:\n            if inst is SameType:\n                inst = prev\n            prev = inst\n            
self.output_instances.append(inst)\n        super().__init__(\n            output_instance=output_instance or self.output_instances[-1],\n            metadata=metadata,\n            steps=steps,\n            out_instances=self.output_instances,\n        )\n        steps[-1][2].update(kwargs)\n\n    @property\n    def steps(self):\n        return self.kwargs[\"steps\"]\n\n    def __call__(self, *args, **kwargs):\n        return super().__call__(\n            *args,\n            steps=self.steps,\n            out_instances=self.output_instances,\n            metadata=self.metadata,\n            **kwargs,\n        )\n\n    def __repr__(self):\n        start = \"PipelineReader: \\n\"\n        bits = [\n            f\"  {i}: {f.qname() if isinstance(f, BaseReader) else f.__name__}, {args} {kw} => {out}\"\n            for i, ((f, args, kw), out) in enumerate(zip(self.steps, self.output_instances))\n        ]\n        return \"\\n\".join([start] + bits)\n\n    def output_doc(self):\n        from intake import import_name\n\n        out = import_name(self.output_instance)\n        return out.__doc__\n\n    def doc(self):\n        return self.doc_n(-1)\n\n    def doc_n(self, n):\n        \"\"\"Documentation for the Nth step\"\"\"\n        return self.steps[n][0].doc()\n\n    def _read_stage_n(self, stage, discover=False, **kwargs):\n        from intake.readers.readers import BaseReader\n\n        func, arg, kw = self.steps[stage]\n\n        kw2 = kw.copy()\n        kw2.update(kwargs)\n        for k, v in kw.items():\n            if isinstance(v, BaseReader):\n                kw2[k] = v.read()\n            else:\n                kw2[k] = v\n        arg = kw2.pop(\"args\", arg)\n        # TODO: these conditions can probably be combined\n        if isinstance(func, type) and issubclass(func, BaseReader):\n            if discover:\n                return func(metadata=self.metadata).discover(*arg, **kw2)\n            else:\n                return 
func(metadata=self.metadata).read(*arg, **kw2)\n        elif isinstance(func, BaseReader):\n            if discover:\n                return func.discover(*arg, **kw2)\n            else:\n                return func.read(*arg, **kw2)\n        else:\n            return func(*arg, **kw2)\n\n    def _read(self, discover=False, **kwargs):\n        data = None\n        for i, step in enumerate(self.steps):\n            kw = kwargs if i == len(self.steps) else {}\n            if i:\n                data = self._read_stage_n(i, data=data, **kw)\n            else:\n                data = self._read_stage_n(i, discover=discover, **kw)\n        return data\n\n    def apply(self, func, *arg, output_instance=None, **kwargs):\n        \"\"\"Add a pipeline stage applying function to the pipeline output so far\"\"\"\n        from intake.readers.convert import GenericFunc\n\n        kwargs[\"func\"] = func\n        return self.with_step((GenericFunc, arg, kwargs), output_instance or self.output_instance)\n\n    def first_n_stages(self, n: int):\n        \"\"\"Truncate pipeline to the given stage\n\n        If n is equal to the number of steps, this is a simple copy.\n        \"\"\"\n        # TODO: allow n=0 to get the basic reader?\n        if n < 1 or n > len(self.steps):\n            raise ValueError(f\"n must be between {1} and {len(self.steps)}\")\n\n        kw = self.kwargs.copy()\n        kw.update(\n            dict(\n                steps=self.steps[:n],\n                out_instances=self.output_instances[:n],\n                metadata=self.metadata,\n            )\n        )\n        pipe = Pipeline(\n            **kw,\n        )\n        if n < len(self.steps):\n            pipe._tok = (self.token, n)\n        return pipe\n\n    def discover(self, **kwargs):\n        return self.read(discover=True)\n\n    def with_step(self, step, out_instance):\n        \"\"\"A new pipeline like this one but with one more step\"\"\"\n        if not isinstance(step, tuple):\n           
 # must be a func - check?\n            step = (step, (), {})\n        return Pipeline(\n            steps=self.steps + [step],\n            out_instances=self.output_instances + [out_instance],\n            metadata=self.metadata,\n        )\n\n    def read_stepwise(self, breakpoint=0):\n        \"\"\"Read with a wrapper class to allow executing one step at a time\n\n        Parameters\n        ----------\n        breakpoint: int\n            At which stage of the pipeline to enter stepwise mode\n        \"\"\"\n        return PipelineExecution(self, breakpoint=breakpoint)\n\n\nclass PipelineExecution:\n\n    \"\"\"Encapsulates a Pipeline, so you can step through it stepwise\n\n    Interesting attributes to examine:\n    - .data, the result of the most recent step (initially None)\n    - .next, (i, step) the next step to perform. This is a copy, so\n      you can edit the kwargs in-place without changing the original\n    \"\"\"\n\n    def __init__(self, pipeline, breakpoint=0):\n        self.pipeline = pipeline\n        self.data = None\n        self.steps = iter(enumerate(pipeline.steps))\n        self.next = copy.copy(next(self.steps))\n        for _ in range(breakpoint):\n            self.step()\n\n    def __repr__(self):\n        return f\"Executing stage {self.next[0]} of pipeline\\n{self.pipeline}\"\n\n    def cont(self):\n        \"\"\"Continue pipeline to the end without stopping again\"\"\"\n        while True:\n            out = self.step()\n            if out is not self:\n                return out\n\n    def step(self, **kw):\n        \"\"\"Run one step of the pipeline\n\n        If it is the last step, will return the result; otherwise\n        will return self.\n        \"\"\"\n        i, step = self.next\n        if i:\n            self.data = self.pipeline._read_stage_n(i, data=self.data, **kw)\n        else:\n            self.data = self.pipeline._read_stage_n(i, **kw)\n        try:\n            self.next = next(self.steps)\n            return 
self\n        except StopIteration:\n            return self.data\n\n\ndef conversions_graph(avoid=None, allow_wildcard=True):\n    avoid = avoid or conf[\"reader_avoid\"]\n    if isinstance(avoid, str):\n        avoid = [avoid]\n    import networkx\n\n    graph = networkx.DiGraph()\n\n    # transformers\n    nodes = set(\n        cls.output_instance\n        for cls in subclasses(readers.BaseReader)\n        if cls.output_instance\n        and not any(re.findall(_.lower(), cls.qname().lower()) for _ in avoid)\n    )\n    graph.add_nodes_from(nodes)\n\n    for cls in subclasses(readers.BaseReader):\n        if any(re.findall(_.lower(), cls.qname().lower()) for _ in avoid):\n            continue\n        if cls.output_instance:\n            for impl in cls.implements:\n                graph.add_node(cls.output_instance)\n                graph.add_edge(impl.qname(), cls.output_instance, label=cls.qname())\n    for cls in subclasses(BaseConverter):\n        if any(re.findall(_.lower(), cls.qname().lower()) for _ in avoid):\n            continue\n        for inttype, outtype in cls.instances.items():\n            if (\n                isinstance(outtype, str)\n                and inttype != outtype\n                and (allow_wildcard or \"*\" not in inttype)\n            ):\n                graph.add_nodes_from((inttype, outtype))\n                graph.add_edge(inttype, outtype, label=cls.qname())\n\n    return graph\n\n\ndef plot_conversion_graph(filename) -> None:\n    # TODO: return a PNG datatype or something else?\n    import networkx as nx\n\n    g = conversions_graph(allow_wildcard=False)\n    a = nx.nx_agraph.to_agraph(g)  # requires pygraphviz\n    a.draw(filename, prog=\"fdp\")\n\n\n@lru_cache()  # clear cache if you import more things\ndef path(\n    start: str, end: str | tuple[str], cutoff: int = 5, avoid: tuple[str] | None = None\n) -> list[list]:\n    \"\"\"Find possible conversion paths from start to end types\n\n    Parameters\n    ----------\n    
start: data or reader qualified name to start with\n    end: desired output type name; any match on any of the strings given\n    cutoff: the maximum numer of steps to consider per path\n    avoid: ignore all readers/converters with a name matching this\n\n    Returns\n    -------\n    A list of paths, where each item is a list of steps (starttype, endtype) for which\n    there is a conversion class.\n    \"\"\"\n    import networkx as nx\n\n    g = conversions_graph(avoid=avoid)\n    alltypes = list(g)\n    matchtypes = [_ for _ in alltypes if re.findall(start, _)]\n    if not matchtypes:\n        raise ValueError(\"type found no match: %s\", start)\n    start = matchtypes[0]\n    if isinstance(end, str):\n        end = (end,)\n    matchtypes = [_ for _ in alltypes if any(re.findall(e, _) for e in end)]\n    if not matchtypes:\n        raise ValueError(\"outtype found no match: %s\", end)\n    end = matchtypes[0]\n    return sorted(nx.all_simple_edge_paths(g, start, end, cutoff=cutoff), key=len)\n\n\ndef auto_pipeline(\n    url: str | BaseData,\n    outtype: str | tuple[str] = \"\",\n    storage_options: dict | None = None,\n    avoid: list[str] | None = None,\n) -> Pipeline:\n    \"\"\"Create pipeline from given URL to desired output type\n\n    Will search for the shortest conversion path from the inferred data-type to the\n    output.\n\n    Parameters\n    ----------\n    url: input data, usually a location/URL, but maybe a data instance\n    outtype: pattern to match to possible output types\n    storage_options: if url is a remote str, these are kwargs that fsspec may need to\n        access it\n    avoid: don't consider readers whose names match any of these strings\n    \"\"\"\n    from intake.readers.datatypes import recommend\n\n    if isinstance(url, str):\n        if storage_options:\n            data = recommend(url, storage_options=storage_options)[0](\n                url=url, storage_options=storage_options\n            )\n        else:\n           
 data = recommend(url)[0](url=url)\n    else:\n        data = url\n    if isinstance(data, BaseData):\n        start = data.qname()\n        steps = path(start, outtype, avoid=avoid)\n        reader = data.to_reader(outtype=steps[0][0][1] if steps else outtype)\n        if steps:\n            for s in steps[0][1:]:\n                reader = reader.transform[s[1]]\n    elif isinstance(data, BaseReader):\n        reader = data\n        steps = path(data.output_instance, outtype, avoid=avoid)\n        for s in steps[0]:\n            reader = reader.transform[s[1]]\n\n    return reader\n"
  },
  {
    "path": "intake/readers/datatypes.py",
    "content": "\"\"\"Enumerates all the sorts of data that Intake knows about\"\"\"\n\nfrom __future__ import annotations\n\nimport re\nfrom itertools import chain\nfrom functools import lru_cache as cache\nfrom typing import Any, Optional\n\nimport fsspec\n\nfrom intake import import_name\nfrom intake.readers.utils import Tokenizable, subclasses\n\n# TODO: make \"structure\" possibilities an enum?\n\n\n# https://en.wikipedia.org/wiki/List_of_file_signatures\n\n\nclass BaseData(Tokenizable):\n    \"\"\"Prototype dataset definition\"\"\"\n\n    mimetypes: str = \"\"  #: regex, MIME pattern to match\n    filepattern: str = \"\"  #: regex, file URLs to match; empty if relying on magic or contains\n    structure: set[str] = set()  #: informational tags for nature of data, e.g., \"array\"\n    magic: set[\n        bytes | tuple\n    ] = set()  #: binary patterns, usually at the file head; each item identifies this data type\n    contains: set[\n        str\n    ] = set()  #: if using a directory URL, an ls() on that path will contain these things\n\n    def __init__(self, metadata: dict[str, Any] | None = None):\n        self.metadata: dict[str, Any] = metadata or {}  # arbitrary information\n\n    @classmethod\n    @cache\n    def _filepattern(cls):\n        return re.compile(cls.filepattern)\n\n    @classmethod\n    @cache\n    def _mimetypes(cls):\n        return re.compile(cls.mimetypes)\n\n    @property\n    def possible_readers(self):\n        \"\"\"List of reader classes for this type, grouped by importability\"\"\"\n        from intake.readers.readers import recommend\n\n        return recommend(self)\n\n    @property\n    def possible_outputs(self):\n        \"\"\"Map of importable readers to the expected output class of each\"\"\"\n        readers = self.possible_readers[\"importable\"]\n        return {r: r.output_instance for r in readers}\n\n    def to_reader_cls(\n        self, outtype: tuple[str] | str | None = None, reader: tuple[str] | str | type | 
None = None\n    ):\n        if outtype and reader:\n            raise ValueError\n        if isinstance(reader, str):\n            # exact match (no lowering)\n            try:\n                return import_name(reader)\n            except (ImportError, ModuleNotFoundError):\n                reader = (reader,)\n        if isinstance(reader, tuple):\n            for cls, out in self.possible_outputs.items():\n                # there shouldn't be many of these\n                if any(re.findall(r.lower(), cls.qname().lower()) for r in reader):\n                    return cls\n        if isinstance(reader, type):\n            return reader\n        elif outtype:\n            if isinstance(outtype, str):\n                outtype = (outtype,)\n            for reader, out in self.possible_outputs.items():\n                # there shouldn't be many of these\n                if out is not None and any(\n                    out == _ or re.findall(_.lower(), out.lower()) for _ in outtype\n                ):\n                    return reader\n        return next(iter(self.possible_readers[\"importable\"]))\n\n    def to_reader(self, outtype: str | None = None, reader: str | None = None, **kw):\n        \"\"\"Find an appropriate reader for this data\n\n        If neither ``outtype`` or ``reader`` is passed, the first importable reader\n        will be picked.\n\n        See also ``.possible_outputs``\n\n        Parameters\n        ----------\n        outtype: string to match against the output classes of potential readers\n        reader: string to match against the class names of the readers\n        \"\"\"\n        return self.to_reader_cls(outtype, reader)(data=self, **kw)\n\n    def to_entry(self):\n        \"\"\"Create DataDescription version of this, for placing in a Catalog\"\"\"\n        from intake.readers.entry import DataDescription\n\n        kw = {k: v for k, v in self.__dict__.items() if not k.startswith(\"_\")}\n        kw.pop(\"metadata\")  # this is always 
passed separately\n        return DataDescription(datatype=self.qname(), kwargs=kw, metadata=self.metadata)\n\n    def __repr__(self):\n        d = {k: v for k, v in self.__dict__.items() if not k.startswith(\"_\")}\n        return f\"{type(self).__name__}, {d}\"\n\n    def auto_pipeline(self, outtype: str | tuple[str]):\n        \"\"\"Find a pipeline to transform from this to the given output type\"\"\"\n        from intake.readers.convert import auto_pipeline\n\n        return auto_pipeline(self, outtype)\n\n\nclass FileData(BaseData):\n    \"\"\"Datatypes loaded from files, local or remote\"\"\"\n\n    def __init__(self, url, storage_options: dict | None = None, metadata: dict | None = None):\n        self.url = url  #: location of the file(s), should be str or list[str]\n        self.storage_options = storage_options  #: kwargs for a backend storage system\n        super().__init__(metadata)\n\n\nclass Service(BaseData):\n    \"\"\"Datatypes loaded from some service\"\"\"\n\n    def __init__(self, url, options=None, metadata=None):\n        self.url = url\n        self.options = options or {}\n        super().__init__(metadata=metadata)\n\n\nclass Catalog(BaseData):\n    \"\"\"Datatypes that are groupings of other data\"\"\"\n\n    structure = {\"catalog\"}\n\n\nclass PMTiles(FileData):\n    \"\"\"single-file archive format for tiled image data\"\"\"\n\n    filepattern = \"pmtiles\"\n    magic = {b\"PMTiles\"}\n    structure = {\"image\"}\n\n\nclass DuckDB(FileData):\n    \"\"\"Columnar table DB format used exclusively by duckdb\"\"\"\n\n    filepattern = \"(duck?)db\"\n    magic = {(8, b\"DUCK\")}\n    structure = {\"table\"}\n\n\nclass Parquet(FileData):\n    \"\"\"Column-optimized binary format\"\"\"\n\n    filepattern = \"(parq|parquet)\"\n    mimetypes = \"application/(vnd.apache.parquet|parquet|x-parquet)/\"\n    structure = {\"table\", \"nested\"}\n    magic = {b\"PAR1\"}\n    contains = {\"_metadata\", \"parq\", \"parquet\"}  # a directory can be a 
dataset\n\n\nclass CSV(FileData):\n    \"\"\"Human-readable tabular format, Comma Separated Values\"\"\"\n\n    filepattern = \"(csv$|txt$|tsv$)\"\n    mimetypes = \"(text/csv|application/csv|application/vnd.ms-excel)\"\n    structure = {\"table\"}\n\n\nclass CSVPattern(CSV):\n    \"\"\"Specialised version of CSV, with a path containing capturing fields\n\n    Characteristically contains python-style format groups with {..}\n    \"\"\"\n\n    filepattern = \".*[{].*[}].*(csv$|txt$|tsv$)\"\n\n\nclass Text(FileData):\n    \"\"\"Any text file\"\"\"\n\n    filepattern = \"(txt$|text$|dat$|ascii$)\"\n    mimetypes = \"text/.*\"\n    structure = {\"sequence\"}\n    # some optional byte order marks\n    # https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding\n    magic = {b\"\\xEF\\xBB\\xBF\", b\"\\xFE\\xFF\", b\"\\xFF\\xFE\", b\"\\x00\\x00\\xFE\\xFF\"}\n\n\nclass XML(FileData):\n    \"\"\"Extensible Markup Language file\"\"\"\n\n    filepattern = \"xml[sx]?$\"\n    mimetypes = \"(application|text)/xml\"\n    structure = {\"nested\"}\n    magic = {b\"<?xml \"}\n\n\nclass THREDDSCatalog(XML):\n    \"\"\"Datasets on a THREDDS server\n\n    Typically used for \"environmental data sources\".\n    See https://www.unidata.ucar.edu/software/tds/\n    \"\"\"\n\n    magic = {(None, b\"<xml.*<catalog \")}\n    structure = {\"catalog\"}\n\n\nclass PNG(FileData):\n    \"\"\"Portable Network Graphics, common image format\"\"\"\n\n    filepattern = \"png$\"\n    structure = {\"array\", \"image\"}\n    mimetypes = \"image/png\"\n    magic = {b\"\\x89PNG\"}\n\n\nclass JPEG(FileData):\n    \"\"\"Image format with good compression for the internet\"\"\"\n\n    filepattern = \"jpe?g$\"\n    structure = {\"array\", \"image\"}\n    mimetypes = \"image/jpeg\"\n    magic = {b\"\\xFF\\xD8\\xFF\"}\n\n\nclass WAV(FileData):\n    \"\"\"Waveform/sound file\"\"\"\n\n    filepattern = \"wav$\"\n    structure = {\"array\", \"timeseries\"}\n    mimetypes = \"audio/wav\"\n    magic = 
{(8, b\"WAVE\")}\n\n\nclass NetCDF3(FileData):\n    \"\"\"Collection of ND-arrays with coordinates, scientific file format\"\"\"\n\n    filepattern = \"(netcdf3$|nc3?$)\"\n    structure = {\"array\"}\n    mimetypes = \"application/x-netcdf\"\n    magic = {b\"CDF\"}\n\n\nclass HDF5(FileData):\n    \"\"\"Hierarchical tree of ND-arrays, widely used scientific file format\"\"\"\n\n    filepattern = \"(hdf5?|h4|nc4?)$\"  # many others by convention\n    structure = {\"array\", \"table\", \"hierarchy\"}\n    magic = {b\"\\x89HDF\"}\n    mimetypes = \"application/x-hdf5?\"\n\n    def __init__(\n        self,\n        url,\n        storage_options: dict | None = None,\n        path: str = \"\",\n        metadata: dict | None = None,\n    ):\n        \"\"\"\n        path: if given, points to specific array/group in the hierarchy; with \"group.subgroup\"\n            format\n        \"\"\"\n        self.url = url\n        self.storage_options = storage_options\n        self.path = path\n        super().__init__(url=url, storage_options=storage_options, metadata=metadata)\n\n\nclass Zarr(FileData):\n    \"\"\"Cloud optimised, chunked N-dimensional file format\"\"\"\n\n    filepattern = \"(zarr$)\"  # i.e., any directory might be\n    structure = {\"array\", \"hierarchy\"}\n    mimetypes = \"application/vnd[+.]zarr\"\n    contains = {\".zarray\", \".zgroup\", \"zarr.json\"}\n\n    def __init__(\n        self,\n        url,\n        storage_options: dict | None = None,\n        root: str = \"\",\n        metadata: dict | None = None,\n    ):\n        \"\"\"\n        root: if given, points to specific array/group in the hierarchy\n        \"\"\"\n        self.url = url\n        self.storage_options = storage_options\n        self.root = root\n        super().__init__(url=url, storage_options=storage_options, metadata=metadata)\n\n\nclass IcechunkRepo(FileData):\n    # NB: this can be considered a special case of zarr stores, but there are\n    # more possible operations on a 
repo; you can also access these using zarr\n    # alone and specialised URLs.\n\n    structure = {\"array\", \"hierarchy\"}\n    contains = {\"snapshots\"}\n\n    def __init__(\n        self,\n        url,  # just the storage driver name, i.e., icechunk.*_storage\n        # https://icechunk.io/en/latest/quickstart/#create-a-new-icechunk-repository\n        # azure, gcs, in_memory, local_filesystem, r2, s3, tigris\n        storage_options: dict | None = None,  # kwargs for the store\n        root: str = \"\",  # location in the hierarchy\n        ref: str | None = None,  # branch/tag, if not the default\n        metadata: dict | None = None,\n    ):\n        \"\"\"\n        root: if given, points to specific array/group in the hierarchy\n        \"\"\"\n        self.url = url\n        self.root = root\n        self.ref = ref\n        self.storage_options = storage_options\n        super().__init__(url=url, storage_options=storage_options, metadata=metadata)\n\n\nclass MatlabArray(FileData):\n    \"\"\"A single array in a .mat file\"\"\"\n\n    filepattern = \"mat$\"\n    magic = {b\"MATLAB\"}\n\n    def __init__(self, path, variable=None):\n        \"\"\"If variable is None, takes first non-underscored variable found\"\"\"\n        self.path = path\n        self.variable = variable\n\n\nclass MatrixMarket(FileData):\n    \"\"\"Text format for sparse array\"\"\"\n\n    magic = {b\"%%MatrixMarket\"}\n\n\nclass Excel(FileData):\n    \"\"\"The well-known spreadsheet app's file format\"\"\"\n\n    filepattern = \"xls[xmb]?\"\n    structure = {\"tabular\"}\n    mimetypes = \"application/.*(excel|xls)\"\n    magic = {b\"\\x50\\x4B\\x03\\x04\", b\"\\xD0\\xCF\\x11\\xE0\\xA1\\xB1\\x1A\\xE1\"}  # will match any office doc\n\n\nclass TIFF(FileData):\n    \"\"\"Image format commonly used for large data\"\"\"\n\n    # includes geoTIFF/COG, or split out?\n    filepattern = \"(tiff?$|cog$)\"\n    structure = {\"array\", \"image\"}\n    magic = {b\"II*\\x00\", b\"MM\\x00*\"}\n    
mimetypes = \"image/(geo)?tiff\"\n\n\nclass GRIB2(FileData):\n    \"\"\" \"Gridded\" file format commonly used in meteo forecasting\"\"\"\n\n    filepattern = \"gri?b2?$\"\n    structure = {\"array\"}\n    magic = {b\"GRIB\"}\n    mimetypes = \"application/wmo-grib\"\n\n\nclass FITS(FileData):\n    \"\"\"Tabular or array data in text/binary format common in astronomy\"\"\"\n\n    filepattern = \"fits$\"  # other conventions too\n    structure = {\"array\", \"table\"}\n    magic = {b\"SIMPLE\"}\n    mimetypes = \"(image|application)/fits\"\n\n\nclass ASDF(FileData):\n    \"\"\"Advanced Scientific Data Format\"\"\"\n\n    filepattern = \"asdf$\"\n    structure = {\"array\", \"table\"}\n    magic = {b\"#ASDF\"}\n\n\nclass DICOM(FileData):\n    \"\"\"Imaging data usually from medical scans\"\"\"\n\n    filepattern = \"(dicom|dcm|ct|mri|DCM)$\"  # and others\n    structure = {\"array\", \"image\"}\n    magic = {(128, b\"DICM\")}\n    mimetypes = \"application/dicom\"\n\n\nclass Nifti(FileData):\n    \"\"\"Medical imaging or volume data file\"\"\"\n\n    # https://brainder.org/2012/09/23/the-nifti-file-format/\n    filepattern = \"(hdr|nii)(\\\\.gz)?$\"\n    structure = {\"array\", \"image\"}\n    magic = {(344, b\"\\x6E\\x69\\x31\\x00\"), (344, b\"\\x6E\\x2B\\x31\\x00\")}\n\n\nclass OpenDAP(Service):\n    \"\"\"Earth-science oriented searchable HTTP API\"\"\"\n\n    structure = {\"array\"}\n\n\nclass SQLQuery(BaseData):\n    \"\"\"Query on a database-like service\"\"\"\n\n    structure = {\"sequence\", \"table\"}\n    filepattern = \"^(oracle|mssql|sqlite|mysql|postgres)\"\n\n    def __init__(self, conn, query, metadata=None):\n        self.conn = conn\n        self.query = query\n        super().__init__(metadata)\n\n\nclass Prometheus(Service):\n    \"\"\"Monitoring metric query service\"\"\"\n\n    structure = {\"structured\"}\n\n    def __init__(\n        self,\n        url,\n        options: Optional[dict] = None,\n        metric: Optional[str] = None,\n        
labels: Optional[dict] = None,\n        start_time=None,\n        end_time=None,\n        query: Optional[str] = None,\n        metadata=None,\n    ):\n        if query:\n            # this is the totally custom route\n            assert metric or labels or start_time or end_time is None\n        super().__init__(url, options, metadata)\n        self.query = query\n        self.metric = metric\n        self.labels = labels\n        self.start_time = start_time\n        self.end_time = end_time\n\n\nclass LlamaCPPService(Service):\n    \"\"\"Simple local HTTP chat\n\n    Also had OpenAI compatible endpoints\n\n    https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md\n    \"\"\"\n\n    def open(self):\n        \"\"\"Open chat config and chat page\"\"\"\n        import webbrowser\n\n        webbrowser.open(self.url)\n\n\nclass OpenAIService(Service):\n    \"\"\"OpenAI compatible chatbot\n\n    See https://platform.openai.com/docs/api-reference/making-requests\n    \"\"\"\n\n    def __init__(\n        self,\n        url=\"https://api.openai.com/\",\n        key: str = \"sk-no-key-required\",\n        options=None,\n        metadata=None,\n    ):\n        self.key = key\n        super().__init__(url, options=options, metadata=metadata)\n\n\nclass SQLite(FileData):\n    \"\"\"Database data stored in files\"\"\"\n\n    structure = {\"sequence\", \"table\"}\n    filepattern = \"sqlite$|sqlitedb$|db$\"\n    magic = {b\"SQLite format\"}\n\n\nclass AVRO(FileData):\n    \"\"\"Structured record passing file format\"\"\"\n\n    structure = {\"nested\"}\n    filepattern = \"avro$\"\n    magic = {b\"Obj\\x01\"}\n    mimetypes = \"avro/binary\"\n\n\nclass ORC(FileData):\n    \"\"\"Columnar-optimized tabular binary file format\"\"\"\n\n    structure = {\"nested\", \"tabular\"}\n    filepattern = \"orc$\"\n    magic = {b\"ORC\"}\n\n\nclass YAMLFile(FileData):\n    \"\"\"Human-readable JSON/object-like format\"\"\"\n\n    filepattern = \"ya?ml$\"\n    mimetypes 
= \"text/yaml\"\n    structure = {\"nested\"}\n\n\nclass CatalogFile(Catalog, YAMLFile):\n    \"\"\"Intake catalog expressed as YAML\"\"\"\n\n\nclass CatalogAPI(Catalog, Service):\n    \"\"\"An API endpoint capable of describing Intake catalogs\"\"\"\n\n    filepattern = \"^https?:\"\n\n\nclass JSONFile(FileData):\n    \"\"\"Nested record format as readable text, very common over HTTP\"\"\"\n\n    filepattern = \"json[l]$\"\n    mimetypes = \"(text|application)/json\"\n    structure = {\"nested\", \"table\"}\n    magic = {b\"{\"}\n\n\nclass GeoJSON(JSONFile):\n    \"\"\"Geo data (position and geometries) within JSON\"\"\"\n\n    filepattern = \"(?:geo)?json$\"\n    magic = {(None, b'\"type\": \"Feature')}  # not guaranteed, but good indicator\n\n\nclass Shapefile(FileData):\n    \"\"\"Geo data (position and geometries) in a set of related binary files\"\"\"\n\n    # this would only be found as a member of a .ZIP, since you need all three mandatory\n    # files to make a dataset https://en.wikipedia.org/wiki/Shapefile#Overview\n    # However, Fiona can read some .shp files with env SHAPE_RESTORE_SHX=YES\n    filepattern = \"shp$|shx$|dbf$\"\n    mimetypes = \"x-gis/x-shapefile\"\n    magic = {b\"\\x00\\x00\\x27\\x0a\"}\n\n\nclass FlatGeoBuf(FileData):\n    \"\"\"Geo data in flatbuffers\"\"\"\n\n    filepattern = \"fgb$\"\n    magic = {b\"fgb\"}\n    # b\"fgb\\x03fgb\\x01\" would be a full magic, encoding version number; here is 3.2\n\n\nclass GeoPackage(SQLite):\n    \"\"\"Geo data (position and geometries) in a SQLite DB file\"\"\"\n\n    filepattern = \"gpkg$\"\n\n\nclass STACJSON(JSONFile):\n    \"\"\"Data assets related to geo data, either as static JSON or a searchable API\"\"\"\n\n    magic = {(None, b'\"stac_version\":')}  # None means \"somewhere in the file head\"\n    mimetypes = \"(text|application)/geo\\\\+json\"\n\n\nclass TiledService(CatalogAPI):\n    magic = {(None, b\"<title>Tiled</title>\")}\n\n\nclass TiledDataset(Service):\n    \"\"\"Data access 
service for data-aware portals and data science tools\"\"\"\n\n    structure = {\"array\", \"table\", \"nested\"}\n\n\nclass TileDB(Service):\n    \"\"\"Service exposing versioned, chunked and potentially sparse arrays\"\"\"\n\n    filepattern = \"tiled://\"  # or a real URL, local or remote\n    contains = {\"__meta\", \"__schema\"}\n    structure = {\"array\", \"table\"}\n\n\nclass IcebergDataset(JSONFile):\n    \"\"\"Indexed set of parquet files with servioning and diffs\"\"\"\n\n    structure = {\"tabular\"}\n    magic = {(None, b'\"format-version\":')}\n\n\nclass DeltalakeTable(FileData):\n    \"\"\"Indexed set of parquet files with servioning and diffs\"\"\"\n\n    # a directory by convention, but otherwise can't be distinguished\n    contains = {\"_delta_log\"}\n    structure = {\"tabular\"}\n\n\nclass NumpyFile(FileData):\n    \"\"\"Simple array format\"\"\"\n\n    # will also match .npz since it will be recognised as a ZIP archive\n    magic = {b\"\\x93NUMPY\"}\n    filepattern = \"(npy$|text$)\"\n    structure = {\"array\"}\n\n\nclass RawBuffer(FileData):\n    \"\"\"A C or FORTRAN N-dimensional array buffer without metadata\"\"\"\n\n    filepattern = \"raw$\"\n    structure = {\"array\"}\n\n    def __init__(\n        self,\n        url: str,\n        dtype: str,\n        storage_options: dict | None = None,\n        metadata: dict | None = None,\n    ):\n        super().__init__(url, storage_options=storage_options, metadata=metadata)\n        self.dtype = dtype  # numpy-style\n\n\nclass Literal(BaseData):\n    \"\"\"A value that can be embedded directly to YAML (text, dict, list)\"\"\"\n\n    def __init__(self, data, metadata=None):\n        self.data = data\n        super().__init__(metadata=metadata)\n\n\nclass Handle(JSONFile):\n    \"\"\"An identifier registered on handle registry\n\n    See https://handle.net/ .\n\n    May refer to a single file or a set of files\n    \"\"\"\n\n    filepattern = \"hdl:\"\n\n\nclass Feather2(FileData):\n    
\"\"\"Tabular format based on Arrow IPC\"\"\"\n\n    magic = {b\"ARROW1\"}\n    structure = {\"tabular\", \"nested\"}\n\n\nclass Feather1(FileData):\n    \"\"\"Deprecated tabular format from the Arrow project\"\"\"\n\n    magic = {b\"FEA1\"}\n    structure = {\"tabular\", \"nested\"}\n\n\nclass PythonSourceCode(FileData):\n    \"\"\"Source code file\"\"\"\n\n    structure = {\"code\"}\n    filepattern = \"py$\"\n\n\nclass GDALRasterFile(FileData):\n    \"\"\"One of the filetpes at https://gdal.org/drivers/raster/index.html\n\n    This class overlaps with some other types, so only use when necessary.\n    These must be local paths or use GDAL's own virtual file system.\n    \"\"\"\n\n    structure = {\"array\"}\n\n\nclass GDALVectorFile(FileData):\n    \"\"\"One of the filetypes at https://gdal.org/drivers/vector/index.html\n\n    This class overlaps with some other types, so only use when necessary.\n    These must be local paths or use GDAL's own virtual file system.\n    \"\"\"\n\n    structure = {\n        \"nested\",\n        \"tabular\",\n    }  # tabular when read by geopandas, could be called a conversion\n\n\nclass HuggingfaceDataset(BaseData):\n    \"\"\"https://github.com/huggingface/datasets\"\"\"\n\n    structure = {\"nested\", \"text\"}\n\n    def __init__(self, name, split=None, metadata=None):\n        super().__init__(metadata)\n        self.name = name\n        self.split = split\n\n\nclass TFRecord(FileData):\n    \"\"\"Tensorflow record file, ready for machine learning\"\"\"\n\n    structure = {\"nested\"}\n    filepattern = \"tfrec$\"\n\n\nclass KerasModel(FileData):\n    \"\"\"Keras model parameter set\"\"\"\n\n    structure = {\"model\"}  # complex\n    filepattern = \"pb$\"  # possibly protobuf\n\n\nclass GGUF(FileData):\n    \"\"\"Trained model\n\n    (see https://github.com/ggerganov/ggml/blob/master/docs/gguf.md)\"\"\"\n\n    structure = {\"model\"}\n    filepattern = \"gguf$\"\n    magic = {b\"GGUF\"}\n\n\nclass SafeTensors(FileData):\n   
 \"\"\"Trained model\n\n    (see https://github.com/huggingface/safetensors?tab=readme-ov-file#format)\n    \"\"\"\n\n    # TODO: .bin sees to be an older pytorch-specific version of this\n    structure = {\"model\"}\n    filepattern = \"safetensors$\"\n    magic = {(8, b\"{\")}\n\n\nclass PickleFile(FileData):\n    \"\"\"Python pickle, arbitrary serialized object\"\"\"\n\n    structure = set()\n\n\nclass ModelConfig(FileData):\n    \"\"\"HuggingFace-style multi-file model directory\n\n    Looks like a catalog of related models\n    \"\"\"\n\n    structure = {\"model\"}\n    filepattern = \"config.json\"\n    magic = {b'\"model_type\":'}\n\n\nclass SKLearnPickleModel(PickleFile):\n    \"\"\"Trained model made by sklearn and saved as pickle\"\"\"\n\n\ncomp_magic = {\n    # These are a bit like datatypes making raw bytes/file object output\n    (0, b\"\\x1f\\x8b\"): \"gzip\",\n    (0, b\"BZh\"): \"bzip2\",\n    (0, b\"(\\xc2\\xb5/\\xc3\\xbd\"): \"zstd\",\n    (0, b\"\\xff\\x06\\x00\\x00sNaPpY\"): \"sz\",  # stream framed format\n}\ncontainer_magic = {\n    # these are like datatypes making filesystems\n    (257, b\"ustar\"): \"tar\",\n    (0, b\"PK\"): \"zip\",\n}\n\n\ndef recommend(\n    url: str | None = None,\n    mime: str | None = None,\n    head: bool = True,\n    contents: bool = False,\n    storage_options=None,\n    ignore: set[str] | None = None,\n) -> set[BaseData]:\n    \"\"\"Show which Intake data types can apply to the given details\n\n    Parameters\n    ----------\n    url: str\n        Location of data\n    mime: str\n        MIME type, usually \"x/y\" form\n    head: bytes | bool | None\n        A small number of bytes from the file head, for seeking magic bytes. If it is\n        True, fetch these bytes from th given URL/storage_options and use them. 
If None,\n        only fetch bytes if there is no match by mime type or path, if False, don't\n        fetch at all.\n    contents: bool | None\n        Attempt to delve into URL to analyse constituent files. This can significantly slow\n        your recommendation.\n    storage_options: dict | None\n        If passing a URL which might be a remote file, storage_options can be used\n        by fsspec.\n    ignore: set | None\n        Don't include these in the output\n\n    Returns\n    -------\n    set of matching datatype classes.\n    \"\"\"\n    # TODO: more complex returns defining which type of match hit what, or some kind of score\n    outs = ignore or set()\n    out = []\n    if isinstance(url, (list, tuple)):\n        url = url[0]\n    if head is True and url:\n        try:\n            fs, url2 = fsspec.core.url_to_fs(url, **(storage_options or {}))\n            mime = mime or fs.info(url2, refresh=True).get(\"ContentType\", None)\n        except (IOError, TypeError, AttributeError, ValueError):\n            mime = mime or None\n        try:\n            fs, url2 = fsspec.core.url_to_fs(url, **(storage_options or {}))\n            head = fs.cat_file(url2[0] if isinstance(url2, list) else url2, end=2**20)\n        except (IOError, IndexError, ValueError):\n            head = False\n    else:\n        fs = None\n\n    if isinstance(head, bytes):\n        # more specific first\n        for cls in subclasses(BaseData):\n            if cls in outs:\n                continue\n            for m in cls.magic:\n                if isinstance(m, tuple):\n                    off, m = m\n                    if off is None:\n                        if re.findall(m, head):\n                            out.append(cls)\n                            outs.add(cls)\n                            break\n                else:\n                    off = 0\n                if off is not None and head[off:].startswith(m):\n                    out.append(cls)\n                    
outs.add(cls)\n                    break\n    if mime:\n        mime = mime.lower()\n        for cls in subclasses(BaseData):\n            if cls not in outs and cls.mimetypes and re.match(cls._mimetypes(), mime):\n                out.append(cls)\n                outs.add(cls)\n    if url:\n        poss = {}\n        if fs is not None and fs.isdir(url):\n            try:\n                allfiles = fs.ls(url, detail=False)\n            except IOError:\n                allfiles = None\n        else:\n            allfiles = None\n        files = set(subclasses(FileData))\n        bases = set(subclasses(BaseData)) - files\n        # file types first, then other/services, more specific first\n        for cls in chain(files, bases):\n            if cls in outs:\n                continue\n            if cls.filepattern:\n                find = re.search(cls._filepattern(), url.lower())\n                if find and not allfiles:\n                    # not a directory or empty directory\n                    poss[cls] = find.start()\n            if cls.contains and allfiles:\n                if any(re.search(c, a) for c in cls.contains for a in allfiles):\n                    poss[cls] = 0\n        out.extend(sorted(poss, key=lambda x: poss[x]))\n    if contents and url:\n        for ext in {\".gz\", \".gzip\", \".bzip2\", \"bz2\", \".zstd\", \".tar\", \".tgz\"}:\n            if url.endswith(ext):\n                out.extend(recommend(url[: -len(ext)], head=False, ignore=outs))\n    if out:\n        return out\n\n    if head is None and url:\n        return recommend(url, mime=mime, head=True, storage_options=storage_options)\n\n    if isinstance(head, bytes):\n        for (off, mag), comp in comp_magic.items():\n            if head[off:].startswith(mag):\n                storage_options = (storage_options or {}).copy()\n                storage_options[\"compression\"] = comp\n                out = recommend(url, storage_options=storage_options)\n                if out:\n   
                 print(\"Update storage_options: \", storage_options)\n                    return out\n        for (off, mag), comp in container_magic.items():\n            if head[off:].startswith(mag):\n                prot = fsspec.core.split_protocol(url)[0]\n                out = recommend(f\"{comp}://*::{url}\", storage_options={prot: storage_options})\n                if out:\n                    print(\"Update url: \", url, \"\\nstorage_options: \", storage_options)\n                    return out\n        # TODO: if directory, look inside files?\n    return []\n"
  },
  {
    "path": "intake/readers/entry.py",
    "content": "\"\"\"Description of the ways to load a data set\n\nThese are the definitions as they would appear in a Catalog: they may have (unevaluated)\nuser parameters and references to one-another, as well as other templated values.\n\nThe rule is: when placing an entry in a catalog, it is converted to its constituent data and\nreader descriptions. When accessing an entry, it is re-instantiated as a reader. The entries\nwithin a catalog can be amended in-place (such as extracting user parameters or templating)\nand persisted as catalog files.\n\"\"\"\nfrom __future__ import annotations\n\nfrom collections.abc import Mapping\nfrom copy import copy\nfrom itertools import chain\nimport re\nfrom typing import Any, Iterable\n\nimport fsspec\nimport yaml\n\nfrom intake import import_name\nfrom intake.readers.user_parameters import (\n    BaseUserParameter,\n    SimpleUserParameter,\n    set_values,\n)\nfrom intake.readers.utils import (\n    Tokenizable,\n    check_imports,\n    extract_by_path,\n    extract_by_value,\n    merge_dicts,\n    _is_tok,\n)\n\n\nclass DataDescription(Tokenizable):\n    \"\"\"Defines some data: class and arguments. 
This may be loaded in a number of ways\n\n    A DataDescription normally resides in a Catalog, and can contain templated arguments.\n    When there are user_parameters, these will also be applied to any reader that\n    depends on this data.\n    \"\"\"\n\n    def __init__(\n        self,\n        datatype: str,\n        kwargs: dict = None,\n        metadata: dict = None,\n        user_parameters: dict = None,\n    ):\n        self.datatype = datatype\n        self.kwargs = kwargs or {}\n        self.metadata = metadata or {}\n        self.user_parameters = user_parameters or {}\n\n    def __repr__(self):\n        part = f\"DataDescription type {self.datatype}\\n kwargs {self.kwargs}\"\n        if self.user_parameters:\n            part += f\"\\n parameters {self.user_parameters}\"\n        return part\n\n    def to_data(self, user_parameters=None, **kwargs):\n        cls = import_name(self.datatype)\n        ups = self.user_parameters.copy()\n        ups.update(user_parameters or {})\n        kw = self.get_kwargs(user_parameters=ups, **kwargs)\n        return cls(**kw, metadata=self.metadata)\n\n    def __call__(self, **kwargs):\n        return self.to_data(**kwargs)\n\n    def get_kwargs(\n        self, user_parameters: dict[str | BaseUserParameter] | None = None, **kwargs\n    ) -> dict[str, Any]:\n        \"\"\"Get set of kwargs for given reader, based on prescription, new args and user parameters\n\n        Here, `user_parameters` is intended to come from the containing catalog. To provide values
To provide values\n        for a user parameter, include it by name in kwargs\n        \"\"\"\n        kw = self.kwargs.copy()\n        kw.update(kwargs)\n        up = self.user_parameters.copy()\n        up.update(user_parameters or {})\n        kw = set_values(up, kw)\n        return kw\n\n    def extract_parameter(\n        self,\n        name: str,\n        path: str | None = None,\n        value: Any = None,\n        cls: type = SimpleUserParameter,\n        **kw,\n    ):\n        if not ((path is None) ^ (value is None)):\n            raise ValueError\n        if path is not None:\n            kw, up = extract_by_path(path, cls, name, self.kwargs, **kw)\n        else:\n            kw, up = extract_by_value(value, cls, name, self.kwargs, **kw)\n        self.kwargs = kw\n        self.user_parameters[name] = up\n\n\nclass ReaderDescription(Tokenizable):\n    \"\"\"\n    A serialisable description of a reader or pipeline\n\n    This class is typically stored inside Catalogs, and can contain templated arguments\n    which get evaluated at the time that it is accessed from a Catalog.\n    \"\"\"\n\n    def __init__(\n        self,\n        reader: str,\n        kwargs: dict[str, Any] | None = None,\n        user_parameters: dict[str | BaseUserParameter] | None = None,\n        metadata: dict | None = None,\n        output_instance: str | None = None,\n    ):\n        self.reader = reader\n        self.kwargs = kwargs or dict[str, Any]()\n        self.output_instance = output_instance\n        self.user_parameters: dict[str | BaseUserParameter] = user_parameters or {}\n        self.metadata = metadata or {}\n\n    def check_imports(self):\n        \"\"\"Are the packages listed in the \"imports\" key of the metadata available?\"\"\"\n        cls = import_name(self.reader)\n        class_import = cls.check_imports()\n        meta_imports = check_imports(*self.metadata.get(\"imports\", set()))\n        return class_import & meta_imports\n\n    def get_kwargs(self, 
user_parameters=None, **kwargs) -> dict[str, Any]:\n        \"\"\"Get set of kwargs for given reader, based on prescription, new args and user parameters\n\n        Here, `user_parameters` is intended to come from the containing catalog. To provide values\n        for a user parameter, include it by name in kwargs\n        \"\"\"\n        kw = self.kwargs.copy()\n        user_parameters = user_parameters or {}\n        kw.update(kwargs)\n\n        # make data instance\n        up = user_parameters or {}  # global/catalog\n        if \"data\" in kw and isinstance(kw[\"data\"], DataDescription):\n            extra = kw[\"data\"]\n            up.update(kw[\"data\"].user_parameters)\n            kw_subset = {k: v for k, v in kwargs.items() if k in user_parameters or k in extra}\n            kw[\"data\"] = kw[\"data\"].to_data(user_parameters=user_parameters, **kw_subset)\n        else:\n            extra = {}\n        kw[\"output_instance\"] = self.output_instance\n\n        # now make reader\n        up.update(user_parameters)\n        kw = set_values(up, kw)\n        return kw\n\n    def extract_parameter(self, name: str, path=None, value=None, cls=SimpleUserParameter, **kw):\n        \"\"\"Creates new version of the description\n\n        Creates new instance, since the token will in general change\n        \"\"\"\n        if not ((path is None) ^ (value is None)):\n            raise ValueError\n        if path is not None:\n            kw, up = extract_by_path(path, cls, name, self.kwargs, **kw)\n        else:\n            kw, up = extract_by_value(value, cls, name, self.kwargs, **kw)\n        self.kwargs = kw\n        self.user_parameters[name] = up\n\n    def to_reader(self, user_parameters=None, **kwargs):\n        cls = import_name(self.reader)\n        if \"data\" in kwargs and isinstance(kwargs[\"data\"], DataDescription):\n            # if not, is already a BaseData\n            ups = kwargs[\"data\"].user_parameters.copy()\n        else:\n            ups = 
{}\n        ups.update(self.user_parameters)\n        ups.update(user_parameters or {})\n        kw = self.get_kwargs(user_parameters=ups, **kwargs)\n        return cls(metadata=self.metadata, **kw)\n\n    def to_cat(self, name=None):\n        \"\"\"Create a Catalog containing only this entry\"\"\"\n        cat = Catalog()\n        cat.add_entry(self, name)\n        return cat\n\n    def __call__(self, user_parameters=None, **kwargs):\n        return self.to_reader(user_parameters=user_parameters, **kwargs)\n\n    @classmethod\n    def from_dict(cls, data):\n        # note that there should never be any embedded intake classes in kwargs, as they get pulled out\n        # when any reader is added to to a catalog\n        obj = super().from_dict(data)\n        obj.user_parameters = {\n            k: BaseUserParameter.from_dict(v) for k, v in data[\"user_parameters\"].items()\n        }\n        return obj\n\n    def __repr__(self):\n        extra = f\"\\n  parameters: {self.user_parameters}\" if self.user_parameters else \"\"\n        return (\n            f\"Entry for reader: {self.reader}\\n  kwargs: {self.kwargs}\\n\"\n            f\"  producing: {self.output_instance}\" + extra\n        )\n\n\nclass Catalog(Tokenizable):\n    \"\"\"A collection of data and reader descriptions.\"\"\"\n\n    def __init__(\n        self,\n        entries: Iterable[ReaderDescription] | Mapping | None = None,\n        aliases: dict[str, int] | None = None,\n        data: Iterable[DataDescription] | Mapping = None,\n        user_parameters: dict[str, BaseUserParameter] | None = None,\n        parameter_overrides: dict[str, Any] | None = None,\n        metadata: dict | None = None,\n    ):\n        self.version = 2\n        self.data: dict = data or {}\n        self.aliases: dict = aliases or {}\n        self.metadata: dict = metadata or {}\n        self.user_parameters: dict[str, BaseUserParameter] = user_parameters or {}\n        self._up_overrides: dict = parameter_overrides or {}\n  
      if isinstance(entries, Mapping) or entries is None:\n            self.entries = entries or {}\n        else:\n            self.entries = {}\n            [self.add_entry(e) for e in entries]\n\n    def add_entry(\n        self, entry, name: str | None = None, clobber: bool = True, simplify: bool = False\n    ):\n        \"\"\"Add entry/reader (and its requirements) in-place, with optional alias\n\n        Parameters\n        ----------\n        entry: instance of BaseData, BaseReader or their descriptions\n        name: set the key value the iterm will be known as\n        clobber: if False, will not overwrite an entry\n        simplify: if True, checks if an equivalent entity already exists, and\n            returns it's token if found. Such comparisons are relatively slow\n            when you have >>100 entries.\n        \"\"\"\n        from intake.readers import BaseData, BaseReader\n        from intake.readers.utils import find_funcs, replace_values\n\n        if isinstance(entry, (BaseReader, BaseData)):\n            entry = entry.to_entry()\n        if simplify and entry in self:\n            if name and name != entry.token:\n                self.aliases[name] = entry.token\n            return entry.token\n        tokens = {}\n        entry.kwargs = find_funcs(entry.kwargs, tokens)\n        if name:\n            entry._tok = name\n        for tok, thing in reversed(tokens.items()):\n            thing = thing.to_entry()\n            if thing == entry:\n                continue\n            tok = self.add_entry(thing)\n            if tok not in self:\n                # did not add new one, because it's already there\n                old_tok = next(iter(self._find_iter(thing))).token\n                entry.kwargs = replace_values(\n                    entry.kwargs, \"{data(%s)}\" % tok, \"{data(%s)}\" % old_tok\n                )\n        if isinstance(entry, ReaderDescription):\n            if clobber is False and entry.token in self.entries:\n            
    raise ValueError(\"Name {} exists in catalog, and clobber is False\", entry.token)\n            self.entries[entry.token] = entry\n        elif isinstance(entry, DataDescription):\n            if clobber is False and entry.token in self.data:\n                raise ValueError(\"Name {} exists in catalog, and clobber is False\", entry.token)\n            self.data[entry.token] = entry\n        else:\n            raise ValueError\n\n        if name and name != entry.token:\n            self.aliases[name] = entry.token\n        return entry.token\n\n    def _ipython_key_completions_(self):\n        return sorted(set(chain(self.aliases, self.data, self.entries)))\n\n    def delete(self, name, recursive=False):\n        \"\"\"Remove named entity (data/entry) from catalog\n\n        We do not check whether any other entity in the catalog refers *to*\n        what is being deleted, so you can break other entries this way.\n\n        Parameters\n        ----------\n        recursive: bool\n            Also removed data/entries references by the given one, and those\n            they refer to in turn.\n        \"\"\"\n        if recursive:\n            raise NotImplementedError\n        del self[name]\n\n    def extract_parameter(\n        self,\n        item: str,\n        name: str,\n        path: str | None = None,\n        value: Any = None,\n        cls=SimpleUserParameter,\n        store_to: str | None = None,\n        **kw,\n    ):\n        \"\"\"\n        Descend into data & reader descriptions to create a user_parameter\n\n        There are two ways to fund and replace values by a template:\n\n        - if ``path`` is given, the kwargs will be walked to this location\n          e.g., \"field.0.special_value\" -> kwargs[\"field\"][0][\"special_value\"]\n        - if ``value`` is given, all kwargs will be recursively walked, looking\n          for values that equal that given.\n\n        Matched values will be replaced by a template string like ``\"{name}\"``,\n  
      and a user_parameter of class ``cls`` will be placed in the location\n        given by ``store_to`` (could be \"data\", \"catalog\").\n        \"\"\"\n        # TODO: if entity is \"Catalog\", extract over all entities; currently this will\n        #  cause a recursion loop\n        entity = self.get_entity(item)\n        entity.extract_parameter(name, path=path, value=value, cls=cls, **kw)\n        if store_to is None:\n            return\n        elif store_to == \"data\" and isinstance(entity, ReaderDescription):\n            entity.kwargs[\"data\"].user_parameters[name] = entity.user_parameters.pop(name)\n        else:\n            self.move_parameter(item, store_to, name)\n\n    def move_parameter(self, from_entity: str, to_entity: str, parameter_name: str) -> Catalog:\n        \"\"\"Move user-parameter from between entry/data\n\n        `entity` is an alias name or entry/data token\n        \"\"\"\n        entity1 = self.get_entity(from_entity)\n        entity2 = self.get_entity(to_entity)\n        entity2.user_parameters[parameter_name] = entity1.user_parameters.pop(parameter_name)\n        return self\n\n    def promote_parameter_name(self, parameter_name: str, level: str = \"cat\") -> Catalog:\n        \"\"\"Find and promote given named parameter, assuming they are all identical\n\n        parameter_name:\n            the key string referring to the parameter\n        level: cat | data\n            If the parameter is found in a reader, it can be promoted to the data it\n            depends on. 
Parameters in a data description can only be promoted to a\n            catalog global.\n        \"\"\"\n        up = None\n        ups = None\n        if level not in (\"cat\", \"data\"):\n            raise ValueError\n        for entity in self.entries.values():\n            if parameter_name in entity.user_parameters and up is None:\n                ups = entity.user_parameters[parameter_name]\n                up = entity.user_parameters[parameter_name]\n                entity0 = entity\n            elif (\n                parameter_name in entity.user_parameters\n                and up == entity.user_parameters[parameter_name]\n            ):\n                continue\n            elif parameter_name in entity.user_parameters:\n                ups[parameter_name] = up  # rewind\n                raise ValueError\n        for entity in self.data.values():\n            if parameter_name in entity.user_parameters and up is None:\n                assert level == \"cat\"\n                ups = entity.user_parameters[parameter_name]\n                up = entity.user_parameters[parameter_name]\n            elif (\n                parameter_name in entity.user_parameters\n                and up == entity.user_parameters[parameter_name]\n            ):\n                continue\n            elif parameter_name in entity.user_parameters:\n                ups[parameter_name] = up  # rewind\n                raise ValueError\n\n        if level == \"cat\":\n            self.user_parameters[parameter_name] = up\n        else:\n            entity0.kwargs[\"data\"].user_parameters[parameter_name] = up\n        return self\n\n    def __getattr__(self, item):\n        super().tab_completion_fixer(item)\n        try:\n            return self[item]\n        except KeyError:\n            pass\n        except RecursionError as e:\n            raise AttributeError from e\n        raise AttributeError(item)\n\n    def to_yaml_file(self, path: str, **storage_options):\n        
\"\"\"Persist the state of this catalog as a YAML file\n\n        storage_options:\n            kwargs to pass to fsspec for opening the file to write\n        \"\"\"\n        # TODO: remove ['CATALOG_DIR', 'CATALOG_PATH', 'STORAGE_OPTIONS'] UPs?\n        with fsspec.open(path, mode=\"wt\", **storage_options) as stream:\n            yaml.safe_dump(self.to_dict(), stream)\n\n    @staticmethod\n    def from_yaml_file(path: str, **kwargs):\n        \"\"\"Load YAML representation into a new Catalog instance\n\n        storage_options:\n            kwargs to pass to fsspec for opening the file to read; can pass as storage_options=\n            or will pick up any unused kwargs for simplicity\n        \"\"\"\n        storage_options = kwargs.pop(\"storage_options\", kwargs)\n        of = fsspec.open(path, **storage_options)\n        with of as stream:\n            cat = Catalog.from_dict(yaml.safe_load(stream))\n        cat.user_parameters[\"CATALOG_PATH\"] = path\n        cat.user_parameters[\"CATALOG_DIR\"] = of.fs.unstrip_protocol(of.fs._parent(path))\n        cat.user_parameters[\"STORAGE_OPTIONS\"] = storage_options\n        return cat\n\n    @classmethod\n    def from_entries(cls, data: dict, metadata=None):\n        \"\"\"Assemble catalog from a dict of entries\"\"\"\n        cat = cls(metadata=metadata)\n        for k, v in data.items():\n            cat[k] = v\n        return cat\n\n    @classmethod\n    def from_dict(cls, data):\n        \"\"\"Assemble catalog from dict representation\"\"\"\n        if data.get(\"version\") != 2:\n            raise ValueError(\"Not a V2 catalog\")\n        cat = cls()\n        for key, clss in zip(\n            [\"entries\", \"data\", \"user_parameters\"],\n            [ReaderDescription, DataDescription, BaseUserParameter],\n        ):\n            for k, v in data[key].items():\n                if isinstance(v, dict):\n                    desc = clss.from_dict(v)\n                    desc._tok = k\n                else:\n     
               desc = v\n                getattr(cat, key)[k] = desc\n        cat.aliases = data[\"aliases\"]\n        cat.metadata = data[\"metadata\"]\n        return cat\n\n    def get_entity(self, item: str):\n        \"\"\"Get the objects by reference\n\n        Use this method if you want to change the catalog in-place\n\n        item can be an entry in .aliases, in which case the original wil be returned,\n        or a key in .entries, .user_parameters or .data.\n        The entity in question is returned without processing.\n        \"\"\"\n        if item.lower() in [\"cat\", \"catalog\"]:\n            return self\n        # TODO: this can be simplified with `get(..) or`\n        if item in self.aliases:\n            item = self.aliases[item]\n        if item in self.entries:\n            return self.entries[item]\n        elif item in self.data:\n            return self.data[item]\n        elif item in self.user_parameters:\n            return self.user_parameters[item]\n        else:\n            raise KeyError(item)\n\n    def get_aliases(self, entity: str):\n        \"\"\"Return those alias names that point to the given opaque key\"\"\"\n        return {k for k, v in self.aliases.items() if v == entity}\n\n    def search(self, expr) -> Catalog:\n        \"\"\"Make new catalog with a subset of this catalog\n\n        The new catalog will have those entries which pass the filter `expr`, which\n        is an instance of `intake.readers.search.BaseSearch` (i.e., has a method\n        like `filter(entry) -> bool`).\n\n        In the special case that expr is just a string, the `Text` search expression\n        will be used.\n        \"\"\"\n        from intake.readers.search import Text\n\n        if isinstance(expr, str):\n            expr = Text(expr)\n        cat = Catalog()\n        for e, v in self.entries.items():\n            if expr.filter(v):\n                cat.add_entry(v)\n                aliases = self.get_aliases(e)\n                
cat.aliases.update({a: e for a in aliases})\n        return cat\n\n    def __getitem__(self, item):\n        ups = self.user_parameters.copy()\n        kw = self._up_overrides.copy()\n        if item in self.aliases:\n            item = self.aliases[item]\n        if item in self.entries:\n            item = copy(self.entries[item])\n            # TODO: does not pass data's UPs to reader instantiation because data is still a str,\n            #  but could grab from self.data\n            # ups.update(item.data.user_parameters)\n            item = self._rehydrate(item)\n            return item(user_parameters=ups, **(kw or {}))\n        elif item in self.data:\n            item = self.data[item]\n            item = self._rehydrate(item)\n            return item.to_data(user_parameters=ups, **(kw or {}))\n        else:\n            raise KeyError(item)\n\n    def _rehydrate(self, val):\n        \"\"\"Recreate reader instances when accessed from this catalog, filling in refs and templates\"\"\"\n        from intake.readers.entry import DataDescription, ReaderDescription\n\n        if isinstance(val, dict):\n            return {k: self._rehydrate(v) for k, v in val.items()}\n        elif isinstance(val, str):\n            m = re.match(r\"{?data[(]([^)]+)[)]}?\", val)\n            if m:\n                return self[m.groups()[0]]\n            return val\n        elif isinstance(val, bytes):\n            return val\n        elif isinstance(val, (tuple, set, list)):\n            return type(val)(self._rehydrate(v) for v in val)\n        elif isinstance(val, (DataDescription, ReaderDescription)):\n            val2 = copy(val)\n            val2.__dict__ = self._rehydrate(val.__dict__)\n            return val2\n        return val\n\n    def __delitem__(self, key):\n        # remove alias, data or entry with no further actions\n        if key in self.aliases:\n            self.data.pop(self.aliases[key], None)\n            self.entries.pop(self.aliases[key], None)\n        
for k, v in self.aliases.copy().items():\n            # remove alias pointing TO key\n            if v == key:\n                self.aliases.pop(k)\n        self.aliases.pop(key, None)\n        self.data.pop(key, None)\n        self.entries.pop(key, None)\n\n    def __delattr__(self, item):\n        del self[item]\n\n    def _find_iter(self, thing):\n        if isinstance(thing, DataDescription):\n            return (_ for _ in self.data.values() if thing.to_dict() == _.to_dict())\n        elif isinstance(thing, ReaderDescription):\n            return (_ for _ in self.entries.values() if thing.to_dict() == _.to_dict())\n        else:\n            return ()\n\n    def __contains__(self, thing):\n        from intake.readers import BaseReader, BaseData\n\n        if isinstance(thing, (BaseData, BaseReader)):\n            thing = thing.to_entry()\n        if hasattr(thing, \"token\"):\n            thing0 = thing.token\n        else:\n            thing0 = thing\n        easy = thing0 in self.data or thing0 in self.entries or thing0 in self.aliases\n        return easy or any(self._find_iter(thing))\n\n    def __call__(self, **kwargs):\n        \"\"\"Set override values for any named user parameters\n\n        Returns a new instance of Catalog with overrides set\n        \"\"\"\n        up_over = self._up_overrides.copy()\n        up_over.update(kwargs)\n\n        new = Catalog(\n            entries=self.entries,\n            aliases=self.aliases,\n            data=self.data,\n            user_parameters=self.user_parameters,\n            parameter_overrides=up_over,\n            metadata=self.metadata,\n        )\n        return new\n\n    def __iter__(self):\n        return iter(self.aliases)\n\n    def __len__(self) -> int:\n        return len(self.aliases)\n\n    def __dir__(self) -> Iterable[str]:\n        return sorted(chain(object.__dir__(self), self.aliases))\n\n    def __add__(self, other: Catalog | DataDescription):\n        if not isinstance(other, (Catalog, 
DataDescription)):\n            raise TypeError\n        if isinstance(other, (DataDescription, ReaderDescription)):\n            other = Catalog(entries=[other])\n        return Catalog(\n            entries=chain(self.entries.values(), other.entries.values()),\n            aliases=merge_dicts(self.aliases, other.aliases),\n            data=merge_dicts(self.data, other.data),\n            user_parameters=merge_dicts(self.user_parameters, other.user_parameters),\n            metadata=merge_dicts(self.metadata, other.metadata),\n        )\n\n    def __iadd__(self, other: Catalog | ReaderDescription):\n        if not isinstance(other, Catalog):\n            other = Catalog([other])\n        self.entries.update(other.entries)\n        self.aliases.update(other.aliases)\n        self.user_parameters.update(other.user_parameters)\n        self.metadata.update(other.metadata)\n        return self\n\n    def __repr__(self):\n        aliases = set(self.aliases).union(e for e in self.entries if not _is_tok(e))\n        txt = f\"\"\"{type(self).__name__}\ndata definitions: {len(self.data)}\nreader entries: {len(self.entries)}\nnamed datasets: {sorted(aliases)}\"\"\"\n        if self.user_parameters:\n            txt = txt + f\"\\n  parameters: {sorted(self.user_parameters)}\"\n        return txt\n\n    def __setitem__(self, name: str, entry):\n        \"\"\"Add the entry to this catalog with the given alias name\n\n        If the entry is already in the catalog, this effectively just adds an alias. 
Any existing alias of\n        the same name will be clobbered.\n        \"\"\"\n        self.add_entry(entry, name=name)\n\n    def rename(self, old: str, new: str, clobber=True):\n        \"\"\"Change the alias of a dataset\"\"\"\n        if not clobber and new in self.aliases:\n            raise ValueError\n        self.aliases[new] = self.aliases.pop(old)\n\n    @property\n    def name(self):\n        if not re.match(\"^[0-9a-f]{16}$\", self.token):\n            return self.token\n        else:\n            return self.metadata.get(\"name\", \"unnamed\")\n\n    def give_name(self, tok: str, name: str, clobber=True):\n        \"\"\"Give an alias to a dataset\n\n        tok:\n            a key in the .entries dict\n        \"\"\"\n        if not clobber and name in self.aliases:\n            raise ValueError\n        if not isinstance(tok, str):\n            tok = tok.token\n        if tok not in self.entries:\n            raise KeyError\n        self.aliases[name] = tok\n\n    alias = give_name\n\n    # TODO: methods to split a pipeline into sequence of entries and to rejoin them\n"
  },
  {
    "path": "intake/readers/examples.py",
    "content": "\"\"\"This module can contain examples of complex Intake use we wish to refer to\"\"\"\nimport operator\n\n\ndef ms_building_parquet():\n    \"\"\"22k build polygon outlines in the US Virgin Islands\"\"\"\n    import intake.readers\n    import planetary_computer\n\n    cat = intake.readers.datatypes.STACJSON(\n        \"https://planetarycomputer.microsoft.com/api/stac/v1\"\n    ).to_reader(\n        reader=\"StacSearch\",\n        query={\"collections\": [\"ms-buildings\"]},\n        signer=planetary_computer.sign_inplace,\n        prefer=\"Awkward\",\n    )\n    return cat.apply(operator.getitem, \"USVirginIslands_32300213_2023-04-25\").apply(\n        operator.getitem,\n        \"data\",\n        output_instance=\"intake.readers.readers:AwkwardParquet\",\n    )\n\n\nms_us_virgin_cat = \"\"\"\naliases:\n  building_outlines: 1e150595b4343b5a\ndata:\n  ea539c01591e5e69:\n    datatype: intake.readers.datatypes:STACJSON\n    kwargs:\n      metadata: {}\n      storage_options: null\n      url: https://planetarycomputer.microsoft.com/api/stac/v1\n    metadata: {}\n    user_parameters: {}\nentries:\n  1e150595b4343b5a:\n    kwargs:\n      out_instances:\n      - intake.readers.entry:Catalog\n      - intake.readers.entry:Catalog\n      - intake.readers.entry:Catalog\n      steps:\n      - - '{data(bd3879bda378781f)}'\n        - []\n        - {}\n      - - '{func(intake.readers.convert:GenericFunc)}'\n        - - USVirginIslands_32300213_2023-04-25\n        - func: '{func(_operator:getitem)}'\n      - - '{func(intake.readers.convert:GenericFunc)}'\n        - - data\n        - func: '{func(_operator:getitem)}'\n    metadata: {}\n    output_instance: intake.readers.entry:Catalog\n    reader: intake.readers.convert:Pipeline\n    user_parameters: {}\n  bd3879bda378781f:\n    kwargs:\n      data: '{data(ea539c01591e5e69)}'\n      prefer: Awkward\n      query:\n        collections:\n        - ms-buildings\n      signer: 
'{func(planetary_computer.sas:sign_inplace)}'\n    metadata: {}\n    output_instance: intake.readers.entry:Catalog\n    reader: intake.readers.catalogs:StacSearch\n    user_parameters: {}\nmetadata: {}\nuser_parameters: {}\nversion: 2\n\"\"\"\n\n\ndef ms_delta_buildings():\n    # replicates https://planetarycomputer.microsoft.com/dataset/ms-buildings#Example-Notebook\n    import intake.readers\n    import planetary_computer\n\n    cat = intake.readers.datatypes.STACJSON(\n        \"https://planetarycomputer.microsoft.com/api/stac/v1\"\n    ).to_reader(reader=\"StacCatalog\", signer=planetary_computer.sign_inplace, prefer=\"Delta\")\n    return cat.apply(operator.getitem, \"ms-buildings\").apply(\n        operator.getitem, \"delta\", output_instance=\"deltalake:DeltaTable\"\n    )\n"
  },
  {
    "path": "intake/readers/importlist.py",
    "content": "\"\"\"Imports made my intake when it itself is imported\n\nSince \"plugins\" are just subclasses of things like intake.readers.readers.BaseReader,\nimporting them is enough for registration.\n\nTo include imports from a package in the list of things to import, the canonical thing\nto do is include an entry under \"intake.imports\" in the package entrypoints; the value\nof each item will be imported.\n\nThe following config keys define behaviour:\n\n- import_on_startup: if False, makes no automatic imports\n- entrypoints_block_list: if an entrypoints import has a top-level package name\n  in this list, it will be skipped\n- import_extras: values in this list will be imported after processing entrypoints. This is\n  a way to include imports without installing packages/entrypoints.\n\"\"\"\n\nfrom intake import conf, import_name, logger\nfrom importlib.metadata import entry_points\n\n\ndef process_entries():\n    eps = entry_points()\n    if hasattr(eps, \"select\"):  # Python 3.10+ / importlib_metadata >= 3.9.0\n        specs = eps.select(group=\"intake.imports\")\n    else:\n        specs = eps.get(\"intake.imports\", [])\n    for spec in specs:\n        top_level = spec.value.split(\":\", 1)[0].split(\".\", 1)[0]\n        bl = conf.get(\"import_block_list\", [])\n        if top_level in bl or spec.name in bl:\n            logger.debug(\"Skipping import of %s\", spec)\n            continue\n        try:\n            import_name(spec.value)\n        except Exception as e:\n            logger.warning(\n                \"Importing %s as part of processing intake entrypoints failed\\n(%s)\",\n                spec.value,\n                e,\n            )\n    for impname in conf[\"extra_imports\"]:\n        try:\n            import_name(impname)\n        except Exception as e:\n            logger.warning(\n                \"Importing %s as part of processing intake extra_imports failed\\n(%s)\",\n                impname,\n                e,\n            
)\n\n\nif conf[\"import_on_startup\"]:\n    process_entries()\n"
  },
  {
    "path": "intake/readers/metadata.py",
    "content": "\"\"\"Some types and meanings of fields that can be expected in metadata dictionaries\n\nMetadata should be JSON-serializable.\n\nWe may decide to have different recommended keys for data versus readers/pipelines\n\nFor a possible schema we could decide to use, see\nhttps://specs.frictionlessdata.io/data-resource/\n\"\"\"\nfrom __future__ import annotations\n\nfrom typing import List\n\nmetadata_fields = {\n    \"description\": (str, \"one-line description of the dataset\"),\n    \"text\": (str, \"long-form prose description of the dataset\"),\n    \"timestamp\": (\n        str,\n        \"most recent datum in the set, ISO format\",\n    ),  # timespan would be in \"data\" as an extent\n    \"imports\": (List[str], \"top-level packages needed to read this\"),\n    \"environment\": (str, \"YAML string or URL of a conda env spec\"),  # or requirements.txt\n    \"references\": (List[str], \"URLs with further information relating to this\"),\n    \"repr\": (str, \"string form of output\"),\n    \"data\": (\n        dict,\n        \"any data-specific details, such as field types, missing values bounds or statistics\",\n    ),\n    \"history\": (\n        List[dict],\n        \"Time-ordered list of operations done to get this data. Keys are ISO timestamps.\",\n    ),\n    \"datashape\": (str, \"if applicable, may have datashape, dtype(s), jsonschema or similar\"),\n    \"thumbnail\": (str, \"url location of an image, ideally PNG format\"),\n}\n"
  },
  {
    "path": "intake/readers/mixins.py",
    "content": "\"\"\"Helpers for creating pipelines\"\"\"\n\nfrom __future__ import annotations\n\nimport re\nfrom itertools import chain\n\nfrom intake import import_name\nfrom intake.readers.utils import Completable\n\n\nclass PipelineMixin(Completable):\n    \"\"\"Make it possible to associate transforms with the given class\"\"\"\n\n    def __getattr__(self, item):\n        super().tab_completion_fixer(item)\n        try:\n            if item in dir(self.transform):\n                return getattr(self.transform, item)\n            if \"Catalog\" in self.output_instance:\n                # a better way to mark this condition, perhaps the datatype's structure?\n                out = self.read()[item]\n            elif item in self._namespaces:\n                out = self._namespaces[item]\n            # the following can go very wrong - only allow via explicit opt-in?\n            else:\n                out = self.transform.__getattr__(item)  # arbitrary method call\n        except RecursionError as e:\n            raise AttributeError(item) from e\n        else:\n            return out\n\n    def __getitem__(self, item: str):\n        from intake.readers.convert import Pipeline\n        from intake.readers.transform import GetItem\n\n        outtype = self.output_instance\n        if \"Catalog\" in outtype:\n            # a better way to mark this condition, perhaps the datatype's structure?\n            # TODO: this prevents from doing a transform/convert on a cat, so must use\n            #  .transform for that\n            return self.read()[item]\n        if isinstance(self, Pipeline):\n            return self.with_step((GetItem, (item,), {}), out_instance=outtype)\n\n        return Pipeline(\n            steps=[(self, (), {}), (GetItem, (item,), {})],\n            out_instances=[self.output_instance, outtype],\n            metadata=self.metadata,\n        )\n\n    def __dir__(self):\n        return sorted(chain(object.__dir__(self), dir(self.transform), 
self._namespaces))\n\n    @property\n    def _namespaces(self):\n        from intake.readers.namespaces import get_namespaces\n\n        return get_namespaces(self)\n\n    @classmethod\n    def output_doc(cls):\n        \"\"\"Doc associated with output type\"\"\"\n        out = import_name(cls.output_instance)\n        return out.__doc__\n\n    def apply(self, func, *args, output_instance=None, **kwargs):\n        \"\"\"Make a pipeline by applying a function to this reader's output\"\"\"\n        from intake.readers.convert import GenericFunc, Pipeline\n\n        kwargs[\"func\"] = func\n\n        return Pipeline(\n            steps=[(self, (), {}), (GenericFunc, args, kwargs)],\n            out_instances=[\n                self.output_instance,\n                output_instance or self.output_instance,\n            ],\n            metadata=self.metadata,\n        )\n\n    @property\n    def transform(self):\n        from intake.readers.convert import convert_classes\n\n        funcdict = convert_classes(self.output_instance)\n        return Functioner(self, funcdict)\n\n\nclass Functioner(Completable):\n    \"\"\"Find and apply transform functions to reader output\"\"\"\n\n    def __init__(self, reader, funcdict):\n        self.reader = reader\n        self.funcdict = funcdict\n\n    def _ipython_key_completions_(self):\n        return [_[1].__name__ for _ in self.funcdict]\n\n    def __getitem__(self, item):\n        from intake.readers.convert import Pipeline\n        from intake.readers.transform import GetItem\n\n        found = False\n        for out, func in self.funcdict:\n            ######\n            if func.__name__ == item or re.findall(item, func):\n                arg = ()\n                kw = {}\n                found = True\n                break\n        if not found:\n            func = GetItem\n            arg = (item,)\n            kw = {}\n        if isinstance(self.reader, Pipeline):\n            return self.reader.with_step((func, (), kw), 
out_instance=item)\n\n        return Pipeline(\n            steps=[(self.reader, (), {}), (func, arg, kw)],\n            out_instances=[self.reader.output_instance, item],\n            metadata=self.reader.metadata,\n        )\n\n    def __repr__(self):\n        import pprint\n\n        return f\"Transformers for {self.reader.output_instance}:\\n{pprint.pformat(self.funcdict)}\"\n\n    def __call__(self, func, *args, output_instance=None, **kwargs):\n        from intake.readers.convert import Pipeline\n\n        if isinstance(self.reader, Pipeline):\n            return self.reader.with_step((func, args, kwargs), out_instance=output_instance)\n        # TODO: get output_instance from func, if possible\n\n        return Pipeline(\n            steps=[(self.reader, (), {}), (func, args, kwargs)],\n            out_instances=[self.reader.output_instance, output_instance],\n            metadata=self.reader.metadata,\n        )\n\n    def methods(self):\n        \"\"\"Methods and attributes associated with the output_instance\"\"\"\n        try:\n            cls = import_name(self.reader.output_instance)\n            dnames = (_ for _ in dir(cls) if not _.startswith(\"_\"))\n        except (ImportError, AttributeError):\n            dnames = []\n        return dnames\n\n    def __dir__(self):\n        return list(sorted(set(chain((f.__name__ for (_, f) in self.funcdict), self.methods()))))\n\n    def __getattr__(self, item):\n        super().tab_completion_fixer(item)\n        from intake.readers.convert import Pipeline\n        from intake.readers.transform import Method\n\n        out = [(outtype, func) for outtype, func in self.funcdict if func.__name__ == item]\n        if not len(out):\n            # TODO: import class being acted on, to see if attribute requested\n            #  really is available. 
Perhaps tie to config option.\n            # TODO: exclude certain attributes that might be called during\n            #  a stack trace or other non-normal code, causing accidental\n            #  massive pipelines. E.g., dunders. For those, require `apply()`.\n\n            outtype = self.reader.output_instance\n            if item.startswith(\"_\"):\n                raise AttributeError(item)\n\n            func = Method\n            kw = {\"method_name\": item}\n        else:\n            outtype, func = out[0]\n            kw = {}\n        try:\n            if isinstance(self.reader, Pipeline):\n                out = self.reader.with_step((func, (), kw), out_instance=outtype)\n            else:\n                out = Pipeline(\n                    steps=[(self.reader, (), {}), (func, (), kw)],\n                    out_instances=[self.reader.output_instance, outtype],\n                    metadata=self.reader.metadata,\n                )\n        except RecursionError as e:\n            raise AttributeError from e\n        else:\n            return out\n"
  },
  {
    "path": "intake/readers/namespaces.py",
    "content": "\"\"\"Add module accessors to pipelines, providing functions appropriate for its output\n\nThe code here allow something like pipeline.np.<tab> to get completions from the numpy\nnamespace, and apply to the pipeline.\n\"\"\"\nfrom __future__ import annotations\n\nimport importlib.metadata\nimport re\nfrom functools import lru_cache as cache\nfrom typing import Iterable\n\nfrom intake.readers.utils import Completable, subclasses\n\n\nclass Namespace(Completable):\n    \"\"\"A set of functions as an accessor on a Reader, producing a Pipeline\"\"\"\n\n    acts_on: tuple[str] = ()  #: types that this namespace is associated with\n    imports: tuple[str] = ()  #: requires this top-level package\n\n    def __init__(self, reader):\n        self.reader = reader\n\n    @classmethod\n    @cache\n    def _funcs(cls) -> Iterable[str]:\n        if not cls.check_imports():\n            return []\n        # if self.reader.output_instance doesn't match self.acts_on\n        cls.mod = importlib.import_module(cls.imports[0])\n        return [f for f in dir(cls.mod) if callable(getattr(cls.mod, f)) and not f.startswith(\"_\")]\n\n    def __dir__(self) -> Iterable[str]:\n        # if self.reader.output_instance doesn't match self.acts_on:\n        # return []\n        return self._funcs()\n\n    def __getattr__(self, item):\n        super().tab_completion_fixer(item)\n        try:\n            dir(self)\n            func = getattr(self.mod, item)\n            return FuncHolder(self.reader, func)\n        except RecursionError as e:\n            raise AttributeError from e\n\n    def __repr__(self):\n        return f\"{self.imports} namespace\"\n\n\nclass FuncHolder:\n    \"\"\"Acts like a function to capture a call into a pipeline stage\"\"\"\n\n    def __init__(self, reader, func):\n        self.reader = reader\n        self.func = func\n\n    def __call__(self, *args, **kwargs):\n        return self.reader.apply(self.func, **kwargs)\n\n\nclass np(Namespace):\n    
acts_on = (\".*\",)  # numpy works with a wide variety of objects\n    imports = (\"numpy\",)\n\n\nclass ak(Namespace):\n    acts_on = \"awkward:Array\", \"dask_awkward:Array\"\n    imports = (\"awkward\",)\n\n\nclass xr(Namespace):\n    acts_on = \"xarray:DataArray\", \"xarray:Dataset\"\n    imports = (\"xarray\",)\n\n\nclass pd(Namespace):\n    acts_on = (\"pandas:DataFrame\", \"pandas.Series\")\n    imports = (\"pandas\",)\n\n\nclass pl(Namespace):\n    acts_on = (\"polars:DataFrame\", \"polars:Series\", \"polars:LazyFrame\")\n    imports = (\"polars\",)\n\n\ndef get_namespaces(reader):\n    \"\"\"These namespaces are available on the reader\"\"\"\n    out = {}\n    for space in subclasses(Namespace):\n        if any(re.match(act.lower(), reader.output_instance.lower()) for act in space.acts_on):\n            out[space.__name__] = space(reader)\n    return out\n"
  },
  {
    "path": "intake/readers/output.py",
    "content": "\"\"\"Serialise and output data into persistent formats\n\nThis is how to \"export\" data from Intake.\n\nBy convention, functions here produce an instance of FileData (or other data type),\nwhich can then be used to produce new catalog entries.\n\"\"\"\nfrom __future__ import annotations\n\nimport fsspec\n\nfrom intake.readers.convert import BaseConverter\nfrom intake.readers.datatypes import (\n    CSV,\n    HDF5,\n    PNG,\n    CatalogFile,\n    Feather2,\n    NumpyFile,\n    Parquet,\n    Zarr,\n    recommend,\n)\nfrom intake.readers.utils import all_to_one\n\n\n# TODO: superclass for output, so they show up differently in any graph viz?\n#  *most* things here produce a datatypes:BaseData, but not all\n\n\nclass PandasToParquet(BaseConverter):\n    instances = all_to_one(\n        {\"pandas:DataFrame\", \"dask.dataframe:DataFrame\", \"geopandas:GeoDataFrame\"},\n        \"intake.readers.datatypes:Parquet\",\n    )\n\n    def run(self, x, url, storage_options=None, metadata=None, **kwargs):\n        x.to_parquet(url, storage_options=storage_options, **kwargs)\n        return Parquet(url=url, storage_options=storage_options, metadata=metadata)\n\n\nclass PandasToCSV(BaseConverter):\n    instances = all_to_one(\n        {\"pandas:DataFrame\", \"dask.dataframe:DataFrame\", \"geopandas:GeoDataFrame\"},\n        \"intake.readers.datatypes:CSV\",\n    )\n\n    def run(self, x, url, storage_options=None, metadata=None, **kwargs):\n        x.to_csv(url, storage_options=storage_options, **kwargs)\n        return CSV(url=url, storage_options=storage_options, metadata=metadata)\n\n\nclass PandasToHDF5(BaseConverter):\n    instances = all_to_one(\n        {\"pandas:DataFrame\", \"dask.dataframe:DataFrame\"},\n        \"intake.readers.datatypes:HDF5\",\n    )\n\n    def run(self, x, url, table, storage_options=None, metadata=None, **kwargs):\n        x.to_hdf(url, table, storage_options=storage_options, **kwargs)\n        return HDF5(url=url, path=table, 
storage_options=storage_options, metadata=metadata)\n\n\nclass PandasToFeather(BaseConverter):\n    instances = all_to_one(\n        {\"pandas:DataFrame\", \"geopandas:GeoDataFrame\"},\n        \"intake.readers.datatypes:Feather2\",\n    )\n\n    def run(self, x, url, storage_options=None, metadata=None, **kwargs):\n        # TODO: fsspec output\n        x.to_feather(url, storage_options=storage_options, **kwargs)\n        return Feather2(url=url, storage_options=storage_options, metadata=metadata)\n\n\nclass XarrayToNetCDF(BaseConverter):\n    instances = {\"xarray:Dataset\": \"intake.readers.datatypes:HDF5\"}\n\n    def run(self, x, url, group=\"\", metadata=None, **kwargs):\n        x.to_netcdf(path=url, group=group or None, **kwargs)\n        return HDF5(url, path=group, metadata=metadata)\n\n\nclass XarrayToZarr(BaseConverter):\n    instances = {\"xarray:Dataset\": \"intake.readers.datatypes:Zarr\"}\n    func = \"xarray:Dataset.to_zarr\"\n\n    def run(self, x, url, group=\"\", storage_options=None, metadata=None, **kwargs):\n        x.to_zarr(store=url, group=group or None, storage_options=storage_options, **kwargs)\n        return Zarr(url, storage_options=storage_options, root=group, metadata=metadata)\n\n\nclass DaskArrayToZarr(BaseConverter):\n    instances = {\"dask.array:Array\": \"intake.readers.datatypes:Zarr\"}\n    func = \"xarray:Dataset.to_zarr\"\n\n    def run(self, x, url, group=\"\", storage_options=None, metadata=None, **kwargs):\n        x.to_zarr(\n            store=url,\n            component=group or None,\n            storage_options=storage_options,\n            **kwargs,\n        )\n        return Zarr(url, storage_options=storage_options, root=group, metadata=metadata)\n\n\nclass NumpyToNumpyFile(BaseConverter):\n    \"\"\"Save a single array into a single binary file\"\"\"\n\n    instances = {\"numpy:ndarray\": \"intake.readers.datatypes:NumpyFile\"}\n    func = \"numpy:save\"\n\n    def run(self, x, path, *args, storage_options=None, 
metadata=None, **kwargs):\n        if storage_options or \"://\" in path or \"::\" in path:\n            with fsspec.open(path, **storage_options) as f:\n                self._func(x, f)\n        else:\n            self._func(x, path)\n        return NumpyFile(path, storage_options, metadata=metadata)\n\n\nclass ToMatplotlib(BaseConverter):\n    instances = all_to_one(\n        {\"pandas:DataFrame\", \"geopandas:GeoDataFrame\", \"xarray:Dataset\"},\n        \"matplotlib.pyplot:Figure\",\n    )\n    func = \"matplotlib.pyplot:Figure\"\n    func_doc = \"matplotlib.pyplot:plot\"\n\n    def run(self, x, **kwargs):\n        fig = self._func()\n        ax = fig.add_subplot(111)\n        x.plot(ax=ax, **kwargs)\n        return fig\n\n\nclass MatplotlibToPNG(BaseConverter):\n    \"\"\"Take a matplotlib figure and save to PNG file\n\n    This could be used to produce thumbnails if followed by FileByteReader;\n    to use temporary storage rather than concrete files, can use memory: or caching filesystems\n    \"\"\"\n\n    instances = {\"matplotlib.pyplot:Figure\": \"intake.readers.datatypes:PNG\"}\n\n    def run(self, x, url, metadata=None, storage_options=None, **kwargs):\n        with fsspec.open(url, mode=\"wb\", **(storage_options or {})) as f:\n            x.savefig(f, format=\"png\", **kwargs)\n        return PNG(url=url, metadata=metadata, storage_options=storage_options)\n\n\nclass GeopandasToFile(BaseConverter):\n    \"\"\"creates one of several output file types\n\n    Uses url extension or explicit driver= kwarg\n    \"\"\"\n\n    instances = {\"geopandas:GeoDataFrame\": \"intake.readers.datatypes:GeoJSON\"}\n\n    def run(self, x, url, metadata=None, **kwargs):\n        x.to_file(url, **kwargs)\n        return recommend(url)[0](url=url, metadata=metadata)\n\n\nclass Repr(BaseConverter):\n    \"\"\"good for including \"peek\" at data in entries' metadata\"\"\"\n\n    instances = {\".*\": \"builtins:str\"}\n    func = \"builtins:repr\"\n\n\nclass 
IPythonDisplay(BaseConverter):\n    # maybe restrict to types known to render well?\n    instances = {\".*\": \"builtins:dict\"}\n\n    def run(self, x, **kwargs):\n        \"\"\"Produce ipython/jupyter compatible output without imports\"\"\"\n        # does not consider _ipython_display_\n        types = {\n            \"html\": \"text/html\",\n            \"svg\": \"image/svg+xml\",\n            \"png\": \"image/png\",\n            \"jpeg\": \"image/jpeg\",\n            \"latex\": \"text/latex\",\n            \"json\": \"application/json\",\n            \"pretty\": \"text/plain\",\n        }\n        for name, mime in types.items():\n            if hasattr(x, f\"_repr_{name}_\"):\n                out = getattr(x, f\"_repr_{name}_\")()\n                # out can be (data, metadata) for some calls\n                # https://ipython.readthedocs.io/en/stable/config/integrating.html#metadata\n                return {mime: out[0] if isinstance(out, tuple) else out}\n        if hasattr(x, \"_repr_mimebundle_\"):\n            return x._repr_mimebundle_()\n        return {\"text/plain\": repr(x)}\n\n\nclass CatalogToJson(BaseConverter):\n    instances = {\"intake.readers.entry:Catalog\": \"intake.readers.datatypes:CatalogFile\"}\n    func = \"intake.readers.entry:Catalog.to_yaml_file\"\n\n    def run(self, x, url, metadata=None, storage_options=None, **kwargs):\n        if storage_options:\n            kwargs.update(storage_options)\n        x.to_yaml_file(url, **kwargs)\n        return CatalogFile(url=url, storage_options=storage_options, metadata=metadata)\n"
  },
  {
    "path": "intake/readers/readers.py",
    "content": "\"\"\"Classes for reading data into a python objects\"\"\"\n\nfrom __future__ import annotations\n\nimport inspect\nimport itertools\nimport json\nimport os\nimport re\nfrom functools import lru_cache\n\nimport fsspec\nfrom fsspec.callbacks import _DEFAULT_CALLBACK as DEFAULT_CALLBACK\n\nimport intake.readers.datatypes\nfrom intake import import_name, logger\nfrom intake.readers import datatypes\nfrom intake.readers.mixins import PipelineMixin\nfrom intake.readers.utils import Tokenizable, subclasses, port_in_use, find_free_port\nfrom intake.utils import is_fsspec_url\n\n\nclass BaseReader(Tokenizable, PipelineMixin):\n    imports: set[str] = set()  #: top-level packages required to use this\n    implements: set[datatypes.BaseData] = set()  #: datatype(s) this applies to\n    optional_imports: set[str] = set()  #: packages that might be required by some options\n    func: str = \"builtins:NotImplementedError\"  #: function name for loading data\n    func_doc: str = None  #: docstring origin if not from func\n    output_instance: str = None  #: type the reader produces\n    other_funcs: set[str] = set()  #: function names to recognise when matching user calls\n\n    def __init__(\n        self,\n        *args,\n        metadata: dict | None = None,\n        output_instance: str | None = None,\n        **kwargs,\n    ):\n        if (\n            self.implements\n            and (args and not isinstance(args[0], datatypes.BaseData))\n            and not any(isinstance(_, datatypes.BaseData) for _ in kwargs.values())\n        ):\n            # reader requires data input, but not given - guess\n            if len(self.implements) == 1:\n                d_cls = list(self.implements)[0]\n                sig = inspect.signature(d_cls.__init__).parameters\n                kw2 = {}\n                for k, v in kwargs.copy().items():\n                    if k in sig:\n                        kw2[k] = kwargs.pop(k)\n                args = (d_cls(*args, 
**kw2),)\n            else:\n                raise NotImplementedError(\n                    \"Guessing the data type not supported for a reader implementing multiple data \"\n                    \"classes. Please Instantiate the data type directly.\"\n                )\n\n        self.kwargs = kwargs\n        if args:\n            self.kwargs[\"args\"] = args\n        met = {}\n        for a in itertools.chain(\n            reversed(kwargs.get(\"args\", [])), reversed(kwargs.values()), reversed(args)\n        ):\n            if isinstance(a, datatypes.BaseData):\n                met.update(a.metadata)\n        met.update(metadata or {})\n        self.metadata = met\n        if output_instance:\n            self.output_instance = output_instance\n\n    def __repr__(self):\n        return f\"{type(self).__name__} reader producing {self.output_instance}\"\n\n    def __call__(self, *args, **kwargs):\n        \"\"\"New version of this instance with altered arguments\"\"\"\n        kw = self.kwargs.copy()\n        kw.update(kwargs)\n        if args:\n            kw[\"args\"] = args\n        return type(self)(**kw)\n\n    @classmethod\n    def doc(cls):\n        \"\"\"Doc associated with loading function\"\"\"\n        f = cls.func_doc or cls.func\n        if isinstance(f, str):\n            f = import_name(f)\n        upstream = f.__doc__ if f is not NotImplementedError else \"\"\n        sig = str(inspect.signature(cls._read))\n        doc = cls._read.__doc__\n        return \"\\n\\n\".join(_ for _ in [cls.qname(), cls.__doc__, sig, doc, upstream] if _)\n\n    def discover(self, **kwargs):\n        \"\"\"Part of the data\n\n        The intent is to return a minimal dataset, but for some readers and conditions this may be\n        up to the whole of the data. 
Output type is the same as for read().\n        \"\"\"\n        return self.read(**kwargs)\n\n    @property\n    def _func(self):\n        \"\"\"Import and replace .func, if it is a string\"\"\"\n        if isinstance(self.func, str):\n            return import_name(self.func)\n        return self.func\n\n    def read(self, *args, **kwargs):\n        \"\"\"Produce data artefact\n\n        Any of the arguments encoded in the data instance can be overridden.\n\n        Output type is given by the .output_instance attribute\n        \"\"\"\n        logger.debug(\"Reading %s\", self)\n        kw = self.kwargs.copy()\n        kw.update(kwargs)\n        args = kw.pop(\"args\", ()) or args\n        return self._read(*args, **kw)\n\n    def _read(self, *args, **kwargs):\n        \"\"\"This is the method subclasses will tend to override\"\"\"\n        raise NotImplementedError\n\n    def to_entry(self):\n        \"\"\"Create an entry version of this, ready to be inserted into a Catalog\"\"\"\n        from intake.readers.entry import ReaderDescription\n\n        return ReaderDescription(\n            reader=self.qname(),\n            kwargs=self.kwargs,\n            output_instance=self.output_instance,\n            metadata=self.metadata,\n        )\n\n    def to_cat(self, name=None):\n        \"\"\"Create a Catalog containing on this reader\"\"\"\n        return self.to_entry().to_cat(name)\n\n    @property\n    def data(self):\n        \"\"\"The BaseData this reader depends on, if it has one\"\"\"\n        data = self.kwargs.get(\"data\")\n        if data is None:\n            args = self.kwargs.get(\"args\", ())\n            if not (args):\n                raise ValueError(\"Cloud not find a data entity in this reader\")\n            data = args[0]\n        if not isinstance(data, datatypes.BaseData):\n            raise ValueError(\"Data argument isn't a BaseData\")\n        return data\n\n    def to_reader(self, outtype: tuple[str] | str | None = None, reader: str | 
None = None, **kw):\n        \"\"\"Make a different reader for the data used by this reader\"\"\"\n        return self.data.to_reader(outtype=outtype, reader=reader, metadata=self.metadata, **kw)\n\n    def auto_pipeline(self, outtype: str | tuple[str], avoid: list[str] | None = None):\n        from intake import auto_pipeline\n\n        return auto_pipeline(self, outtype=outtype, avoid=avoid)\n\n\nclass FileReader(BaseReader):\n    \"\"\"Convenience superclass for readers of files\"\"\"\n\n    url_arg = \"url\"\n    other_urls = {}  # if we have other_funcs, may have different url_args for each\n    storage_options = False\n\n    def _read(self, data, **kw):\n        kw[self.url_arg] = data.url\n        if self.storage_options and data.storage_options:\n            kw[\"storage_options\"] = data.storage_options\n        return self._func(**kw)\n\n\nclass OpenFilesReader(FileReader):\n    url_arg = \"urlpath\"\n    implements = {datatypes.FileData}\n    func = \"fsspec:open_files\"\n    output_instance = \"fsspec.core:OpenFiles\"\n\n\nclass PanelImageViewer(FileReader):\n    output_instance = \"panel.pane:Image\"\n    implements = {datatypes.PNG, datatypes.JPEG}\n    func = \"panel.pane:Image\"\n    url_arg = \"object\"\n\n\nclass FileByteReader(FileReader):\n    \"\"\"The contents of file(s) as bytes\"\"\"\n\n    output_instance = \"builtin:bytes\"\n    implements = {datatypes.FileData}\n\n    def discover(self, data=None, **kwargs):\n        data = data or self.kwargs[\"data\"]\n        with fsspec.open(data.url, mode=\"rb\", **(data.storage_options or {})) as f:\n            return f.read()\n\n    def _read(self, data, **kwargs):\n        out = []\n        for of in fsspec.open_files(data.url, mode=\"rb\", **(data.storage_options or {})):\n            with of as f:\n                out.append(f.read())\n        return b\"\".join(out)\n\n\nclass FileTextReader(FileReader):\n    \"\"\"The contents of file(s) as str\"\"\"\n\n    output_instance = \"builtins:str\"\n 
   implements = {datatypes.FileData}\n\n    def discover(self, data=None, encoding=None, **kwargs):\n        data = data or self.kwargs[\"data\"]\n        if encoding:\n            data.storage_options[\"encoding\"] = encoding\n        with fsspec.open(data.url, mode=\"rt\", **(data.storage_options or {})) as f:\n            return f.read()\n\n    def _read(self, data, encoding=None, **kwargs):\n        out = []\n        if encoding:\n            data.storage_options[\"encoding\"] = encoding\n        for of in fsspec.open_files(data.url, mode=\"rt\", **(data.storage_options or {})):\n            with of as f:\n                out.append(f.read())\n        return \"\".join(out)\n\n\nclass FileSizeReader(FileReader):\n    output_instance = \"builtins:int\"\n    implements = {datatypes.FileData}\n\n    def _read(self, data, **kw):\n        fs, path = fsspec.core.url_to_fs(data.url, **(data.storage_options or {}))\n        path = fs.expand_path(path)  # or use fs.du with deep\n        return sum(fs.info(p)[\"size\"] for p in path)\n\n\nclass Pandas(FileReader):\n    imports = {\"pandas\"}\n    output_instance = \"pandas:DataFrame\"\n    storage_options = True\n\n\nclass PandasParquet(Pandas):\n    implements = {datatypes.Parquet}\n    optional_imports = {\"fastparquet\", \"pyarrow\"}\n    func = \"pandas:read_parquet\"\n    url_arg = \"path\"\n\n\nclass PandasFeather(Pandas):\n    implements = {datatypes.Feather2, datatypes.Feather1}\n    imports = {\"pandas\", \"pyarrow\"}\n    func = \"pandas:read_feather\"\n    url_arg = \"path\"\n\n\nclass PandasORC(Pandas):\n    implements = {datatypes.ORC}\n    imports = {\"pandas\", \"pyarrow\"}\n    func = \"pandas:read_orc\"\n    url_arg = \"path\"\n\n\nclass PandasExcel(Pandas):\n    implements = {datatypes.Excel}\n    imports = {\"pandas\", \"openpyxl\"}\n    func = \"pandas:read_excel\"\n    url_arg = \"io\"\n\n\nclass PandasSQLAlchemy(BaseReader):\n    implements = {datatypes.SQLQuery}\n    func = \"pandas:read_sql\"\n    
imports = {\"sqlalchemy\", \"pandas\"}\n    output_instance = \"pandas:DataFrame\"\n\n    def discover(self, **kwargs):\n        if \"chunksize\" not in kwargs:\n            kwargs[\"chunksize\"] = 10\n        return next(iter(self.read(**kwargs)))\n\n    def _read(self, data, **kwargs):\n        read_sql = import_name(self.func)\n        return read_sql(sql=data.query, con=data.conn, **kwargs)\n\n\nclass DaskDF(FileReader):\n    imports = {\"dask\", \"pandas\"}\n    output_instance = \"dask.dataframe:DataFrame\"\n    storage_options = True\n\n    def discover(self, **kwargs):\n        return self.read().head()\n\n\nclass DaskParquet(DaskDF):\n    implements = {datatypes.Parquet}\n    optional_imports = {\"fastparquet\", \"pyarrow\"}\n    func = \"dask.dataframe:read_parquet\"\n    url_arg = \"path\"\n\n\nclass DaskGeoParquet(DaskParquet):\n    imports = {\"dask\", \"geopandas\"}\n    func = \"dask_geopandas:read_parquet\"\n    output_instance = \"dask_geopandas.core:GeoDataFrame\"\n\n\nclass DaskHDF(DaskDF):\n    implements = {datatypes.HDF5}\n    optional_imports = {\"h5py\"}\n    func = \"dask.dataframe:read_hdf\"\n    url_arg = \"pattern\"\n\n    def _read(self, data, **kw):\n        return self._func(data.url, key=data.path, **kw)\n\n\nclass DaskJSON(DaskDF):\n    implements = {datatypes.JSONFile}\n    func = \"dask.dataframe:read_json\"\n    url_arg = \"url_path\"\n\n\nclass DaskDeltaLake(DaskDF):\n    implements = {datatypes.DeltalakeTable}\n    imports = {\"dask_deltatable\"}\n    func = \"dask_deltatable:read_deltalake\"\n    url_arg = \"path\"\n\n\nclass DaskSQL(BaseReader):\n    implements = {datatypes.SQLQuery}\n    imports = {\"dask\", \"pandas\", \"sqlalchemy\"}\n    func = \"dask.dataframe:read_sql\"\n\n    def _read(self, data, index_col, **kw):\n        \"\"\"Dask requires `index_col` to partition the dataframe on.\"\"\"\n        return self._func(data.quary, data.conn, index_col, **kw)\n\n\nclass DaskNPYStack(FileReader):\n    \"\"\"Requires a 
directory with .npy files and an \"info\" pickle file\"\"\"\n\n    # TODO: single npy file, or stack without info (which can be read from any one file)\n    implements = {datatypes.NumpyFile}\n    imports = {\"dask\", \"numpy\"}\n    func = \"dask.array:from_npy_stack\"\n    output_instance = \"dask.array:Array\"\n    url_arg = \"dirname\"\n\n\nclass DaskZarr(FileReader):\n    implements = {datatypes.Zarr}\n    imports = {\"dask\", \"zarr\"}\n    output_instance = \"dask.array:Array\"\n    func = \"dask.array:from_zarr\"\n\n    def _read(self, data, **kwargs):\n        return self._func(\n            url=data.url,\n            component=data.root or None,\n            storage_options=data.storage_options,\n            **kwargs,\n        )\n\n\nclass NumpyZarr(FileReader):\n    implements = {datatypes.Zarr}\n    imports = {\"zarr\"}\n    output_instance = \"numpy:ndarray\"\n    func = \"zarr:open\"\n\n    def _read(self, data, **kwargs):\n        return self._func(data.url, storage_options=data.storage_options, path=data.root, **kwargs)[\n            :\n        ]\n\n\nclass DuckDB(BaseReader):\n    imports = {\"duckdb\"}\n    output_instance = \"duckdb:DuckDBPyRelation\"  # can be converted to pandas with .df\n    func_doc = \"duckdb:query\"\n    implements = {datatypes.SQLQuery}\n    _dd = {}  # hold the engines, so results are still valid\n\n    def discover(self, **kwargs):\n        return self.read().limit(10)\n\n    @classmethod\n    def _duck(cls, data, conn=None):\n        import duckdb\n\n        conn = getattr(data, \"conn\", conn) or {}  # only SQL type normally has this\n        if str(conn) not in cls._dd:\n            # TODO:  separate engine creation and caching?\n            if isinstance(conn, str):\n                # https://duckdb.org/docs/extensions/\n                if conn.startswith(\"sqlite:\"):\n                    duckdb.connect(\":default:\").execute(\"INSTALL sqlite;LOAD sqlite;\")\n                    conn1 = re.sub(\"^sqlite3?:/{0,3}\", 
\"\", conn)\n                    conn1 = {\"database\": conn1}\n                    d = duckdb.connect(**conn1)\n                elif conn.startswith(\"postgres\"):\n                    d = duckdb.connect()\n                    d.execute(\"INSTALL postgres;LOAD postgres;\")\n                    # extra params possible here https://duckdb.org/docs/extensions/postgres_scanner#usage\n                    d.execute(f\"CALL postgres_attach('{conn}');\")\n                else:\n                    d = duckdb.connect(conn)\n            else:\n                d = duckdb.connect(**conn)\n            cls._dd[str(conn)] = d  # connection must be cached for results to be usable\n        d = cls._dd[str(conn)]\n        if isinstance(data, datatypes.FileData) and \"://\" in data.url:\n            d.execute(\"INSTALL httpfs;LOAD httpfs;\")\n        return d\n\n\nclass DuckParquet(DuckDB, FileReader):\n    implements = {datatypes.Parquet}\n\n    def _read(self, data, **kwargs):\n        return self._duck(data).query(f\"SELECT * FROM read_parquet('{data.url}')\")\n\n\nclass DuckCSV(DuckDB, FileReader):\n    implements = {datatypes.CSV}\n\n    def _read(self, data, **kwargs):\n        return self._duck(data).query(f\"SELECT * FROM read_csv_auto('{data.url}')\")\n\n\nclass DuckJSON(DuckDB, FileReader):\n    implements = {datatypes.JSONFile}\n\n    def _read(self, data, **kwargs):\n        return self._duck(data).query(f\"SELECT * FROM read_json_auto('{data.url}')\")\n\n\nclass DuckSQL(DuckDB):\n    implements = {datatypes.SQLQuery}\n\n    def _read(self, data, **kwargs):\n        words = len(data.query.split())\n        q = data.query if words > 1 else f\"SELECT * FROM {data.query}\"\n        return self._duck(data).query(q)\n\n\nclass SparkDataFrame(FileReader):\n    imports = {\"pyspark\"}\n    func = \"pyspark.sql:SparkSession.builder.getOrCreate\"\n    func_doc = \"pyspark.sql:SparkSession.read\"\n    output_instance = \"pyspark.sql:DataFrame\"\n\n    def discover(self, 
**kwargs):\n        return self.read(**kwargs).limit(10)\n\n\nclass SparkCSV(SparkDataFrame):\n    implements = {datatypes.CSV}\n\n    def _read(self, data, **kwargs):\n        return self._func().read.csv(data.url, **kwargs)\n\n\nclass SparkParquet(SparkDataFrame):\n    implements = {datatypes.Parquet}\n\n    def _read(self, data, **kwargs):\n        return self._func().read.parquet(data.url, **kwargs)\n\n\nclass SparkText(SparkDataFrame):\n    implements = {datatypes.Text}\n\n    def _read(self, data, **kwargs):\n        return self._func().read.text(data.url, **kwargs)\n\n\nclass SparkDeltaLake(SparkDataFrame):\n    implements = {datatypes.DeltalakeTable}\n    imports = {\"pyspark\", \"delta-spark\"}\n\n    def _read(self, data, **kw):\n        # see https://docs.delta.io/latest/quick-start.html#python for config\n        return self._func().read.format(\"delta\").load(data.url, **kw)\n\n\nclass HuggingfaceReader(BaseReader):\n    imports = {\"datasets\"}\n    implements = {datatypes.HuggingfaceDataset}\n    func = \"datasets:load_dataset\"\n    output_instance = \"datasets.arrow_dataset:Dataset\"\n\n    def _read(self, data, *args, **kwargs):\n        return self._func(data.name, split=data.split, **kwargs)\n\n\nclass SKLearnExampleReader(BaseReader):\n    func = \"sklearn:datasets\"\n    imports = {\"sklearn\"}\n    output_instance = \"sklearn.utils:Bunch\"\n\n    def _read(self, name, **kw):\n        import sklearn.datasets\n\n        loader = getattr(sklearn.datasets, f\"load_{name}\", None) or getattr(\n            sklearn.datasets, f\"fetch_{name}\"\n        )\n        return loader()\n\n\nclass LlamaServerReader(BaseReader):\n    \"\"\"Create llama.cpp server using local pretrained model file\n\n    The read() method allows you to pass arguments directly to the llama.cpp server\n    as kwargs. 
Common arguments are\n\n    host: (str) hostname for the the server to listen on, default: 127.0.0.1\n    port: (int) port number for the server to listen on, default: 0, which means first free port\n    system_prompt_file: (uri) Special handling here to support fsspec uri where the file is cached to disk first\n\n    Additional kwargs not passed to llama.cpp\n\n    startup_timeout: (int) time in seconds to wait for server to respond to a health check before failing, default 60\n    callback: fsspec.callbacks.Callback derived instance progress indicator during model download, default None\n\n    Any remaining kwargs are passed as '--<key> <value>' to llama.cpp. Where '_' is replaced with '-'. For\n    options that do not take arguments, like `--verbose` the value should  be None or the empty string \"\".\n\n    The following short-name arguments are supported as kwargs, they will be transformed to long-form when\n    passed to llama.cpp. If a short-name option is missing it is best to use the long-form.\n    \"\"\"\n\n    output_instance = \"intake.readers.datatypes:LlamaCPPService\"\n    implements = {datatypes.GGUF}\n    imports = {\"requests\"}\n\n    _short_kwargs = {\n        \"v\": \"verbose\",\n        \"s\": \"seed\",\n        \"t\": \"threads\",\n        \"tb\": \"threads-draft\",\n        \"tbd\": \"threads-batch-draft\",\n        \"ps\": \"p-split\",\n        \"lcs\": \"lookup-cache-static\",\n        \"lcd\": \"lookup-cache-dynamic\",\n        \"c\": \"ctx-size\",\n        \"n\": \"predict\",\n        \"b\": \"batch-size\",\n        \"ub\": \"ubatch-size\",\n        \"fa\": \"flash-attn\",\n        \"p\": \"prompt\",\n        \"f\": \"file\",\n        \"bf\": \"binary-file\",\n        \"e\": \"escape\",\n        \"ptc\": \"prompt-token-count\",\n        \"r\": \"reverse-prompt\",\n        \"sp\": \"special\",\n        \"cnv\": \"conversation\",\n        \"l\": \"logit-bias\",\n        \"j\": \"json-schema\",\n        \"gan\": \"grp-attn-n\",\n        
\"gaw\": \"grp-attn-w\",\n        \"dkvc\": \"dump-kv-cache\",\n        \"nkvo\": \"no-ko-offload\",\n        \"ctk\": \"cache-type-k\",\n        \"ctv\": \"cache-type-v\",\n        \"dt\": \"defrag-thold\",\n        \"np\": \"parallel\",\n        \"ns\": \"sequences\",\n        \"cb\": \"cont-batching\",\n        \"ngl\": \"gpu-layers\",\n        \"ngld\": \"gpu-layers-draft\",\n        \"sm\": \"split-mode\",\n        \"ts\": \"tensor-split\",\n        \"mg\": \"main-gpu\",\n        \"md\": \"model-draft\",\n        \"o\": \"output\",\n        \"sps\": \"slot-prompt-similarity\",\n        \"ld\": \"logdir\",\n    }\n\n    @classmethod\n    def _short_kwargs_docs(cls):\n        return \"\\n\".join(f\"    -{k:4s}-> --{v}\" for k, v in cls._short_kwargs.items())\n\n    @classmethod\n    @lru_cache()\n    def _find_executable(cls):\n        import shutil\n\n        # executables were renamed in https://github.com/ggerganov/llama.cpp/pull/7809\n        path = shutil.which(\"llama-server\")\n        if path is None:\n            # fallback on old name\n            path = shutil.which(\"server\")\n        return path\n\n    @classmethod\n    def check_imports(cls):\n        imports = super().check_imports()\n        path = cls._find_executable()\n        return imports & (path is not None)\n\n    def _local_model_path(self, data, callback=DEFAULT_CALLBACK):\n        import os\n        from fsspec.core import split_protocol\n        from intake.catalog.default import user_data_dir\n\n        protocol, _ = split_protocol(data.url)\n        if protocol is None:\n            # no protocol means local path\n            return data.url\n\n        storage_options = {} if data.storage_options is None else data.storage_options\n        cache_location = os.path.join(user_data_dir(), \"llama.cpp\")\n        options = {\n            protocol: storage_options,\n            \"simplecache\": {\"cache_storage\": cache_location},\n        }\n        fs, path = 
fsspec.core.url_to_fs(f\"simplecache::{data.url}\", **options)\n\n        cached_fn = fs._check_file(path)\n        if cached_fn:\n            return cached_fn\n\n        sha = fs._mapper(path)\n        cached_fn = os.path.join(fs.storage[-1], sha)\n\n        fs.fs.get_file(path, cached_fn, callback=callback)\n        return cached_fn\n\n    def _read(self, data, log_file=\"llama-cpp.log\", **kwargs):\n        startup_timeout = kwargs.pop(\"startup_timeout\", 60)\n        callback = kwargs.pop(\"callback\", DEFAULT_CALLBACK)\n\n        port = kwargs.pop(\"port\", 0)\n        host = kwargs.pop(\"host\", \"127.0.0.1\")\n\n        if port == 0:\n            port = find_free_port()\n\n        URL = f\"http://{host}:{port}\"\n        if port_in_use(host, port):\n            raise RuntimeError(f\"{URL} in use.\")\n\n        import requests\n        import subprocess\n        import atexit\n\n        f = open(log_file, \"wb\")\n        server_path = self._find_executable()\n        path = self._local_model_path(data, callback=callback)\n        cmd = [server_path, \"-m\", path, \"--host\", host, \"--port\", str(port), \"--log-disable\"]\n        for k, v in kwargs.items():\n            if k in self._short_kwargs:\n                k = self._short_kwargs[k]\n\n            k = k.replace(\"_\", \"-\")\n            if not k.startswith(\"-\"):\n                k = f\"--{k}\"\n\n            if k == \"--system-prompt-file\":\n                path = fsspec.open_local(f\"simplecache::{v}\")\n                cmd.extend([str(k), path])\n            elif v not in [None, \"\"]:\n                cmd.extend([str(k), str(v)])\n            else:\n                cmd.append(str(k))\n\n        P = subprocess.Popen(cmd, stdout=f, stderr=f)\n        import time\n\n        t0 = time.time()\n        while True:\n            try:\n                res = requests.get(f\"{URL}/health\")\n                if res.ok:\n                    break\n            except requests.ConnectionError:\n             
   pass\n            elapsed = time.time() - t0\n            if (P.poll() is not None) or (elapsed > startup_timeout):\n                raise RuntimeError(\n                    f\"Could not start {server_path}. See {log_file} for more details.\"\n                )\n\n        atexit.register(P.terminate)\n        return intake.readers.datatypes.LlamaCPPService(\n            url=URL, options={\"Process\": P, \"log_file\": log_file}\n        )\n\n\nLlamaServerReader.__doc__ += f\"\\n{LlamaServerReader._short_kwargs_docs()}\"\n\n\nclass LlamaCPPCompletion(BaseReader):\n    implements = {datatypes.LlamaCPPService}\n    imports = {\"requests\"}\n    output_instance = \"builtins:str\"\n\n    def _read(self, data, prompt: str = \"\", *args, **kwargs):\n        import requests\n\n        r = requests.post(\n            f\"{data.url}/completion\",\n            json={\"prompt\": prompt, **kwargs},\n            headers={\"Content-Type\": \"application/json\"},\n        )\n        return r.json()[\"content\"]\n\n\nclass LlamaCPPEmbedding(BaseReader):\n    implements = {datatypes.LlamaCPPService}\n    imports = {\"requests\"}\n    output_instance = \"builtins:str\"\n\n    def _read(self, data, prompt: str = \"\", *args, **kwargs):\n        import requests\n\n        r = requests.post(\n            f\"{data.url}/embedding\",\n            json={\"content\": prompt, **kwargs},\n            headers={\"Content-Type\": \"application/json\"},\n        )\n        return r.json()[\"embedding\"]\n\n\nclass OpenAIReader(BaseReader):\n    implements = {datatypes.OpenAIService}\n    imports = {\"openai\"}\n    output_instance = \"openai:OpenAI\"\n\n    def _read(self, data, **kwargs):\n        import openai\n\n        client = openai.Client(api_key=data.key, base_url=data.url, **kwargs)\n        return client\n\n\nclass OpenAICompletion(BaseReader):\n    implements = {datatypes.OpenAIService}\n    imports = {\"requests\"}\n    output_instance = \"builtins:dict\"\n    # related high-volume 
endpoints, assistant, embeddings\n\n    def _read(self, data, messages: list[dict], *args, model=\"gtp-3.5-turbo\", **kwargs):\n        import requests\n\n        url = f\"{data.url}/v1/chat/completions\"\n        options = data.options.copy()\n        options.update(kwargs)\n        r = requests.get(\n            url,\n            headers={\"Content-Type\": \"application/json\", \"Authorization\": f\"Bearer {data.key}\"},\n            json=dict(messages=messages, **options),\n        )\n        return r.json()[\"choices\"][0][\"message\"]\n\n\nclass TorchDataset(BaseReader):\n    output_instance = \"torch.utils.data:Dataset\"\n\n    def _read(self, modname, funcname, rootdir, **kw):\n        import importlib\n\n        mod = importlib.import_module(f\"torch{modname}\")\n        func = getattr(mod.datasets, funcname)\n        try:\n            return func(rootdir, download=True)\n        except TypeError:\n            return func(rootdir)\n\n\nclass TFPublicDataset(BaseReader):\n    # contains ({split: tensorflow.data.Dataset}, data_info) by default\n    output_instance = \"builtins:tuple\"\n    func = \"tensorflow_datasets:load\"\n\n    def _read(self, name, *args, **kwargs):\n        return self._func(name, download=True, with_info=True, **kwargs)\n\n\nclass TFTextreader(FileReader):\n    imports = {\"tensorflow\"}\n    implements = {datatypes.Text}\n    func = \"tensorflow.data:TextLineDataset\"\n    output_instance = \"tensorflow.data:Dataset\"\n    url_arg = \"filenames\"\n\n\nclass TFORC(FileReader):\n    imports = {\"tensorflow_io\"}\n    implements = {datatypes.ORC}\n    func = \"tensorflow_io:IODataset.from_orc\"\n    url_arg = \"filename\"\n    output_instance = \"tensorflow.data:Dataset\"\n\n\nclass TFSQL(BaseReader):\n    imports = {\"tensorflow_io\"}\n    implements = {datatypes.SQLQuery}\n    func = \"tensorflow_io:experimental.IODataset.from_sql\"\n    output_instance = \"tensorflow.data:Dataset\"\n\n    def _read(self, data, **kwargs):\n        
return self._func(endpoint=data.conn, query=data.query, **kwargs)\n\n\nclass KerasImageReader(FileReader):\n    imports = {\"keras\"}\n    implements = {datatypes.PNG, datatypes.JPEG}  # others\n    func = \"keras.utils:image_dataset_from_directory\"\n    output_instance = \"tensorflow.data:Dataset\"\n    url_arg = \"directory\"\n\n\nclass KerasText(FileReader):\n    imports = {\"keras\"}\n    implements = {datatypes.Text}\n    func = \"keras.utils:text_dataset_from_directory\"\n    output_instance = \"tensorflow.data:Dataset\"\n    url_arg = \"directory\"\n\n\nclass KerasAudio(FileReader):\n    imports = {\"keras\"}\n    implements = {datatypes.WAV}\n    func = \"keras.utils:audio_dataset_from_directory\"\n    output_instance = \"tensorflow.data:Dataset\"\n    url_arg = \"directory\"\n\n\nclass KerasModelReader(FileReader):\n    imports = {\"keras\"}\n    implements = {datatypes.KerasModel}\n    func = \"tensorflow.keras.models:load_model\"\n    url_arg = \"filepath\"\n    output_instance = \"keras.engine.training:Model\"\n\n\nclass TFRecordReader(FileReader):\n    imports = {\"tensorflow\"}\n    implements = {datatypes.TFRecord}\n    func = \"tensorflow.data:TFRecordDataset\"\n    output_instance = \"tensorflow.data:TFRecordDataset\"\n    url_arg = \"filenames\"\n\n\nclass SKLearnModelReader(FileReader):\n    # https://scikit-learn.org/stable/model_persistence.html\n    # recommends skops, which seems little used\n    imports = {\"sklearn\"}\n    implements = {datatypes.SKLearnPickleModel}\n    func = \"pickle:load\"\n    output_instance = \"sklearn.base:BaseEstimator\"\n\n    def _read(self, data, **kw):\n        with fsspec.open(data.url, **(data.storage_options or {})) as f:\n            return self._func(f)\n\n\nclass Awkward(FileReader):\n    imports = {\"awkward\"}\n    output_instance = \"awkward:Array\"\n    storage_options = True\n\n\nclass AwkwardParquet(Awkward):\n    implements = {datatypes.Parquet}\n    imports = {\"awkward\", \"pyarrow\"}\n    func 
= \"awkward:from_parquet\"\n    url_arg = \"path\"\n\n    def discover(self, **kwargs):\n        kwargs[\"row_groups\"] = [0]\n        return self.read(**kwargs)\n\n\nclass DaskAwkwardParquet(AwkwardParquet):\n    imports = {\"dask_awkward\", \"pyarrow\", \"dask\"}\n    func = \"dask_awkward:from_parquet\"\n    output_instance = \"dask_awkward:Array\"\n\n    def discover(self, **kwargs):\n        return self.read(**kwargs).partitions[0]\n\n\nclass AwkwardJSON(Awkward):\n    implements = {datatypes.JSONFile}\n    func = \"awkward:from_json\"\n    url_arg = \"source\"\n\n\nclass AwkwardAVRO(Awkward):\n    implements = {datatypes.AVRO}\n    func = \"awkward:from_avro_file\"\n    url_arg = \"file\"\n\n\nclass DaskAwkwardJSON(Awkward):\n    imports = {\"dask_awkward\", \"dask\"}\n    func = \"dask_awkward:from_json\"\n    output_instance = \"dask_awkward:Array\"\n    url_arg = \"source\"\n\n    def discover(self, **kwargs):\n        return self.read(**kwargs).partitions[0]\n\n\nclass HandleToUrlReader(BaseReader):\n    \"\"\"Dereference handle (hdl:) identifiers\n\n    See handle.net for a description of the registry.\n    \"\"\"\n\n    implements = {datatypes.Handle}\n    func = \"requests:get\"\n    imports = {\"requests\", \"aiohttp\"}\n    output_instance = datatypes.BaseData.qname()\n\n    @classmethod\n    def _extract(cls, meta, base):\n        h = fsspec.filesystem(\"http\")\n        if \"URL_ORIGINAL_DATA\" in meta:\n            # file\n            url = re.findall('href=\"(.*?)\"', meta[\"URL_ORIGINAL_DATA\"][\"value\"])[0]\n        elif \"HAS_PARTS\" in meta:\n            # dataset\n            ids = meta[\"HAS_PARTS\"][\"value\"].split(\";\")\n            rr = h.cat([f\"{base}/{u.lstrip('hdl:/')}\" for u in ids])\n            rr2 = [{i[\"type\"]: i[\"data\"] for i in json.loads(r)[\"values\"]} for r in rr.values()]\n            url = [cls._extract(r2, base) for r2 in rr2]\n        return url\n\n    def _read(self, data, 
base=\"https://hdl.handle.net/api/handles\", **kwargs):\n        h = fsspec.filesystem(\"http\")\n        r = h.cat(f\"{base}/{data.url.lstrip('hdl:/')}\")\n        j = json.loads(r)\n        meta = {i[\"type\"]: i[\"data\"] for i in j[\"values\"]}\n        url = self._extract(meta, base)\n        # TODO: we can assume HDF->xarray here?\n        cls = datatypes.recommend(url[0] if isinstance(url, list) else url)[0]\n        return cls(url=url, metadata=meta)\n\n\nclass PandasCSV(Pandas):\n    implements = {datatypes.CSV}\n    func = \"pandas:read_csv\"\n    url_arg = \"filepath_or_buffer\"\n\n    def discover(self, **kw):\n        kw[\"nrows\"] = 10\n        kw.pop(\"skipfooter\", None)\n        kw.pop(\"chunksize\", None)\n\n        return self.read(**kw)\n\n\nclass PandasHDF5(Pandas):\n    implements = {datatypes.HDF5}\n    func = \"pandas:read_hdf\"\n    imports = {\"pandas\", \"pytables\"}\n\n    def _read(self, data, **kw):\n        if data.storage_options:  # or fsspec-like\n            with fsspec.open(data.url, \"rb\", **data.storage_options) as f:\n                self._func(f, data.path, **kw)\n        return self._func(data.url, **kw)\n\n\nclass DaskCSV(DaskDF):\n    implements = {datatypes.CSV}\n    func = \"dask.dataframe:read_csv\"\n    url_arg = \"urlpath\"\n\n\nclass DaskText(FileReader):\n    imports = {\"dask\"}\n    implements = {datatypes.Text}\n    func = \"dask.bag:read_text\"\n    output_instance = \"dask.bag.core:Bag\"\n    storage_options = True\n    url_arg = \"urlpath\"\n\n    def discover(self, n=10, **kwargs):\n        return self.read().take(n)\n\n\nclass DaskCSVPattern(DaskCSV):\n    \"\"\"Apply categorical data extraction to a set of CSV paths using dask\n\n    Paths are of the form \"proto://path/{field}/measurement_{date:%Y-%m-%d}.csv\",\n    where the format-like fields will be captured as columns in the output.\n    \"\"\"\n\n    implements = {datatypes.CSVPattern}\n\n    def _read(self, data, **kw):\n        from 
pandas.api.types import CategoricalDtype\n\n        from intake.readers.utils import pattern_to_glob\n        from intake.source.utils import reverse_formats\n\n        url = pattern_to_glob(data.url)\n        df = self._func(url, storage_options=data.storage_options, include_path_column=True, **kw)\n\n        paths = sorted(df[\"path\"].cat.categories)\n\n        column_by_field = {\n            field: df[\"path\"]\n            .cat.codes.map(dict(enumerate(values)))\n            .astype(CategoricalDtype(set(values)))\n            for field, values in reverse_formats(data.url, paths).items()\n        }\n\n        return df.assign(**column_by_field).drop(columns=[\"path\"])\n\n\nclass Polars(FileReader):\n    imports = {\"polars\"}\n    output_instance = \"polars:LazyFrame\"\n    url_arg = \"source\"\n\n    def discover(self, **kwargs):\n        # https://pola-rs.github.io/polars/py-polars/html/reference/\n        #   lazyframe/api/polars.LazyFrame.fetch.html\n        return self.read().fetch()\n\n\nclass PolarsDeltaLake(Polars):\n    implements = {datatypes.DeltalakeTable}\n    func = \"polars:scan_delta\"\n\n\nclass PolarsAvro(Polars):\n    implements = {datatypes.AVRO}\n    func = \"polars:read_avro\"\n    output_instance = \"polars:DataFrame\"  # i.e., not lazy\n\n\nclass PolarsFeather(Polars):\n    implements = {datatypes.Feather2}\n    func = \"polars:scan_ipc\"\n\n\nclass PolarsParquet(Polars):\n    implements = {datatypes.Parquet}\n    func = \"polars:scan_parquet\"\n\n\nclass PolarsCSV(Polars):\n    implements = {datatypes.CSV}\n    func = \"polars:scan_csv\"\n\n\nclass PolarsJSON(Polars):\n    implements = {datatypes.JSONFile}\n    func = \"polars:scan_ndjson\"\n\n\nclass PolarsIceberg(Polars):\n    imports = {\"polars\", \"pyiceberg\"}\n    implements = {datatypes.IcebergDataset}\n    func = \"polars:scan_iceberg\"\n\n\nclass PolarsExcel(Polars):\n    implements = {datatypes.Excel}\n    func = \"polars:read_excel\"\n    output_instance = 
\"polars:DataFrame\"  # i.e., not lazy\n\n\nclass Ray(FileReader):\n    # https://docs.ray.io/en/latest/data/creating-datasets.html#supported-file-formats\n    imports = {\"ray\"}\n    output_instance = \"ray.data:Dataset\"\n    url_arg = \"paths\"\n\n    def discover(self, **kwargs):\n        return self.read(**kwargs).limit(10)\n\n    def _read(self, data, **kw):\n        if (\n            data.url.startswith(\"s3://\")\n            and data.storage_options\n            and data.storage_options.get(\"anon\")\n        ):\n            data = type(data)(url=f\"s3://anonymous@{data.url[5:]}\")\n        # TODO: other auth parameters, key/secret, token\n        #  apparently, creating an S3FileSystem here is also allowed\n        return super()._read(data, **kw)\n\n\nclass RayParquet(Ray):\n    implements = {datatypes.Parquet}\n    func = \"ray.data:read_parquet\"\n\n\nclass RayCSV(Ray):\n    implements = {datatypes.CSV}\n    func = \"ray.data:read_csv\"\n\n\nclass RayJSON(Ray):\n    implements = {datatypes.JSONFile}\n    func = \"ray.data:read_json\"\n\n\nclass RayText(Ray):\n    implements = {datatypes.Text}\n    func = \"ray.data:read_text\"\n\n\nclass RayBinary(Ray):\n    implements = {datatypes.FileData}\n    func = \"ray.data:read_binary_files\"\n\n\nclass RayDeltaLake(Ray):\n    implements = {datatypes.DeltalakeTable}\n    imports = {\"deltaray\"}\n    func = \"deltaray:read_delta\"\n    url_arg = \"table_uri\"\n\n\nclass DeltaReader(FileReader):\n    implements = {datatypes.Parquet, datatypes.DeltalakeTable}\n    imports = {\"deltalake\"}\n    func = \"deltalake:DeltaTable\"\n    url_arg = \"table_uri\"\n    storage_options = True\n    output_instance = \"deltalake:DeltaTable\"\n\n\nclass TiledNode(BaseReader):\n    implements = {datatypes.TiledService}\n    imports = {\"tiled\"}\n    output_instance = \"tiled.client.node:Node\"\n    func = \"tiled.client:from_uri\"\n\n    def _read(self, data, **kwargs):\n        opts = data.options.copy()\n        
opts.update(kwargs)\n        return self._func(data.url, **opts)\n\n\nclass TiledClient(BaseReader):\n    # returns dask/normal x/array/dataframe\n    implements = {datatypes.TiledDataset}\n    output_instance = \"tiled.client.base:BaseClient\"\n\n    def _read(self, data, as_client=True, dask=False, **kwargs):\n        from tiled.client import from_uri\n\n        opts = data.options.copy()\n        opts.update(kwargs)\n        if dask:\n            opts[\"structure_clients\"] = \"dask\"\n        client = from_uri(data.url, **opts)\n        if as_client:\n            return client\n        else:\n            return client.read()\n\n\nclass TileDBReader(BaseReader):\n    imports = {\"tiledb\"}\n    implements = {datatypes.TileDB}\n    output_instance = \"tiledb.libtiledb.Array\"\n    func = \"tiledb:open\"\n\n    def _read(self, data, attribute=None, **kwargs):\n        return self._func(data.url, attr=attribute, config=data.options, **kwargs)\n\n\nclass TileDBDaskReader(BaseReader):\n    imports = {\"tiledb\", \"dask\"}\n    func = \"dask.array:from_tiledb\"\n    implements = {datatypes.TileDB}\n    output_instance = \"dask.array:Array\"\n\n    def _read(self, data, attribute=None, **kwargs):\n        return self._func(data.url, attribute=attribute, config=data.options, **kwargs)\n\n\nclass PythonModule(BaseReader):\n    output_instance = \"builtins:module\"\n    implements = {datatypes.PythonSourceCode}\n\n    def _read(self, data, module_name=None, **kwargs):\n        from types import ModuleType\n\n        if module_name is None:\n            module_name = data.url.rsplit(\"/\", 1)[-1].split(\".\", 1)[0]\n        with fsspec.open(data.url, \"rt\", **(data.storage_options or {})) as f:\n            mod = ModuleType(module_name)\n            exec(f.read(), mod.__dict__)\n            return mod\n\n\nclass SKImageReader(FileReader):\n    output_instance = \"numpy:ndarray\"\n    imports = {\"scikit-image\"}\n    implements = {datatypes.PNG, datatypes.TIFF, 
datatypes.JPEG}\n    func = \"skimage.io:imread\"\n    url_arg = \"fname\"\n\n\nclass NumpyText(FileReader):\n    output_instance = \"numpy:ndarray\"\n    implements = {datatypes.Text}\n    imports = {\"numpy\"}\n    func = \"numpy:loadtxt\"\n\n    def _read(self, data, **kw):\n        if data.storage_options or \"://\" in data.url or \"::\" in data.url:\n            with fsspec.open(data.url, **(data.storage_options or {})) as f:\n                return self._func(f, **kw)\n        return self._func(data.url, **kw)\n\n\nclass NumpyReader(NumpyText):\n    func = \"numpy:load\"\n    implements = {datatypes.NumpyFile}\n\n\nclass CupyNumpyReader(NumpyText):\n    output_instance = \"cupy:ndarray\"\n    implements = {datatypes.NumpyFile}\n    imports = {\"cupy\"}\n    func = \"cupy:loadtxt\"\n\n\nclass CupyTextReader(CupyNumpyReader):\n    implements = {datatypes.Text}\n    func = \"numpy:loadtxt\"\n\n\nclass XArrayDatasetReader(FileReader):\n    output_instance = \"xarray:Dataset\"\n    imports = {\"xarray\"}\n    optional_imports = {\"zarr\", \"h5netcdf\", \"cfgrib\", \"scipy\", \"tiledb\"}  # and others\n    # DAP is not a file but an API, maybe should be separate\n    implements = {\n        datatypes.NetCDF3,\n        datatypes.HDF5,\n        datatypes.GRIB2,\n        datatypes.IcechunkRepo,\n        datatypes.Zarr,\n        datatypes.OpenDAP,\n        datatypes.TileDB,\n    }\n    # xarray also reads from images and tabular data\n    func = \"xarray:open_mfdataset\"\n    other_funcs = {\"xarray:open_dataset\"}\n    other_urls = {\"xarray:open_dataset\": \"filename_or_obj\"}\n    url_arg = \"paths\"\n\n    def _read(self, data, open_local=False, **kw):\n        from xarray import open_dataset, open_mfdataset\n\n        if \"engine\" not in kw:\n            if isinstance(data, (datatypes.Zarr, datatypes.IcechunkRepo)):\n                kw[\"engine\"] = \"zarr\"\n                if data.root and \"group\" not in kw:\n                    kw[\"group\"] = data.root\n    
        elif isinstance(data, datatypes.TileDB):\n                kw[\"engine\"] = \"tiledb\"\n                if data.options:\n                    kw.setdefault(\"backend_kwargs\", {})[\"config\"] = data.options\n            elif isinstance(data, datatypes.NetCDF3):\n                kw[\"engine\"] = \"scipy\"\n            elif isinstance(data, datatypes.HDF5):\n                kw[\"engine\"] = \"h5netcdf\"\n        if kw.get(\"engine\", \"\") in [\"zarr\", \"kerchunk\"] and data.storage_options:\n            kw.setdefault(\"backend_kwargs\", {})[\"storage_options\"] = data.storage_options\n        if isinstance(data, (datatypes.HDF5, datatypes.NetCDF3)):\n            kw.setdefault(\"engine\", \"h5netcdf\")\n            if getattr(data, \"path\", False):\n                kw[\"group\"] = data.path\n        if isinstance(data, datatypes.IcechunkRepo):\n            import icechunk\n\n            url = f\"{data.url}_storage\" if \"storage\" not in data.url else data.url\n            store_cls = getattr(icechunk, url)\n            store = store_cls(**(data.storage_options or {}))\n            repo = icechunk.Repository.open(store)\n            session = repo.readonly_session(data.ref)\n            zarr_store = session.store\n            kw.get(\"backend_kwargs\", {}).pop(\"storage_options\", None)\n            kw.setdefault(\"backend_kwargs\", {})[\"consolidated\"] = False\n            return open_dataset(zarr_store, **kw)\n        auth = kw.pop(\"auth\", \"\")\n        if isinstance(data, datatypes.OpenDAP) and auth and kw.get(\"engine\", \"\") == \"pydap\":\n            import requests\n\n            if isinstance(auth, str):\n                setup = intake.import_name(f\"pydap.cas.{auth}:setup_session\")\n                un = kw.pop(\"username\", os.getenv(\"DAP_USER\"))\n                pw = kw.pop(\"password\", os.getenv(\"DAP_PASSWORD\"))\n                session = setup(un, pw, check_url=data.url)\n            else:\n                session = 
requests.Session()\n                session.auth = auth\n            return open_dataset(data.url, session=session, **kw)\n        if isinstance(data.url, (tuple, set, list)) and len(data.url) == 1:\n            return open_dataset(data.url[0], **kw)\n        elif (isinstance(data.url, (tuple, set, list)) and len(data.url) > 1) or (\n            isinstance(data.url, str) and \"*\" in data.url\n        ):\n            if isinstance(data, (datatypes.Zarr, datatypes.OpenDAP)):\n                ofs = data.url\n            elif open_local:\n                ofs = fsspec.open_local(data.url, **(data.storage_options or {}))\n            elif (isinstance(data.url, str) and is_fsspec_url(data.url)) or is_fsspec_url(\n                data.url[0]\n            ):\n                ofs = [\n                    _.open() for _ in fsspec.open_files(data.url, **(data.storage_options or {}))\n                ]\n            else:\n                ofs = data.url\n            return open_mfdataset(ofs, **kw)\n        else:\n            if (\n                isinstance(data, datatypes.FileData)\n                and is_fsspec_url(data.url)\n                and not isinstance(data, datatypes.Zarr)\n            ):\n                # special case, because xarray would assume a DAP endpoint\n                if open_local:\n                    f = fsspec.open_local(data.url, **(data.storage_options or {}))\n                    return open_dataset(f, **kw)\n                else:\n                    f = fsspec.open(data.url, **(data.storage_options or {})).open()\n                    return open_dataset(f, **kw)\n            return open_dataset(data.url, **kw)\n\n\nclass XArrayPatternReader(XArrayDatasetReader):\n    \"\"\"Same as XArrayDatasetReader, but recognises file patterns\n\n    Use a URL like \"/path/file_{value}_.nc\"; the template may include\n    specifiers like \":d\" to determine the type of the values inferred. 
This\n    reader supports all the same filetypes as XArrayDatasetReader.\n\n    The read step may be accelerated by providing arguments like\n    ``parallel=True`` and ``combine_attrs=\"override\"`` - see the\n    xr.open_mfdataset documentation.\n\n    Note: this method determines the ``concat_dim`` and ``combine`` arguments\n    itself; a ``concat_dim`` passed by the caller is only used to rename the\n    inferred dimensions, and ``combine`` defaults to ``\"nested\"``.\n    \"\"\"\n\n    # should we have an explicit pattern type data input?\n\n    def _read(self, data, open_local=False, pattern=None, **kw):\n        import pandas as pd\n        from intake.readers.utils import pattern_to_glob\n        from intake.source.utils import reverse_formats\n\n        if isinstance(data.url, str):\n            url = pattern_to_glob(data.url)\n            fs, _, paths = fsspec.get_fs_token_paths(url, **(data.storage_options or {}))\n            val_dict = reverse_formats(data.url, paths)\n        else:\n            paths = data.url\n            val_dict = reverse_formats(pattern, data.url)\n        indices = [pd.Index(v, name=k) for k, v in val_dict.items()]\n        data2 = type(data)(url=paths, storage_options=data.storage_options, metadata=data.metadata)\n        if \"concat_dim\" in kw:\n            ccm = kw.pop(\"concat_dim\")\n            ccm = [ccm] if isinstance(ccm, str) else ccm\n            for ind, cd in zip(indices, ccm):\n                ind.name = cd\n        kw.setdefault(\"combine\", \"nested\")\n        return super()._read(data2, concat_dim=indices, open_local=open_local, **kw)\n\n\nclass RasterIOXarrayReader(FileReader):\n    output_instance = \"xarray:Dataset\"\n    imports = {\"rioxarray\"}\n    implements = {datatypes.TIFF, datatypes.GDALRasterFile}\n    func = \"rioxarray:open_rasterio\"\n    url_arg = \"filename\"\n\n    def _read(self, data, concat_kwargs=None, **kwargs):\n        import xarray as xr\n        from rioxarray import open_rasterio\n\n        concat_kwargs = concat_kwargs or {\n            k: kwargs.pop(k)\n            for k in 
{\"dim\", \"data_vars\", \"coords\", \"compat\", \"position\", \"join\"}\n            if k in kwargs\n        }\n\n        ofs = fsspec.open_files(data.url, **(data.storage_options or {}))\n        bits = [open_rasterio(of.open(), **kwargs) for of in ofs]\n        if len(bits) == 1:\n            return bits\n        else:\n            # requires dim= in kwargs\n            return xr.concat(bits, **concat_kwargs)\n\n\nclass GeoPandasReader(FileReader):\n    # TODO: geopandas also supports postGIS\n    output_instance = \"geopandas:GeoDataFrame\"\n    imports = {\"geopandas\"}\n    implements = {\n        datatypes.GeoJSON,\n        datatypes.CSV,\n        datatypes.SQLite,\n        datatypes.Shapefile,\n        datatypes.GDALVectorFile,\n        datatypes.GeoPackage,\n        datatypes.FlatGeoBuf,\n    }\n    func = \"geopandas:read_file\"\n    url_arg = \"filename\"\n\n    def _read(self, data, with_fsspec=None, **kwargs):\n        import geopandas\n\n        if with_fsspec is None:\n            with_fsspec = (\n                (\"://\" in data.url and \"!\" not in data.url)\n                or \"::\" in data.url\n                or data.storage_options\n            )\n        if with_fsspec:\n            with fsspec.open(data.url, **(data.storage_options or {})) as f:\n                return geopandas.read_file(f, **kwargs)\n        return geopandas.read_file(data.url, **kwargs)\n\n\nclass GeoPandasTabular(FileReader):\n    output_instance = \"geopandas:GeoDataFrame\"\n    imports = {\"geopandas\", \"pyarrow\"}\n    implements = {datatypes.Parquet, datatypes.Feather2}\n    func = \"geopandas:read_parquet\"\n    other_funcs = {\"geopandas:read_feather\"}\n    url_arg = \"path\"\n\n    def _read(self, data, **kwargs):\n        import geopandas\n\n        if \"://\" in data.url or \"::\" in data.url:\n            f = fsspec.open(data.url, **(data.storage_options or {})).open()\n        else:\n            f = data.url\n        if isinstance(data, datatypes.Parquet):\n 
           return geopandas.read_parquet(f, **kwargs)\n        elif isinstance(data, datatypes.Feather2):\n            return geopandas.read_feather(f, **kwargs)\n        else:\n            raise ValueError\n\n\nclass ScipyMatlabReader(FileReader):\n    output_instance = \"numpy:ndarray\"\n    implements = {datatypes.MatlabArray}\n    imports = {\"scipy\"}\n    func = \"scipy.io:loadmat\"\n\n    def _read(self, data, **kwargs):\n        return self._func(data.path, appendmat=False, **kwargs)[data.variable]\n\n\nclass ScipyMatrixMarketReader(FileReader):\n    output_instance = \"scipy.sparse:coo_matrix\"  # numpy-like\n    implements = {datatypes.MatrixMarket}\n    imports = {\"scipy\"}\n    func = \"scipy.io:mmread\"\n\n    def _read(self, data, **kw):\n        with fsspec.open(data.url, **data.storage_options) as f:\n            return self._func(f)\n\n\nclass NibabelNiftiReader(FileReader):\n    output_instance = \"nibabel.spatialimages:SpatialImage\"\n    implements = {datatypes.Nifti}  # and other medical image types\n    imports = {\"nibabel\"}\n    func = \"nibabel:load\"\n    url_arg = \"filename\"\n\n    def _read(self, data, **kw):\n        with fsspec.open(data.url, **(data.storage_options or {})) as f:\n            return self._func(f, **kw)\n\n\nclass FITSReader(FileReader):\n    output_instance = \"astropy.io.fits:HDUList\"\n    implements = {datatypes.FITS}\n    imports = {\"astropy\"}\n    func = \"astropy.io.fits:open\"\n\n    def _read(self, data, **kw):\n        if data.storage_options:\n            kw.pop(\"use_fsspec\")\n            kw.pop(\"fsspec_kwargs\")\n            return self._func(data.url, use_fsspec=True, fsspec_kwargs=data.storage_options, **kw)\n        return self._func(data.url, **kw)\n\n\nclass ASDFReader(FileReader):\n    implements = {datatypes.ASDF}\n    imports = {\"asdf\"}\n    func = \"asdf:open\"\n    output_instance = \"asdf:AsdfFile\"\n\n    def _read(self, data, **kw):\n        if data.storage_options or \"://\" in 
data.url or \"::\" in data.url:\n            # want the file to stay open, since array access is lazy by default\n            f = fsspec.open(data.url, **(data.storage_options or {})).open()\n            return self._func(f, **kw)\n        return self._func(data.url, **kw)\n\n\nclass DicomReader(FileReader):\n    output_instance = \"pydicom.dataset:FileDataset\"\n    implements = {datatypes.DICOM}\n    imports = {\"pydicom\"}\n    func = \"pydicom:read_file\"\n    url_arg = \"fp\"  # can be file-like\n    storage_options = True\n\n    def _read(self, data, **kw):\n        with fsspec.open(data.url, **(data.storage_options or {})) as f:\n            return self._func(f, **kw)\n\n\nclass Condition(BaseReader):\n    def _read(\n        self, if_true, if_false, condition: callable[[BaseReader, ...], bool] | bool, **kwargs\n    ):\n        if isinstance(condition, bool):\n            cond = condition\n        elif isinstance(condition, BaseReader):\n            cond = condition.read()\n        else:\n            cond = condition(**kwargs)\n        if cond:\n            return if_true.read() if isinstance(if_true, BaseReader) else if_true\n        else:\n            return if_false.read() if isinstance(if_false, BaseReader) else if_false\n\n\nclass PMTileReader(BaseReader):\n    implements = {datatypes.PMTiles}\n    func = \"pmtiles.reader:Reader\"\n    output_instance = \"pmtiles.reader:Reader\"\n\n    def _read(self, data):\n        import pmtiles.reader\n\n        if \"://\" in data.url or \"::\" in data.url:\n            f = fsspec.open(data.url, **(data.storage_options or {})).open()\n\n            def get_bytes(offset, length):\n                f.seek(offset)\n                return f.read(length)\n\n        else:\n            f = open(data.url)\n            get_bytes = pmtiles.reader.MmapSource(f)\n        return self._func(get_bytes)\n\n\nclass FileExistsReader(BaseReader):\n    implements = {datatypes.FileData}\n    func = \"fsspec.core:url_to_fs\"\n    
output_instance = \"builtins:bool\"\n\n    def _read(self, data, *args, **kwargs):\n        try:\n            fs, path = fsspec.core.url_to_fs(data.url, **(data.storage_options or {}))\n        except FileNotFoundError:\n            return False\n        return fs.exists(path)\n\n\nclass YAMLCatalogReader(FileReader):\n    implements = {datatypes.YAMLFile, datatypes.YAMLFile}\n    func = \"intake.readers.entry:Catalog.from_yaml_file\"\n    url_arg = \"path\"\n    storage_options = True\n    output_instance = \"intake.readers.entry:Catalog\"\n\n\nclass PrometheusMetricReader(BaseReader):\n    implements = {datatypes.Prometheus}\n    imports = {\"prometheus_api_client\"}\n    output_instance = \"typing:Iterator\"\n    func = \"prometheus_api_client:custom_query\"\n    other_funcs = {\"prometheus_api_client:get_metric_range_data\"}\n\n    def _read(self, data: datatypes.Prometheus, *args, **kwargs):\n        from prometheus_api_client import PrometheusConnect\n        from prometheus_api_client.utils import parse_datetime\n\n        prom = PrometheusConnect(url=data.url, **(data.options or {}))\n        if data.query:\n            # this is a catalog, should be separate reader?\n            return prom.custom_query(data.query, **kwargs)\n        if not data.metric:\n            return prom.all_metrics(**kwargs)\n        start_time = parse_datetime(data.start_time) if data.start_time else parse_datetime(\"1900\")\n        end_time = parse_datetime(data.end_time) if data.end_time else parse_datetime(\"now\")\n        return prom.get_metric_range_data(\n            data.metric,\n            label_config=data.labels,\n            start_time=start_time,\n            end_time=end_time,\n            **kwargs,\n        )\n\n\nclass Retry(BaseReader):\n    \"\"\"Retry (part of) a pipeline until it returns without exception\n\n    Retries the whole of the selected pipeline; an exception will start at the beginning.\n    \"\"\"\n\n    def _read(\n        self,\n        data,\n   
     max_tries=10,\n        allowed_exceptions=(Exception,),\n        backoff0=0.1,\n        backoff_factor=1.3,\n        start_stage=None,\n        **kw,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        data: intake pipeline/reader\n        max_tries: number of attempts that can be made\n        allowed_exceptions: tuple of Exceptions we will try again for; others will raise\n        start_stage: if given, index of pipeline member stage to start from for retries (else all);\n            may be negative from the most recent previous stage (-1).\n        \"\"\"\n        import time\n\n        if isinstance(allowed_exceptions, (list, set)):\n            allowed_exceptions = tuple(allowed_exceptions)\n        reader = data if isinstance(data, BaseReader) else data.reader\n        if start_stage and start_stage < 0:\n            start_stage = len(reader.steps) + start_stage\n        if start_stage:\n            data = reader.first_n_stages(start_stage).read()\n        else:\n            data = None\n            start_stage = 0\n        for j in range(max_tries):\n            try:\n                for i in range(start_stage, len(reader.steps)):\n                    if i == 0:\n                        data = reader._read_stage_n(stage=0)\n                    else:\n                        data = reader._read_stage_n(stage=1, data=data)\n            except allowed_exceptions:\n                if j == max_tries:\n                    raise\n                time.sleep(backoff0 * backoff_factor**i)\n        return data\n\n\ndef recommend(data):\n    \"\"\"Show which readers claim to support the given data instance or a superclass\n\n    The ordering is more specific readers first\n    \"\"\"\n    seen = set()\n    out = {\"importable\": [], \"not_importable\": []}\n    data = type(data) if not isinstance(data, type) else data\n    for datacls in data.mro():\n        for cls in subclasses(BaseReader):\n            if any(datacls == imp for imp in 
cls.implements):\n                if cls not in seen:\n                    seen.add(cls)\n                    if cls.check_imports():\n                        out[\"importable\"].append(cls)\n                    else:\n                        out[\"not_importable\"].append(cls)\n    return out\n\n\ndef reader_from_call(func: str, *args, join_lines=False, **kwargs) -> BaseReader:\n    \"\"\"Attempt to construct a reader instance by finding one that matches the function call\n\n    Fails for readers that don't define a func, probably because it depends on the file\n    type or needs a dynamic instance to be a method of.\n\n    Parameters\n    ----------\n    func: callable | str\n        If a callable, pass args and kwargs as you would have done to execute the function.\n        If a string, it should look like ``\"func(arg1, args2, kwarg1, **kw)\"``, i.e., a normal\n        python call but as a string. In the latter case, args and kwargs are ignored\n    \"\"\"\n\n    import re\n    from itertools import chain\n\n    if isinstance(func, str):\n        if join_lines:\n            func = func.replace(\"\\n\", \"\")\n        frame = inspect.currentframe().f_back\n        match = re.match(\"^([^(]+=)?([^(]+)[(](.*)[)][^)]?$\", func)\n        if match:\n            groups = match.groups()\n        else:\n            raise ValueError\n        func = eval(groups[1], frame.f_globals, frame.f_locals)\n        args, kwargs = eval(\n            f\"\"\"(lambda *args, **kwargs: (args, kwargs))({groups[2]})\"\"\",\n            frame.f_globals,\n            frame.f_locals,\n        )\n\n    package = func.__module__.split(\".\", 1)[0]\n\n    found = False\n    for cls in subclasses(BaseReader):\n        if cls.check_imports() and any(\n            f.split(\":\", 1)[0].split(\".\", 1)[0] == package for f in ({cls.func} | cls.other_funcs)\n        ):\n            ffs = [f for f in ({cls.func} | cls.other_funcs) if import_name(f) == func]\n            if ffs:\n                found = 
cls\n                func_name = ffs[0]\n                break\n    if not found:\n        raise ValueError(\"Function not found in the set of readers\")\n\n    pars = inspect.signature(func).parameters\n    kw = dict(zip(pars, args), **kwargs)\n    data_kw = {}\n    if issubclass(cls, FileReader):\n        data_kw[\"storage_options\"] = kw.pop(\"storage_options\", None)\n        data_kw[\"url\"] = kw.pop(getattr(cls, \"other_urls\", {}).get(func_name, cls.url_arg))\n    datacls = None\n    if len(cls.implements) == 1:\n        datacls = next(iter(cls.implements))\n    elif getattr(cls, \"url_arg\", None):\n        clss = datatypes.recommend(data_kw[\"url\"], storage_options=data_kw[\"storage_options\"])\n        clss2 = [c for c in clss if c in cls.implements]\n        if clss:\n            datacls = next(iter(chain(clss2, clss)))\n    if datacls:\n        datacls = datacls(**data_kw)\n        if data_kw[\"storage_options\"] is None:\n            del data_kw[\"storage_options\"]\n        cls = cls(datacls, **kwargs)\n    else:\n        url = data_kw.pop(\"url\")\n        cls = cls(url, **data_kw)\n\n    return cls\n"
  },
  {
    "path": "intake/readers/search.py",
    "content": "\"\"\"Find datasets meeting some complex criteria\"\"\"\n\nfrom __future__ import annotations\n\nfrom intake.readers.entry import ReaderDescription\n\n\n# some inspiration https://blueskyproject.io/tiled/reference/queries.html\n\n\nclass SearchBase:\n    \"\"\"Prototype for a single term in a search expression\n\n    The method `filter()` is meant to be overridden in subclasses.\n    \"\"\"\n\n    def filter(self, entry: ReaderDescription) -> bool:\n        \"\"\"Does the given ReaderDescription entry match the query?\"\"\"\n        # should not raise: an exception counts as False\n        return True\n\n    def __or__(self, other):\n        return Or(self, other)\n\n    def __and__(self, other):\n        return And(self, other)\n\n    def __inv__(self):\n        return Not(self)\n\n\nclass Or(SearchBase):\n    def __init__(self, first: SearchBase, second: SearchBase):\n        self.first = first\n        self.second = second\n\n    def filter(self, entry: ReaderDescription) -> bool:\n        return self.first.filter(entry) or self.second.filter(entry)\n\n\nclass And(SearchBase):\n    def __init__(self, first: SearchBase, second: SearchBase):\n        self.first = first\n        self.second = second\n\n    def filter(self, entry: ReaderDescription) -> bool:\n        return self.first.filter(entry) and self.second.filter(entry)\n\n\nclass Not(SearchBase):\n    def __init__(self, first: SearchBase):\n        self.first = first\n\n    def filter(self, entry: ReaderDescription) -> bool:\n        return not self.first.filter(entry)\n\n\nclass Any(SearchBase):\n    def __init__(self, *terms: tuple[SearchBase, ...]):\n        self.terms = terms\n\n    def filter(self, entry: ReaderDescription) -> bool:\n        return any(t.filter(entry) for t in self.terms)\n\n\nclass All(SearchBase):\n    def __init__(self, *terms: tuple[SearchBase, ...]):\n        self.terms = terms\n\n    def filter(self, entry: ReaderDescription) -> bool:\n        return 
all(t.filter(entry) for t in self.terms)\n\n\nclass Text(SearchBase):\n    \"\"\"Search for given string anywhere in the text repr of an entry.\"\"\"\n\n    def __init__(self, text: str):\n        self.text = text\n\n    def filter(self, entry: ReaderDescription) -> bool:\n        return self.text in str(entry)\n\n\nclass Importable(SearchBase):\n    \"\"\"Check if the packages listed in \"imports\" field exist in the environment\n\n    This only checks for top-level packages, and does not actually import anything.\n    If any package is not found, the filter fails.\n    \"\"\"\n\n    def filter(self, entry: ReaderDescription) -> bool:\n        return entry.check_imports()\n\n\nclass EnvironmentSatisfied(SearchBase):\n    \"\"\"Compare the \"environment\" metadata field to the current information in conda\n\n    The value of \"environment\" can be a URL to load from (and environment.yml file),\n    or literal YAML text. See\n    https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html\n    for specs.\n\n    The filter passes if the environment is satisfied, i.e., the packages and versions in\n    the current environment are allowed by the spec.\n    \"\"\"\n\n    def filter(self, entry: ReaderDescription) -> bool:\n        env = entry.metadata.get(\"environment\")\n        if not env:\n            # no env restrictions means a pass\n            return True\n        return self._is_consistent(env)\n\n    @staticmethod\n    def _is_consistent(env, output=False):\n        # TODO: this is quite slow, should cache?\n        import fsspec\n        import os\n        import subprocess\n        import tempfile\n        import shlex\n\n        fn = tempfile.mktemp(suffix=\".yaml\")\n        try:\n            if \"dependencies:\" not in env:\n                with fsspec.open(env, \"rt\") as f:\n                    env = f.read()\n            with open(fn, \"wt\") as f:\n                f.write(env)\n\n            cmd = shlex.split(f\"conda compare 
{fn}\")\n            kw = {} if output else dict(stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)\n            out = subprocess.check_call(cmd, **kw)\n            return out == 0\n        except Exception:\n            return False\n        finally:\n            if os.path.exists(fn):\n                os.remove(fn)\n"
  },
  {
    "path": "intake/readers/tests/__init__.py",
    "content": ""
  },
  {
    "path": "intake/readers/tests/cats/__init__.py",
    "content": ""
  },
  {
    "path": "intake/readers/tests/cats/stac_data/1.0.0/catalog/catalog.json",
    "content": "{\n  \"type\": \"Catalog\",\n  \"id\": \"test\",\n  \"stac_version\": \"1.0.0\",\n  \"description\": \"test catalog\",\n  \"links\": [\n    {\n      \"rel\": \"child\",\n      \"href\": \"./child-catalog.json\",\n      \"type\": \"application/json\"\n    },\n    {\n      \"rel\": \"root\",\n      \"href\": \"./catalog.json\",\n      \"type\": \"application/json\"\n    }\n  ],\n  \"stac_extensions\": []\n}\n"
  },
  {
    "path": "intake/readers/tests/cats/stac_data/1.0.0/catalog/child-catalog.json",
    "content": "{\n  \"type\": \"Catalog\",\n  \"id\": \"test\",\n  \"stac_version\": \"1.0.0\",\n  \"description\": \"child catalog\",\n  \"links\": [\n    {\n      \"rel\": \"root\",\n      \"href\": \"./catalog.json\",\n      \"type\": \"application/json\"\n    }\n  ],\n  \"stac_extensions\": []\n}\n"
  },
  {
    "path": "intake/readers/tests/cats/stac_data/1.0.0/collection/collection.json",
    "content": "{\n  \"id\": \"simple-collection\",\n  \"type\": \"Collection\",\n  \"stac_extensions\": [\n    \"https://stac-extensions.github.io/eo/v1.0.0/schema.json\",\n    \"https://stac-extensions.github.io/view/v1.0.0/schema.json\"\n  ],\n  \"stac_version\": \"1.0.0\",\n  \"description\": \"A simple collection demonstrating core catalog fields with links to a couple of items\",\n  \"title\": \"Simple Example Collection\",\n  \"providers\": [\n    {\n      \"name\": \"Remote Data, Inc\",\n      \"description\": \"Producers of awesome spatiotemporal assets\",\n      \"roles\": [\"producer\", \"processor\"],\n      \"url\": \"http://remotedata.io\"\n    }\n  ],\n  \"extent\": {\n    \"spatial\": {\n      \"bbox\": [\n        [\n          172.91173669923782, 1.3438851951615003, 172.95469614953714,\n          1.3690476620161975\n        ]\n      ]\n    },\n    \"temporal\": {\n      \"interval\": [[\"2020-12-11T22:38:32.125Z\", \"2020-12-14T18:02:31.437Z\"]]\n    }\n  },\n  \"license\": \"CC-BY-4.0\",\n  \"summaries\": {\n    \"platform\": [\"cool_sat1\", \"cool_sat2\"],\n    \"constellation\": [\"ion\"],\n    \"instruments\": [\"cool_sensor_v1\", \"cool_sensor_v2\"],\n    \"gsd\": {\n      \"minimum\": 0.512,\n      \"maximum\": 0.66\n    },\n    \"eo:cloud_cover\": {\n      \"minimum\": 1.2,\n      \"maximum\": 1.2\n    },\n    \"proj:epsg\": {\n      \"minimum\": 32659,\n      \"maximum\": 32659\n    },\n    \"view:sun_elevation\": {\n      \"minimum\": 54.9,\n      \"maximum\": 54.9\n    },\n    \"view:off_nadir\": {\n      \"minimum\": 3.8,\n      \"maximum\": 3.8\n    },\n    \"view:sun_azimuth\": {\n      \"minimum\": 135.7,\n      \"maximum\": 135.7\n    }\n  },\n  \"links\": [\n    {\n      \"rel\": \"root\",\n      \"href\": \"./collection.json\",\n      \"type\": \"application/json\"\n    },\n    {\n      \"rel\": \"item\",\n      \"href\": \"./simple-item.json\",\n      \"type\": \"application/geo+json\",\n      \"title\": \"Simple Item\"\n    }\n  
]\n}\n"
  },
  {
    "path": "intake/readers/tests/cats/stac_data/1.0.0/collection/simple-item.json",
    "content": "{\n  \"stac_version\": \"1.0.0\",\n  \"stac_extensions\": [\n    \"https://stac-extensions.github.io/projection/v1.0.0/schema.json\",\n    \"https://stac-extensions.github.io/eo/v1.0.0/schema.json\"\n  ],\n  \"type\": \"Feature\",\n  \"id\": \"S2B_MSIL2A_20171227T160459_N0212_R054_T17QLA_20201014T165101\",\n  \"bbox\": [\n    172.91173669923782, 1.3438851951615003, 172.95469614953714,\n    1.3690476620161975\n  ],\n  \"geometry\": {\n    \"coordinates\": [\n      [\n        [-82.89978, 18.98277161],\n        [-81.85693, 18.99053787],\n        [-81.85202, 17.99825755],\n        [-82.888855, 17.99092482],\n        [-82.89978, 18.98277161]\n      ]\n    ],\n    \"type\": \"Polygon\"\n  },\n  \"properties\": {\n    \"datetime\": \"2017-12-27T16:04:59.027000Z\"\n  },\n  \"collection\": \"simple-collection\",\n  \"links\": [\n    {\n      \"rel\": \"collection\",\n      \"href\": \"./collection.json\",\n      \"type\": \"application/json\",\n      \"title\": \"Simple Example Collection\"\n    },\n    {\n      \"rel\": \"root\",\n      \"href\": \"./collection.json\",\n      \"type\": \"application/json\"\n    },\n    {\n      \"rel\": \"parent\",\n      \"href\": \"./collection.json\",\n      \"type\": \"application/json\"\n    }\n  ],\n  \"assets\": {\n    \"B02\": {\n      \"href\": \"https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/17/Q/LA/2017/12/27/S2B_MSIL2A_20171227T160459_N0212_R054_T17QLA_20201014T165101.SAFE/GRANULE/L2A_T17QLA_A004227_20171227T160750/IMG_DATA/R10m/T17QLA_20171227T160459_B02_10m.tif\",\n      \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n      \"title\": \"Band 2 - Blue\",\n      \"eo:bands\": [\n        {\n          \"name\": \"B02\",\n          \"common_name\": \"blue\",\n          \"description\": \"Band 2 - Blue\",\n          \"center_wavelength\": 0.49,\n          \"full_width_half_max\": 0.098\n        }\n      ],\n      \"gsd\": 10,\n      \"proj:shape\": [10980, 10980],\n      
\"proj:bbox\": [300000, 1990200, 409800, 2100000],\n      \"proj:transform\": [10, 0, 300000, 0, -10, 2100000],\n      \"roles\": [\"data\"]\n    },\n    \"B03\": {\n      \"href\": \"https://sentinel2l2a01.blob.core.windows.net/sentinel2-l2/17/Q/LA/2017/12/27/S2B_MSIL2A_20171227T160459_N0212_R054_T17QLA_20201014T165101.SAFE/GRANULE/L2A_T17QLA_A004227_20171227T160750/IMG_DATA/R10m/T17QLA_20171227T160459_B03_10m.tif\",\n      \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n      \"title\": \"Band 3 - Green\",\n      \"eo:bands\": [\n        {\n          \"name\": \"B03\",\n          \"common_name\": \"green\",\n          \"description\": \"Band 3 - Green\",\n          \"center_wavelength\": 0.56,\n          \"full_width_half_max\": 0.045\n        }\n      ],\n      \"gsd\": 10,\n      \"proj:shape\": [10980, 10980],\n      \"proj:bbox\": [300000, 1990200, 409800, 2100000],\n      \"proj:transform\": [10, 0, 300000, 0, -10, 2100000],\n      \"roles\": [\"data\"]\n    }\n  }\n}\n"
  },
  {
    "path": "intake/readers/tests/cats/stac_data/1.0.0/collection/zarr-collection.json",
    "content": "{\n  \"type\": \"Collection\",\n  \"id\": \"daymet-daily-hi\",\n  \"stac_version\": \"1.0.0\",\n  \"description\": \"{{ collection.description }}\",\n  \"links\": [\n    {\n      \"rel\": \"license\",\n      \"href\": \"https://science.nasa.gov/earth-science/earth-science-data/data-information-policy\"\n    }\n  ],\n  \"stac_extensions\": [\n    \"https://stac-extensions.github.io/datacube/v2.0.0/schema.json\"\n  ],\n  \"cube:dimensions\": {\n    \"time\": {\n      \"type\": \"temporal\",\n      \"description\": \"24-hour day based on local time\",\n      \"extent\": [\"1980-01-01T12:00:00Z\", \"2020-12-30T12:00:00Z\"]\n    },\n    \"x\": {\n      \"type\": \"spatial\",\n      \"axis\": \"x\",\n      \"description\": \"x coordinate of projection\",\n      \"extent\": [-5802250.0, -5519250.0],\n      \"step\": 1000.0,\n      \"reference_system\": {\n        \"$schema\": \"https://proj.org/schemas/v0.2/projjson.schema.json\",\n        \"type\": \"ProjectedCRS\",\n        \"name\": \"undefined\",\n        \"base_crs\": {\n          \"name\": \"undefined\",\n          \"datum\": {\n            \"type\": \"GeodeticReferenceFrame\",\n            \"name\": \"undefined\",\n            \"ellipsoid\": {\n              \"name\": \"undefined\",\n              \"semi_major_axis\": 6378137,\n              \"inverse_flattening\": 298.257223563\n            }\n          },\n          \"coordinate_system\": {\n            \"subtype\": \"ellipsoidal\",\n            \"axis\": [\n              {\n                \"name\": \"Longitude\",\n                \"abbreviation\": \"lon\",\n                \"direction\": \"east\",\n                \"unit\": \"degree\"\n              },\n              {\n                \"name\": \"Latitude\",\n                \"abbreviation\": \"lat\",\n                \"direction\": \"north\",\n                \"unit\": \"degree\"\n              }\n            ]\n          }\n        },\n        \"conversion\": {\n          \"name\": 
\"unknown\",\n          \"method\": {\n            \"name\": \"Lambert Conic Conformal (2SP)\",\n            \"id\": {\n              \"authority\": \"EPSG\",\n              \"code\": 9802\n            }\n          },\n          \"parameters\": [\n            {\n              \"name\": \"Latitude of 1st standard parallel\",\n              \"value\": 25,\n              \"unit\": \"degree\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8823\n              }\n            },\n            {\n              \"name\": \"Latitude of 2nd standard parallel\",\n              \"value\": 60,\n              \"unit\": \"degree\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8824\n              }\n            },\n            {\n              \"name\": \"Latitude of false origin\",\n              \"value\": 42.5,\n              \"unit\": \"degree\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8821\n              }\n            },\n            {\n              \"name\": \"Longitude of false origin\",\n              \"value\": -100,\n              \"unit\": \"degree\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8822\n              }\n            },\n            {\n              \"name\": \"Easting at false origin\",\n              \"value\": 0,\n              \"unit\": \"metre\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8826\n              }\n            },\n            {\n              \"name\": \"Northing at false origin\",\n              \"value\": 0,\n              \"unit\": \"metre\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8827\n              }\n            }\n          ]\n        },\n        \"coordinate_system\": {\n          \"subtype\": \"Cartesian\",\n          \"axis\": 
[\n            {\n              \"name\": \"Easting\",\n              \"abbreviation\": \"E\",\n              \"direction\": \"east\",\n              \"unit\": \"metre\"\n            },\n            {\n              \"name\": \"Northing\",\n              \"abbreviation\": \"N\",\n              \"direction\": \"north\",\n              \"unit\": \"metre\"\n            }\n          ]\n        }\n      }\n    },\n    \"y\": {\n      \"type\": \"spatial\",\n      \"axis\": \"y\",\n      \"description\": \"y coordinate of projection\",\n      \"extent\": [-622000.0, -39000.0],\n      \"step\": -1000.0,\n      \"reference_system\": {\n        \"$schema\": \"https://proj.org/schemas/v0.2/projjson.schema.json\",\n        \"type\": \"ProjectedCRS\",\n        \"name\": \"undefined\",\n        \"base_crs\": {\n          \"name\": \"undefined\",\n          \"datum\": {\n            \"type\": \"GeodeticReferenceFrame\",\n            \"name\": \"undefined\",\n            \"ellipsoid\": {\n              \"name\": \"undefined\",\n              \"semi_major_axis\": 6378137,\n              \"inverse_flattening\": 298.257223563\n            }\n          },\n          \"coordinate_system\": {\n            \"subtype\": \"ellipsoidal\",\n            \"axis\": [\n              {\n                \"name\": \"Longitude\",\n                \"abbreviation\": \"lon\",\n                \"direction\": \"east\",\n                \"unit\": \"degree\"\n              },\n              {\n                \"name\": \"Latitude\",\n                \"abbreviation\": \"lat\",\n                \"direction\": \"north\",\n                \"unit\": \"degree\"\n              }\n            ]\n          }\n        },\n        \"conversion\": {\n          \"name\": \"unknown\",\n          \"method\": {\n            \"name\": \"Lambert Conic Conformal (2SP)\",\n            \"id\": {\n              \"authority\": \"EPSG\",\n              \"code\": 9802\n            }\n          },\n          \"parameters\": [\n    
        {\n              \"name\": \"Latitude of 1st standard parallel\",\n              \"value\": 25,\n              \"unit\": \"degree\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8823\n              }\n            },\n            {\n              \"name\": \"Latitude of 2nd standard parallel\",\n              \"value\": 60,\n              \"unit\": \"degree\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8824\n              }\n            },\n            {\n              \"name\": \"Latitude of false origin\",\n              \"value\": 42.5,\n              \"unit\": \"degree\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8821\n              }\n            },\n            {\n              \"name\": \"Longitude of false origin\",\n              \"value\": -100,\n              \"unit\": \"degree\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8822\n              }\n            },\n            {\n              \"name\": \"Easting at false origin\",\n              \"value\": 0,\n              \"unit\": \"metre\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8826\n              }\n            },\n            {\n              \"name\": \"Northing at false origin\",\n              \"value\": 0,\n              \"unit\": \"metre\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 8827\n              }\n            }\n          ]\n        },\n        \"coordinate_system\": {\n          \"subtype\": \"Cartesian\",\n          \"axis\": [\n            {\n              \"name\": \"Easting\",\n              \"abbreviation\": \"E\",\n              \"direction\": \"east\",\n              \"unit\": \"metre\"\n            },\n            {\n              \"name\": \"Northing\",\n              
\"abbreviation\": \"N\",\n              \"direction\": \"north\",\n              \"unit\": \"metre\"\n            }\n          ]\n        }\n      }\n    },\n    \"nv\": {\n      \"type\": \"count\",\n      \"description\": \"Size of the 'time_bnds' variable.\",\n      \"values\": [0, 1]\n    }\n  },\n  \"cube:variables\": {\n    \"dayl\": {\n      \"type\": \"data\",\n      \"description\": \"daylength\",\n      \"dimensions\": [\"time\", \"y\", \"x\"],\n      \"unit\": \"s\",\n      \"shape\": [14965, 584, 284],\n      \"chunks\": [365, 584, 284],\n      \"attrs\": {\n        \"cell_methods\": \"area: mean\",\n        \"grid_mapping\": \"lambert_conformal_conic\",\n        \"long_name\": \"daylength\",\n        \"units\": \"s\"\n      }\n    },\n    \"lambert_conformal_conic\": {\n      \"type\": \"data\",\n      \"dimensions\": [],\n      \"shape\": [],\n      \"attrs\": {\n        \"false_easting\": 0.0,\n        \"false_northing\": 0.0,\n        \"grid_mapping_name\": \"lambert_conformal_conic\",\n        \"inverse_flattening\": 298.257223563,\n        \"latitude_of_projection_origin\": 42.5,\n        \"longitude_of_central_meridian\": -100.0,\n        \"semi_major_axis\": 6378137.0,\n        \"standard_parallel\": [25.0, 60.0]\n      }\n    },\n    \"lat\": {\n      \"type\": \"auxiliary\",\n      \"description\": \"latitude coordinate\",\n      \"dimensions\": [\"y\", \"x\"],\n      \"unit\": \"degrees_north\",\n      \"shape\": [584, 284],\n      \"chunks\": [584, 284],\n      \"attrs\": {\n        \"long_name\": \"latitude coordinate\",\n        \"standard_name\": \"latitude\",\n        \"units\": \"degrees_north\"\n      }\n    },\n    \"lon\": {\n      \"type\": \"auxiliary\",\n      \"description\": \"longitude coordinate\",\n      \"dimensions\": [\"y\", \"x\"],\n      \"unit\": \"degrees_east\",\n      \"shape\": [584, 284],\n      \"chunks\": [584, 284],\n      \"attrs\": {\n        \"long_name\": \"longitude coordinate\",\n        \"standard_name\": 
\"longitude\",\n        \"units\": \"degrees_east\"\n      }\n    },\n    \"prcp\": {\n      \"type\": \"data\",\n      \"description\": \"daily total precipitation\",\n      \"dimensions\": [\"time\", \"y\", \"x\"],\n      \"unit\": \"mm/day\",\n      \"shape\": [14965, 584, 284],\n      \"chunks\": [365, 584, 284],\n      \"attrs\": {\n        \"cell_methods\": \"area: mean time: sum\",\n        \"grid_mapping\": \"lambert_conformal_conic\",\n        \"long_name\": \"daily total precipitation\",\n        \"units\": \"mm/day\"\n      }\n    },\n    \"srad\": {\n      \"type\": \"data\",\n      \"description\": \"daylight average incident shortwave radiation\",\n      \"dimensions\": [\"time\", \"y\", \"x\"],\n      \"unit\": \"W/m2\",\n      \"shape\": [14965, 584, 284],\n      \"chunks\": [365, 584, 284],\n      \"attrs\": {\n        \"cell_methods\": \"area: mean time: mean\",\n        \"grid_mapping\": \"lambert_conformal_conic\",\n        \"long_name\": \"daylight average incident shortwave radiation\",\n        \"units\": \"W/m2\"\n      }\n    },\n    \"swe\": {\n      \"type\": \"data\",\n      \"description\": \"snow water equivalent\",\n      \"dimensions\": [\"time\", \"y\", \"x\"],\n      \"unit\": \"kg/m2\",\n      \"shape\": [14965, 584, 284],\n      \"chunks\": [365, 584, 284],\n      \"attrs\": {\n        \"cell_methods\": \"area: mean time: mean\",\n        \"grid_mapping\": \"lambert_conformal_conic\",\n        \"long_name\": \"snow water equivalent\",\n        \"units\": \"kg/m2\"\n      }\n    },\n    \"time_bnds\": {\n      \"type\": \"data\",\n      \"dimensions\": [\"time\", \"nv\"],\n      \"shape\": [14965, 2],\n      \"chunks\": [365, 2],\n      \"attrs\": {}\n    },\n    \"tmax\": {\n      \"type\": \"data\",\n      \"description\": \"daily maximum temperature\",\n      \"dimensions\": [\"time\", \"y\", \"x\"],\n      \"unit\": \"degrees C\",\n      \"shape\": [14965, 584, 284],\n      \"chunks\": [365, 584, 284],\n      \"attrs\": {\n    
    \"cell_methods\": \"area: mean time: maximum\",\n        \"grid_mapping\": \"lambert_conformal_conic\",\n        \"long_name\": \"daily maximum temperature\",\n        \"units\": \"degrees C\"\n      }\n    },\n    \"tmin\": {\n      \"type\": \"data\",\n      \"description\": \"daily minimum temperature\",\n      \"dimensions\": [\"time\", \"y\", \"x\"],\n      \"unit\": \"degrees C\",\n      \"shape\": [14965, 584, 284],\n      \"chunks\": [365, 584, 284],\n      \"attrs\": {\n        \"cell_methods\": \"area: mean time: minimum\",\n        \"grid_mapping\": \"lambert_conformal_conic\",\n        \"long_name\": \"daily minimum temperature\",\n        \"units\": \"degrees C\"\n      }\n    },\n    \"vp\": {\n      \"type\": \"data\",\n      \"description\": \"daily average vapor pressure\",\n      \"dimensions\": [\"time\", \"y\", \"x\"],\n      \"unit\": \"Pa\",\n      \"shape\": [14965, 584, 284],\n      \"chunks\": [365, 584, 284],\n      \"attrs\": {\n        \"cell_methods\": \"area: mean time: mean\",\n        \"grid_mapping\": \"lambert_conformal_conic\",\n        \"long_name\": \"daily average vapor pressure\",\n        \"units\": \"Pa\"\n      }\n    },\n    \"yearday\": {\n      \"type\": \"data\",\n      \"description\": \"day of year (DOY) starting with day 1 on January 1st\",\n      \"dimensions\": [\"time\"],\n      \"shape\": [14965],\n      \"chunks\": [365],\n      \"attrs\": {\n        \"long_name\": \"day of year (DOY) starting with day 1 on January 1st\"\n      }\n    }\n  },\n  \"title\": \"Daymet Daily Hawaii\",\n  \"keywords\": [\n    \"Daymet\",\n    \"Hawaii\",\n    \"Temperature\",\n    \"Precipitation\",\n    \"Vapor Pressure\",\n    \"Weather\"\n  ],\n  \"providers\": [\n    {\n      \"name\": \"Microsoft\",\n      \"roles\": [\"host\", \"processor\"],\n      \"url\": \"https://planetarycomputer.microsoft.com\"\n    },\n    {\n      \"name\": \"ORNL DAAC\",\n      \"roles\": [\"producer\"],\n      \"url\": 
\"https://doi.org/10.3334/ORNLDAAC/1840\"\n    }\n  ],\n  \"assets\": {\n    \"zarr-https\": {\n      \"href\": \"https://daymeteuwest.blob.core.windows.net/daymet-zarr/daily/hi.zarr\",\n      \"type\": \"application/vnd+zarr\",\n      \"title\": \"Daily Hawaii Daymet HTTPS Zarr root\",\n      \"description\": \"HTTPS URI of the daily Hawaii Daymet Zarr Group on Azure Blob Storage.\",\n      \"xarray:open_kwargs\": {\n        \"consolidated\": true\n      },\n      \"roles\": [\"data\", \"zarr\", \"https\"]\n    },\n    \"zarr-abfs\": {\n      \"href\": \"abfs://daymet-zarr/daily/hi.zarr\",\n      \"type\": \"application/vnd+zarr\",\n      \"title\": \"Daily Hawaii Daymet Azure Blob File System Zarr root\",\n      \"description\": \"Azure Blob File System of the daily Hawaii Daymet Zarr Group on Azure Blob Storage for use with adlfs.\",\n      \"xarray:storage_options\": {\n        \"account_name\": \"daymeteuwest\"\n      },\n      \"xarray:open_kwargs\": {\n        \"consolidated\": true\n      },\n      \"roles\": [\"data\", \"zarr\", \"abfs\"]\n    },\n    \"thumbnail\": {\n      \"href\": \"https://ai4edatasetspublicassets.blob.core.windows.net/assets/pc_thumbnails/daymet-daily-hi.png\",\n      \"type\": \"image/png\",\n      \"title\": \"Daymet daily Hawaii map thumbnail\",\n      \"roles\": [\"thumbnail\"]\n    }\n  },\n  \"msft:short_description\": \"Daily surface weather data on a 1-km grid for Hawaii\",\n  \"msft:storage_account\": \"daymeteuwest\",\n  \"msft:container\": \"daymet-zarr\",\n  \"msft:group_id\": \"daymet\",\n  \"msft:group_keys\": [\"daily\", \"hawaii\"],\n  \"extent\": {\n    \"spatial\": {\n      \"bbox\": [[-160.3056, 17.9539, -154.772, 23.5186]]\n    },\n    \"temporal\": {\n      \"interval\": [[\"1980-01-01T12:00:00Z\", \"2020-12-30T12:00:00Z\"]]\n    }\n  },\n  \"license\": \"proprietary\"\n}\n"
  },
  {
    "path": "intake/readers/tests/cats/stac_data/1.0.0/item/zarr-item.json",
    "content": "{\n  \"type\": \"Feature\",\n  \"stac_version\": \"1.0.0\",\n  \"id\": \"daymet-daily-hi\",\n  \"properties\": {\n    \"cube:dimensions\": {\n      \"time\": {\n        \"type\": \"temporal\",\n        \"description\": \"24-hour day based on local time\",\n        \"extent\": [\"1980-01-01T12:00:00Z\", \"2020-12-30T12:00:00Z\"]\n      },\n      \"x\": {\n        \"type\": \"spatial\",\n        \"axis\": \"x\",\n        \"description\": \"x coordinate of projection\",\n        \"extent\": [-5802250.0, -5519250.0],\n        \"step\": 1000.0,\n        \"reference_system\": {\n          \"$schema\": \"https://proj.org/schemas/v0.2/projjson.schema.json\",\n          \"type\": \"ProjectedCRS\",\n          \"name\": \"undefined\",\n          \"base_crs\": {\n            \"name\": \"undefined\",\n            \"datum\": {\n              \"type\": \"GeodeticReferenceFrame\",\n              \"name\": \"undefined\",\n              \"ellipsoid\": {\n                \"name\": \"undefined\",\n                \"semi_major_axis\": 6378137,\n                \"inverse_flattening\": 298.257223563\n              }\n            },\n            \"coordinate_system\": {\n              \"subtype\": \"ellipsoidal\",\n              \"axis\": [\n                {\n                  \"name\": \"Longitude\",\n                  \"abbreviation\": \"lon\",\n                  \"direction\": \"east\",\n                  \"unit\": \"degree\"\n                },\n                {\n                  \"name\": \"Latitude\",\n                  \"abbreviation\": \"lat\",\n                  \"direction\": \"north\",\n                  \"unit\": \"degree\"\n                }\n              ]\n            }\n          },\n          \"conversion\": {\n            \"name\": \"unknown\",\n            \"method\": {\n              \"name\": \"Lambert Conic Conformal (2SP)\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 9802\n              }\n       
     },\n            \"parameters\": [\n              {\n                \"name\": \"Latitude of 1st standard parallel\",\n                \"value\": 25,\n                \"unit\": \"degree\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8823\n                }\n              },\n              {\n                \"name\": \"Latitude of 2nd standard parallel\",\n                \"value\": 60,\n                \"unit\": \"degree\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8824\n                }\n              },\n              {\n                \"name\": \"Latitude of false origin\",\n                \"value\": 42.5,\n                \"unit\": \"degree\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8821\n                }\n              },\n              {\n                \"name\": \"Longitude of false origin\",\n                \"value\": -100,\n                \"unit\": \"degree\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8822\n                }\n              },\n              {\n                \"name\": \"Easting at false origin\",\n                \"value\": 0,\n                \"unit\": \"metre\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8826\n                }\n              },\n              {\n                \"name\": \"Northing at false origin\",\n                \"value\": 0,\n                \"unit\": \"metre\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8827\n                }\n              }\n            ]\n          },\n          \"coordinate_system\": {\n            \"subtype\": \"Cartesian\",\n            \"axis\": [\n              {\n                \"name\": \"Easting\",\n                
\"abbreviation\": \"E\",\n                \"direction\": \"east\",\n                \"unit\": \"metre\"\n              },\n              {\n                \"name\": \"Northing\",\n                \"abbreviation\": \"N\",\n                \"direction\": \"north\",\n                \"unit\": \"metre\"\n              }\n            ]\n          }\n        }\n      },\n      \"y\": {\n        \"type\": \"spatial\",\n        \"axis\": \"y\",\n        \"description\": \"y coordinate of projection\",\n        \"extent\": [-622000.0, -39000.0],\n        \"step\": -1000.0,\n        \"reference_system\": {\n          \"$schema\": \"https://proj.org/schemas/v0.2/projjson.schema.json\",\n          \"type\": \"ProjectedCRS\",\n          \"name\": \"undefined\",\n          \"base_crs\": {\n            \"name\": \"undefined\",\n            \"datum\": {\n              \"type\": \"GeodeticReferenceFrame\",\n              \"name\": \"undefined\",\n              \"ellipsoid\": {\n                \"name\": \"undefined\",\n                \"semi_major_axis\": 6378137,\n                \"inverse_flattening\": 298.257223563\n              }\n            },\n            \"coordinate_system\": {\n              \"subtype\": \"ellipsoidal\",\n              \"axis\": [\n                {\n                  \"name\": \"Longitude\",\n                  \"abbreviation\": \"lon\",\n                  \"direction\": \"east\",\n                  \"unit\": \"degree\"\n                },\n                {\n                  \"name\": \"Latitude\",\n                  \"abbreviation\": \"lat\",\n                  \"direction\": \"north\",\n                  \"unit\": \"degree\"\n                }\n              ]\n            }\n          },\n          \"conversion\": {\n            \"name\": \"unknown\",\n            \"method\": {\n              \"name\": \"Lambert Conic Conformal (2SP)\",\n              \"id\": {\n                \"authority\": \"EPSG\",\n                \"code\": 9802\n              
}\n            },\n            \"parameters\": [\n              {\n                \"name\": \"Latitude of 1st standard parallel\",\n                \"value\": 25,\n                \"unit\": \"degree\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8823\n                }\n              },\n              {\n                \"name\": \"Latitude of 2nd standard parallel\",\n                \"value\": 60,\n                \"unit\": \"degree\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8824\n                }\n              },\n              {\n                \"name\": \"Latitude of false origin\",\n                \"value\": 42.5,\n                \"unit\": \"degree\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8821\n                }\n              },\n              {\n                \"name\": \"Longitude of false origin\",\n                \"value\": -100,\n                \"unit\": \"degree\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8822\n                }\n              },\n              {\n                \"name\": \"Easting at false origin\",\n                \"value\": 0,\n                \"unit\": \"metre\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8826\n                }\n              },\n              {\n                \"name\": \"Northing at false origin\",\n                \"value\": 0,\n                \"unit\": \"metre\",\n                \"id\": {\n                  \"authority\": \"EPSG\",\n                  \"code\": 8827\n                }\n              }\n            ]\n          },\n          \"coordinate_system\": {\n            \"subtype\": \"Cartesian\",\n            \"axis\": [\n              {\n                \"name\": \"Easting\",\n                
\"abbreviation\": \"E\",\n                \"direction\": \"east\",\n                \"unit\": \"metre\"\n              },\n              {\n                \"name\": \"Northing\",\n                \"abbreviation\": \"N\",\n                \"direction\": \"north\",\n                \"unit\": \"metre\"\n              }\n            ]\n          }\n        }\n      },\n      \"nv\": {\n        \"type\": \"count\",\n        \"description\": \"Size of the 'time_bnds' variable.\",\n        \"values\": [0, 1]\n      }\n    },\n    \"cube:variables\": {\n      \"dayl\": {\n        \"type\": \"data\",\n        \"description\": \"daylength\",\n        \"dimensions\": [\"time\", \"y\", \"x\"],\n        \"unit\": \"s\",\n        \"shape\": [14965, 584, 284],\n        \"chunks\": [365, 584, 284],\n        \"attrs\": {\n          \"cell_methods\": \"area: mean\",\n          \"grid_mapping\": \"lambert_conformal_conic\",\n          \"long_name\": \"daylength\",\n          \"units\": \"s\"\n        }\n      },\n      \"lambert_conformal_conic\": {\n        \"type\": \"data\",\n        \"dimensions\": [],\n        \"shape\": [],\n        \"attrs\": {\n          \"false_easting\": 0.0,\n          \"false_northing\": 0.0,\n          \"grid_mapping_name\": \"lambert_conformal_conic\",\n          \"inverse_flattening\": 298.257223563,\n          \"latitude_of_projection_origin\": 42.5,\n          \"longitude_of_central_meridian\": -100.0,\n          \"semi_major_axis\": 6378137.0,\n          \"standard_parallel\": [25.0, 60.0]\n        }\n      },\n      \"lat\": {\n        \"type\": \"auxiliary\",\n        \"description\": \"latitude coordinate\",\n        \"dimensions\": [\"y\", \"x\"],\n        \"unit\": \"degrees_north\",\n        \"shape\": [584, 284],\n        \"chunks\": [584, 284],\n        \"attrs\": {\n          \"long_name\": \"latitude coordinate\",\n          \"standard_name\": \"latitude\",\n          \"units\": \"degrees_north\"\n        }\n      },\n      \"lon\": {\n   
     \"type\": \"auxiliary\",\n        \"description\": \"longitude coordinate\",\n        \"dimensions\": [\"y\", \"x\"],\n        \"unit\": \"degrees_east\",\n        \"shape\": [584, 284],\n        \"chunks\": [584, 284],\n        \"attrs\": {\n          \"long_name\": \"longitude coordinate\",\n          \"standard_name\": \"longitude\",\n          \"units\": \"degrees_east\"\n        }\n      },\n      \"prcp\": {\n        \"type\": \"data\",\n        \"description\": \"daily total precipitation\",\n        \"dimensions\": [\"time\", \"y\", \"x\"],\n        \"unit\": \"mm/day\",\n        \"shape\": [14965, 584, 284],\n        \"chunks\": [365, 584, 284],\n        \"attrs\": {\n          \"cell_methods\": \"area: mean time: sum\",\n          \"grid_mapping\": \"lambert_conformal_conic\",\n          \"long_name\": \"daily total precipitation\",\n          \"units\": \"mm/day\"\n        }\n      },\n      \"srad\": {\n        \"type\": \"data\",\n        \"description\": \"daylight average incident shortwave radiation\",\n        \"dimensions\": [\"time\", \"y\", \"x\"],\n        \"unit\": \"W/m2\",\n        \"shape\": [14965, 584, 284],\n        \"chunks\": [365, 584, 284],\n        \"attrs\": {\n          \"cell_methods\": \"area: mean time: mean\",\n          \"grid_mapping\": \"lambert_conformal_conic\",\n          \"long_name\": \"daylight average incident shortwave radiation\",\n          \"units\": \"W/m2\"\n        }\n      },\n      \"swe\": {\n        \"type\": \"data\",\n        \"description\": \"snow water equivalent\",\n        \"dimensions\": [\"time\", \"y\", \"x\"],\n        \"unit\": \"kg/m2\",\n        \"shape\": [14965, 584, 284],\n        \"chunks\": [365, 584, 284],\n        \"attrs\": {\n          \"cell_methods\": \"area: mean time: mean\",\n          \"grid_mapping\": \"lambert_conformal_conic\",\n          \"long_name\": \"snow water equivalent\",\n          \"units\": \"kg/m2\"\n        }\n      },\n      \"time_bnds\": {\n        
\"type\": \"data\",\n        \"dimensions\": [\"time\", \"nv\"],\n        \"shape\": [14965, 2],\n        \"chunks\": [365, 2],\n        \"attrs\": {}\n      },\n      \"tmax\": {\n        \"type\": \"data\",\n        \"description\": \"daily maximum temperature\",\n        \"dimensions\": [\"time\", \"y\", \"x\"],\n        \"unit\": \"degrees C\",\n        \"shape\": [14965, 584, 284],\n        \"chunks\": [365, 584, 284],\n        \"attrs\": {\n          \"cell_methods\": \"area: mean time: maximum\",\n          \"grid_mapping\": \"lambert_conformal_conic\",\n          \"long_name\": \"daily maximum temperature\",\n          \"units\": \"degrees C\"\n        }\n      },\n      \"tmin\": {\n        \"type\": \"data\",\n        \"description\": \"daily minimum temperature\",\n        \"dimensions\": [\"time\", \"y\", \"x\"],\n        \"unit\": \"degrees C\",\n        \"shape\": [14965, 584, 284],\n        \"chunks\": [365, 584, 284],\n        \"attrs\": {\n          \"cell_methods\": \"area: mean time: minimum\",\n          \"grid_mapping\": \"lambert_conformal_conic\",\n          \"long_name\": \"daily minimum temperature\",\n          \"units\": \"degrees C\"\n        }\n      },\n      \"vp\": {\n        \"type\": \"data\",\n        \"description\": \"daily average vapor pressure\",\n        \"dimensions\": [\"time\", \"y\", \"x\"],\n        \"unit\": \"Pa\",\n        \"shape\": [14965, 584, 284],\n        \"chunks\": [365, 584, 284],\n        \"attrs\": {\n          \"cell_methods\": \"area: mean time: mean\",\n          \"grid_mapping\": \"lambert_conformal_conic\",\n          \"long_name\": \"daily average vapor pressure\",\n          \"units\": \"Pa\"\n        }\n      },\n      \"yearday\": {\n        \"type\": \"data\",\n        \"description\": \"day of year (DOY) starting with day 1 on January 1st\",\n        \"dimensions\": [\"time\"],\n        \"shape\": [14965],\n        \"chunks\": [365],\n        \"attrs\": {\n          \"long_name\": \"day of year 
(DOY) starting with day 1 on January 1st\"\n        }\n      }\n    },\n    \"start_datetime\": \"1980-01-01T12:00:00Z\",\n    \"end_datetime\": \"2020-12-30T12:00:00Z\",\n    \"datetime\": null\n  },\n  \"geometry\": {\n    \"type\": \"Polygon\",\n    \"coordinates\": [\n      [\n        [-154.7780670634169, 17.960033949329812],\n        [-154.7780670634169, 23.51232608231902],\n        [-160.2988400944475, 23.51232608231902],\n        [-160.2988400944475, 17.960033949329812],\n        [-154.7780670634169, 17.960033949329812]\n      ]\n    ]\n  },\n  \"links\": [],\n  \"assets\": {\n    \"zarr-https\": {\n      \"href\": \"https://daymeteuwest.blob.core.windows.net/daymet-zarr/daily/hi.zarr\",\n      \"type\": \"application/vnd+zarr\",\n      \"title\": \"Daily Hawaii Daymet HTTPS Zarr root\",\n      \"description\": \"HTTPS URI of the daily Hawaii Daymet Zarr Group on Azure Blob Storage.\",\n      \"xarray:open_kwargs\": {\n        \"consolidated\": true\n      },\n      \"roles\": [\"data\", \"zarr\", \"https\"]\n    },\n    \"zarr-abfs\": {\n      \"href\": \"abfs://daymet-zarr/daily/hi.zarr\",\n      \"type\": \"application/vnd+zarr\",\n      \"title\": \"Daily Hawaii Daymet Azure Blob File System Zarr root\",\n      \"description\": \"Azure Blob File System of the daily Hawaii Daymet Zarr Group on Azure Blob Storage for use with adlfs.\",\n      \"xarray:storage_options\": {\n        \"account_name\": \"daymeteuwest\"\n      },\n      \"xarray:open_kwargs\": {\n        \"consolidated\": true\n      },\n      \"roles\": [\"data\", \"zarr\", \"abfs\"]\n    },\n    \"thumbnail\": {\n      \"href\": \"https://ai4edatasetspublicassets.blob.core.windows.net/assets/pc_thumbnails/daymet-daily-hi.png\",\n      \"type\": \"image/png\",\n      \"title\": \"Daymet daily Hawaii map thumbnail\"\n    }\n  },\n  \"bbox\": [\n    -160.2988400944475, 17.960033949329812, -154.7780670634169,\n    23.51232608231902\n  ],\n  \"stac_extensions\": [\n    
\"https://stac-extensions.github.io/datacube/v2.0.0/schema.json\"\n  ]\n}\n"
  },
  {
    "path": "intake/readers/tests/cats/stac_data/1.0.0/itemcollection/example-search.json",
    "content": "{\n  \"id\": \"mysearchresults\",\n  \"stac_version\": \"1.0.0-beta.2\",\n  \"stac_extensions\": [\"single-file-stac\"],\n  \"description\": \"A bunch of results from a search\",\n  \"type\": \"FeatureCollection\",\n  \"features\": [\n    {\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\n        \"https://stac-extensions.github.io/projection/v1.0.0/schema.json\",\n        \"https://stac-extensions.github.io/view/v1.0.0/schema.json\"\n      ],\n      \"type\": \"Feature\",\n      \"id\": \"LC80370332018039LGN00\",\n      \"collection\": \"landsat-8-l1\",\n      \"bbox\": [-112.21054, 37.83042, -109.4992, 39.95532],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [-111.6768167850251, 39.952817693022276],\n            [-109.5010938553632, 39.55607811527241],\n            [-110.03573868784865, 37.83172334507642],\n            [-112.20846353249907, 38.236456540046845],\n            [-111.6768167850251, 39.952817693022276]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2018-02-08T18:02:15.719478+00:00\",\n        \"view:sun_azimuth\": 152.63804142,\n        \"view:sun_elevation\": 31.82216637,\n        \"proj:epsg\": 32612\n      },\n      \"assets\": {\n        \"index\": {\n          \"type\": \"text/html\",\n          \"title\": \"HTML index page\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/index.html\"\n        },\n        \"thumbnail\": {\n          \"title\": \"Thumbnail image\",\n          \"type\": \"image/jpeg\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_thumb_large.jpg\"\n        },\n        \"B1\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 1 (coastal)\",\n          
\"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_B1.TIF\"\n        },\n        \"B2\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 2 (blue)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_B2.TIF\"\n        },\n        \"B3\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 3 (green)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_B3.TIF\"\n        },\n        \"B4\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 4 (red)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_B4.TIF\"\n        },\n        \"B5\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 5 (nir)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_B5.TIF\"\n        },\n        \"B6\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 6 (swir16)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_B6.TIF\"\n        },\n        \"B7\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 7 (swir22)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_B7.TIF\"\n  
      },\n        \"B8\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 8 (pan)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_B8.TIF\"\n        },\n        \"B9\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 9 (cirrus)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_B9.TIF\"\n        },\n        \"B10\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 10 (lwir)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_B10.TIF\"\n        },\n        \"B11\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 11 (lwir)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_B11.TIF\"\n        },\n        \"ANG\": {\n          \"title\": \"Angle coefficients file\",\n          \"type\": \"text/plain\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_ANG.txt\"\n        },\n        \"MTL\": {\n          \"title\": \"original metadata file\",\n          \"type\": \"text/plain\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_MTL.txt\"\n        },\n        \"BQA\": {\n          \"title\": \"Band quality data\",\n          \"type\": \"image/tiff; application=geotiff\",\n          \"href\": 
\"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/037/033/LC08_L1TP_037033_20180208_20180221_01_T1/LC08_L1TP_037033_20180208_20180221_01_T1_BQA.TIF\"\n        }\n      },\n      \"links\": []\n    },\n    {\n      \"stac_version\": \"1.0.0\",\n      \"stac_extensions\": [\"projection\", \"view\"],\n      \"type\": \"Feature\",\n      \"id\": \"LC80340332018034LGN00\",\n      \"collection\": \"landsat-8-l1\",\n      \"bbox\": [-107.6044, 37.8096, -104.86884, 39.97508],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [-107.03912158283073, 39.975078807631036],\n            [-104.87161559271382, 39.548160703908025],\n            [-105.43927721248009, 37.81075859503169],\n            [-107.60423259994965, 38.24485405534073],\n            [-107.03912158283073, 39.975078807631036]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2018-02-03T17:43:44Z\",\n        \"view:sun_azimuth\": 153.39513457,\n        \"view:sun_elevation\": 30.41894816,\n        \"proj:epsg\": 32613\n      },\n      \"assets\": {\n        \"index\": {\n          \"type\": \"text/html\",\n          \"title\": \"HTML index page\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/index.html\"\n        },\n        \"thumbnail\": {\n          \"title\": \"Thumbnail image\",\n          \"type\": \"image/jpeg\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_thumb_large.jpg\"\n        },\n        \"B1\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 1 (coastal)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_B1.TIF\"\n        },\n        \"B2\": {\n        
  \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 2 (blue)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_B2.TIF\"\n        },\n        \"B3\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 3 (green)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_B3.TIF\"\n        },\n        \"B4\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 4 (red)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_B4.TIF\"\n        },\n        \"B5\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 5 (nir)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_B5.TIF\"\n        },\n        \"B6\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 6 (swir16)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_B6.TIF\"\n        },\n        \"B7\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 7 (swir22)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_B7.TIF\"\n        },\n        \"B8\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 8 (pan)\",\n          \"href\": 
\"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_B8.TIF\"\n        },\n        \"B9\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 9 (cirrus)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_B9.TIF\"\n        },\n        \"B10\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 10 (lwir)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_B10.TIF\"\n        },\n        \"B11\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 11 (lwir)\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_B11.TIF\"\n        },\n        \"ANG\": {\n          \"title\": \"Angle coefficients file\",\n          \"type\": \"text/plain\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_ANG.txt\"\n        },\n        \"MTL\": {\n          \"title\": \"original metadata file\",\n          \"type\": \"text/plain\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_MTL.txt\"\n        },\n        \"BQA\": {\n          \"title\": \"Band quality data\",\n          \"type\": \"image/tiff; application=geotiff\",\n          \"href\": \"https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/034/033/LC08_L1TP_034033_20180203_20180220_01_T1/LC08_L1TP_034033_20180203_20180220_01_T1_BQA.TIF\"\n        }\n      
},\n      \"links\": []\n    }\n  ],\n  \"collections\": [\n    {\n      \"id\": \"landsat-8-l1\",\n      \"title\": \"Landsat 8 L1\",\n      \"description\": \"Landat 8 imagery radiometrically calibrated and orthorectified using gound points and Digital Elevation Model (DEM) data to correct relief displacement.\",\n      \"keywords\": [\"landsat\", \"earth observation\", \"usgs\"],\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"item_assets\"],\n      \"extent\": {\n        \"spatial\": {\n          \"bbox\": [[-180, -90, 180, 90]]\n        },\n        \"temporal\": {\n          \"interval\": [[\"2013-06-01T00:00:00Z\", null]]\n        }\n      },\n      \"providers\": [\n        {\n          \"name\": \"USGS\",\n          \"roles\": [\"producer\"],\n          \"url\": \"https://landsat.usgs.gov/\"\n        },\n        {\n          \"name\": \"Planet Labs\",\n          \"roles\": [\"processor\"],\n          \"url\": \"https://github.com/landsat-pds/landsat_ingestor\"\n        },\n        {\n          \"name\": \"AWS\",\n          \"roles\": [\"host\"],\n          \"url\": \"https://landsatonaws.com/\"\n        },\n        {\n          \"name\": \"Development Seed\",\n          \"roles\": [\"processor\"],\n          \"url\": \"https://github.com/sat-utils/sat-api\"\n        }\n      ],\n      \"license\": \"PDDL-1.0\",\n      \"summaries\": {\n        \"gsd\": [15],\n        \"platform\": [\"landsat-8\"],\n        \"instruments\": [\"oli\", \"tirs\"],\n        \"view:off_nadir\": [0]\n      },\n      \"item_assets\": {\n        \"index\": {\n          \"type\": \"text/html\",\n          \"title\": \"HTML index page\"\n        },\n        \"thumbnail\": {\n          \"title\": \"Thumbnail image\",\n          \"type\": \"image/jpeg\"\n        },\n        \"B1\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 1 (coastal)\"\n        },\n        \"B2\": {\n          \"type\": \"image/tiff; 
application=geotiff\",\n          \"title\": \"Band 2 (blue)\"\n        },\n        \"B3\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 3 (green)\"\n        },\n        \"B4\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 4 (red)\"\n        },\n        \"B5\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 5 (nir)\"\n        },\n        \"B6\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 6 (swir16)\"\n        },\n        \"B7\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 7 (swir22)\"\n        },\n        \"B8\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 8 (pan)\"\n        },\n        \"B9\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 9 (cirrus)\"\n        },\n        \"B10\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 10 (lwir)\"\n        },\n        \"B11\": {\n          \"type\": \"image/tiff; application=geotiff\",\n          \"title\": \"Band 11 (lwir)\"\n        },\n        \"ANG\": {\n          \"title\": \"Angle coefficients file\",\n          \"type\": \"text/plain\"\n        },\n        \"MTL\": {\n          \"title\": \"original metadata file\",\n          \"type\": \"text/plain\"\n        },\n        \"BQA\": {\n          \"title\": \"Band quality data\",\n          \"type\": \"image/tiff; application=geotiff\"\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"./example-search.json\"\n        }\n      ]\n    }\n  ],\n  \"links\": []\n}\n"
  },
  {
    "path": "intake/readers/tests/cats/stac_data/1.0.0beta2/earthsearch/readme.md",
    "content": "Generated with:\n\n```python\nimport satsearch\nimport pystac\nimport json\n\nbbox = [35.48, -3.24, 35.58, -3.14]\ndates = '2020-07-01/2020-08-15'\nURL='https://earth-search.aws.element84.com/v0'\nresults = satsearch.Search.search(url=URL,\n                                  collections=['sentinel-s2-l2a-cogs'],\n                                  datetime=dates,\n                                  bbox=bbox,\n                                  sort=['-properties.datetime'])\n\n# 18 items found\nitems = results.items()\nprint(len(items))\nitems.save('single-file-stac.json')\n\n# validation returns empty list\nimport json\nfrom pystac.validation import validate_dict\nwith open('single-file-stac.json') as f:\n    js = json.load(f)\nprint(validate_dict(js))\n\ncat = pystac.read_file('single-file-stac.json')\n```\n"
  },
  {
    "path": "intake/readers/tests/cats/stac_data/1.0.0beta2/earthsearch/single-file-stac.json",
    "content": "{\n  \"id\": \"STAC\",\n  \"description\": \"Single file STAC\",\n  \"stac_version\": \"1.0.0-beta.2\",\n  \"stac_extensions\": [\"single-file-stac\"],\n  \"type\": \"FeatureCollection\",\n  \"features\": [\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2A_36MYB_20200814_0_L2A\",\n      \"bbox\": [\n        34.79870551983084, -3.7056906919566326, 35.755863403460744,\n        -2.7110273448887328\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [34.80044299251402, -3.7056906919566326],\n            [34.79870551983084, -2.712838434794184],\n            [35.755863403460744, -2.7110273448887328],\n            [35.58561393371855, -3.4903485137854644],\n            [35.53750206413227, -3.703878577458925],\n            [34.80044299251402, -3.7056906919566326]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-08-14T08:11:00Z\",\n        \"platform\": \"sentinel-2a\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"view:off_nadir\": 0,\n        \"proj:epsg\": 32736,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2A_MSIL2A_20200814T074621_N0214_R135_T36MYB_20200814T103139\",\n        \"sentinel:data_coverage\": 85.74,\n        \"eo:cloud_cover\": 47.78,\n        \"sentinel:valid_cloud_cover\": true,\n        \"created\": \"2020-08-17T19:57:08.648Z\",\n        \"updated\": \"2020-08-17T19:57:08.648Z\",\n        \"sentinel:utm_zone\": 36\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": 
\"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/8/14/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/14/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/14/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          
\"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; 
application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          
\"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_36MYB_20200814_0_L2A\"\n        },\n        {\n          \"rel\": \"derived_from\",\n          \"href\": 
\"s3://cirrus-v0-data-1qm7gekzjucbq/sentinel-s2-l2a/36/M/YB/2020/8/S2A_36MYB_20200814_0_L2A/S2A_36MYB_20200814_0_L2A.json\",\n          \"title\": \"Source STAC Item\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2A_36MYB_20200811_0_L2A\",\n      \"bbox\": [\n        35.3528545470006, -3.704390328989478, 35.78841174435426,\n        -2.7109586089556537\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [35.3528545470006, -3.704390328989478],\n            [35.3871744235407, -3.550087222076425],\n            [35.57403108911051, -2.7114299346410236],\n            [35.785723330760796, -2.7109586089556537],\n            [35.78841174435426, -3.7031212858685087],\n            [35.3528545470006, -3.704390328989478]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-08-11T08:01:05Z\",\n        \"platform\": \"sentinel-2a\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"view:off_nadir\": 0,\n        \"proj:epsg\": 32736,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2A_MSIL2A_20200811T073621_N0214_R092_T36MYB_20200811T114649\",\n        \"sentinel:data_coverage\": 32.62,\n        
\"eo:cloud_cover\": 12.78,\n        \"sentinel:valid_cloud_cover\": true,\n        \"created\": \"2020-09-19T02:55:16.877Z\",\n        \"updated\": \"2020-09-19T02:55:16.877Z\"\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/8/11/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/11/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/11/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; 
application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": 
\"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n        
  \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_36MYB_20200811_0_L2A\"\n        },\n        {\n          \"rel\": \"canonical\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200811_0_L2A/S2A_36MYB_20200811_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"title\": \"Source STAC Item\",\n          \"rel\": \"derived_from\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items/S2A_36MYB_20200811_0_L2A\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2B_36MYB_20200809_0_L2A\",\n      \"bbox\": [\n        34.79870551983084, -3.7056906919566326, 35.76700629249285,\n        -2.7110017811328926\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [34.80044299251402, -3.7056906919566326],\n            [34.79870551983084, -2.712838434794184],\n            [35.76700629249285, -2.7110017811328926],\n            [35.59534033727834, -3.4950217349500936],\n            [35.54865807009975, -3.7038464213259266],\n            [34.80044299251402, -3.7056906919566326]\n          ]\n    
    ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-08-09T08:10:57Z\",\n        \"platform\": \"sentinel-2b\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"view:off_nadir\": 0,\n        \"proj:epsg\": 32736,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2B_MSIL2A_20200809T074619_N0214_R135_T36MYB_20200809T103816\",\n        \"sentinel:data_coverage\": 86.85,\n        \"eo:cloud_cover\": 77.28,\n        \"sentinel:valid_cloud_cover\": true,\n        \"created\": \"2020-08-18T10:57:54.124Z\",\n        \"updated\": \"2020-08-18T10:57:54.124Z\"\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/8/9/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/9/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/9/0/metadata.xml\"\n        },\n        
\"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n      
    \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B8A.tif\",\n   
       \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2B_36MYB_20200809_0_L2A\"\n        },\n        {\n          \"title\": \"Source STAC Item\",\n          \"rel\": \"derived_from\",\n          \"href\": \"s3://cirrus-v0-data-1qm7gekzjucbq/sentinel-s2-l2a/36/M/YB/2020/8/S2B_36MYB_20200809_0_L2A/S2B_36MYB_20200809_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2B_36MYB_20200806_0_L2A\",\n      \"bbox\": [\n        35.361851558698646, -3.704366288842677, 35.78841174435426,\n        -2.7109586089556537\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": 
[\n          [\n            [35.361851558698646, -3.704366288842677],\n            [35.40381967148886, -3.5125664894255837],\n            [35.582659278647895, -2.7114114515918657],\n            [35.785723330760796, -2.7109586089556537],\n            [35.78841174435426, -3.7031212858685087],\n            [35.361851558698646, -3.704366288842677]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-08-06T08:01:02Z\",\n        \"platform\": \"sentinel-2b\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"view:off_nadir\": 0,\n        \"proj:epsg\": 32736,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2B_MSIL2A_20200806T073619_N0214_R092_T36MYB_20200806T113023\",\n        \"sentinel:data_coverage\": 31.77,\n        \"eo:cloud_cover\": 0,\n        \"sentinel:valid_cloud_cover\": true,\n        \"created\": \"2020-08-17T20:39:17.063Z\",\n        \"updated\": \"2020-08-17T20:39:17.063Z\"\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/8/6/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          
\"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/6/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/6/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 
0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          
\"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2B_36MYB_20200806_0_L2A\"\n        },\n        {\n          \"title\": \"Source STAC Item\",\n          \"rel\": \"derived_from\",\n          \"href\": \"s3://cirrus-v0-data-1qm7gekzjucbq/sentinel-s2-l2a/36/M/YB/2020/8/S2B_36MYB_20200806_0_L2A/S2B_36MYB_20200806_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": 
\"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2A_36MYB_20200804_0_L2A\",\n      \"bbox\": [\n        34.79870551983084, -3.7056906919566326, 35.757300644517535,\n        -2.7110240533940337\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [34.80044299251402, -3.7056906919566326],\n            [34.79870551983084, -2.712838434794184],\n            [35.757300644517535, -2.7110240533940337],\n            [35.593185658988496, -3.4635764446186035],\n            [35.539121505336155, -3.7038739183195086],\n            [34.80044299251402, -3.7056906919566326]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-08-04T08:11:00Z\",\n        \"platform\": \"sentinel-2a\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"view:off_nadir\": 0,\n        \"proj:epsg\": 32736,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2A_MSIL2A_20200804T074621_N0214_R135_T36MYB_20200804T104333\",\n        \"sentinel:data_coverage\": 85.89,\n        \"eo:cloud_cover\": 75.37,\n        \"sentinel:valid_cloud_cover\": true,\n        \"created\": \"2020-08-17T21:33:14.530Z\",\n        \"updated\": \"2020-08-17T21:33:14.530Z\"\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/8/4/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": 
\"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/4/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/4/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          
\"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; 
application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_36MYB_20200804_0_L2A\"\n        },\n        {\n          \"title\": \"Source STAC Item\",\n          \"rel\": \"derived_from\",\n          \"href\": 
\"s3://cirrus-v0-data-1qm7gekzjucbq/sentinel-s2-l2a/36/M/YB/2020/8/S2A_36MYB_20200804_0_L2A/S2A_36MYB_20200804_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2A_36MYB_20200801_0_L2A\",\n      \"bbox\": [\n        35.354473584331565, -3.704386009664254, 35.78841174435426,\n        -2.7109586089556537\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [35.354473584331565, -3.704386009664254],\n            [35.3944358874427, -3.5225127312927675],\n            [35.53486937287048, -2.8896897890758986],\n            [35.574751456777825, -2.7114283938558765],\n            [35.785723330760796, -2.7109586089556537],\n            [35.78841174435426, -3.7031212858685087],\n            [35.354473584331565, -3.704386009664254]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-08-01T08:01:05Z\",\n        \"platform\": \"sentinel-2a\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"view:off_nadir\": 0,\n        \"proj:epsg\": 32736,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2A_MSIL2A_20200801T073621_N0214_R092_T36MYB_20200801T102406\",\n        
\"sentinel:data_coverage\": 32.57,\n        \"eo:cloud_cover\": 62.8,\n        \"sentinel:valid_cloud_cover\": true,\n        \"created\": \"2020-08-18T10:54:09.447Z\",\n        \"updated\": \"2020-08-18T10:54:09.447Z\"\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/8/1/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/1/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/8/1/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          
\"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": 
\"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          
\"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 
5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_36MYB_20200801_0_L2A\"\n        },\n        {\n          \"title\": \"Source STAC Item\",\n          \"rel\": \"derived_from\",\n          \"href\": \"s3://cirrus-v0-data-1qm7gekzjucbq/sentinel-s2-l2a/36/M/YB/2020/8/S2A_36MYB_20200801_0_L2A/S2A_36MYB_20200801_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2B_36MYB_20200730_0_L2A\",\n      \"bbox\": [\n        34.79870551983084, -3.7056906919566326, 35.7687137359457,\n        -2.7109978548485385\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [34.80044299251402, -3.7056906919566326],\n            [34.79870551983084, -2.712838434794184],\n            [35.7687137359457, -2.7109978548485385],\n            [35.602303822311306, -3.471108470253178],\n            [35.5503674728783, -3.703841481678485],\n            [34.80044299251402, -3.7056906919566326]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-07-30T08:10:57Z\",\n        \"platform\": \"sentinel-2b\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n 
       \"view:off_nadir\": 0,\n        \"proj:epsg\": 32736,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2B_MSIL2A_20200730T074619_N0214_R135_T36MYB_20200730T111202\",\n        \"sentinel:data_coverage\": 87.03,\n        \"eo:cloud_cover\": 5.89,\n        \"sentinel:valid_cloud_cover\": true,\n        \"created\": \"2020-08-18T09:45:24.058Z\",\n        \"updated\": \"2020-08-18T09:45:24.058Z\"\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/30/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/30/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/30/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": 
\"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          
\"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          
\"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2B_36MYB_20200730_0_L2A\"\n        },\n        {\n          \"title\": \"Source STAC Item\",\n          \"rel\": \"derived_from\",\n          \"href\": \"s3://cirrus-v0-data-1qm7gekzjucbq/sentinel-s2-l2a/36/M/YB/2020/7/S2B_36MYB_20200730_0_L2A/S2B_36MYB_20200730_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2B_36MYB_20200727_0_L2A\",\n      \"bbox\": [\n        35.364731107030785, -3.704358575275874, 35.78841174435426,\n        -2.7109586089556537\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [35.364731107030785, -3.704358575275874],\n            [35.40440599150377, -3.5241264913281642],\n            [35.58517559334435, 
-2.711406049575985],\n            [35.785723330760796, -2.7109586089556537],\n            [35.78841174435426, -3.7031212858685087],\n            [35.364731107030785, -3.704358575275874]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-07-27T08:01:01Z\",\n        \"platform\": \"sentinel-2b\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"view:off_nadir\": 0,\n        \"proj:epsg\": 32736,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2B_MSIL2A_20200727T073619_N0214_R092_T36MYB_20200727T122549\",\n        \"sentinel:data_coverage\": 31.47,\n        \"eo:cloud_cover\": 49.45,\n        \"sentinel:valid_cloud_cover\": true,\n        \"created\": \"2020-08-31T18:24:19.293Z\",\n        \"updated\": \"2020-08-31T18:24:19.293Z\",\n        \"sentinel:utm_zone\": 36\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/27/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/27/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          
\"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/27/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; 
profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n         
 \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 
0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2B_36MYB_20200727_0_L2A\"\n        },\n        {\n          \"rel\": \"canonical\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200727_0_L2A/S2B_36MYB_20200727_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"derived_from\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items/S2B_36MYB_20200727_0_L2A\",\n          \"title\": \"Source STAC Item\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": 
\"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2A_36MYB_20200725_0_L2A\",\n      \"bbox\": [\n        34.79870551983084, -3.7056906919566326, 35.759906699226775,\n        -2.7110180807693265\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [34.80044299251402, -3.7056906919566326],\n            [34.79870551983084, -2.712838434794184],\n            [35.759906699226775, -2.7110180807693265],\n            [35.58973859625803, -3.4867218329449443],\n            [35.54119098027402, -3.703867960107399],\n            [34.80044299251402, -3.7056906919566326]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-07-25T08:10:59Z\",\n        \"platform\": \"sentinel-2a\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"view:off_nadir\": 0,\n        \"proj:epsg\": 32736,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2A_MSIL2A_20200725T074621_N0214_R135_T36MYB_20200725T111115\",\n        \"sentinel:data_coverage\": 86.12,\n        \"eo:cloud_cover\": 1.01,\n        \"sentinel:valid_cloud_cover\": true,\n        \"created\": \"2020-08-18T10:54:56.621Z\",\n        \"updated\": \"2020-08-18T10:54:56.621Z\"\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/25/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": 
\"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/25/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/25/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          
\"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; 
application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_36MYB_20200725_0_L2A\"\n        },\n        {\n          \"title\": \"Source STAC Item\",\n          \"rel\": \"derived_from\",\n          \"href\": 
\"s3://cirrus-v0-data-1qm7gekzjucbq/sentinel-s2-l2a/36/M/YB/2020/7/S2A_36MYB_20200725_0_L2A/S2A_36MYB_20200725_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2A_36MYB_20200722_0_L2A\",\n      \"bbox\": [\n        35.356453603399366, -3.704380723262607, 35.78841174435426,\n        -2.7109586089556537\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [35.356453603399366, -3.704380723262607],\n            [35.39652214425136, -3.52401250696652],\n            [35.57744628705123, -2.7114226260972907],\n            [35.785723330760796, -2.7109586089556537],\n            [35.78841174435426, -3.7031212858685087],\n            [35.356453603399366, -3.704380723262607]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-07-22T08:01:04Z\",\n        \"platform\": \"sentinel-2a\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"data_coverage\": 32.26,\n        \"view:off_nadir\": 0,\n        \"eo:cloud_cover\": 69.73,\n        \"proj:epsg\": 32736,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2A_MSIL2A_20200722T073621_N0214_R092_T36MYB_20200722T105841\",\n        \"created\": \"2020-09-19T01:08:35.436Z\",\n    
    \"updated\": \"2020-09-19T01:08:35.436Z\",\n        \"sentinel:valid_cloud_cover\": true,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:data_coverage\": 32.26\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/22/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/22/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/22/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; 
application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": 
\"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n        
  \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_36MYB_20200722_0_L2A\"\n        },\n        {\n          \"rel\": \"canonical\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200722_0_L2A/S2A_36MYB_20200722_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"derived_from\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items/S2A_36MYB_20200722_0_L2A\",\n          \"title\": \"Source STAC Item\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2B_36MYB_20200720_0_L2A\",\n      \"bbox\": [\n        34.79870551983084, -3.7056906919566326, 35.77185877492271,\n        -2.710990616463335\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [34.80044299251402, -3.7056906919566326],\n            [34.79870551983084, -2.712838434794184],\n            [35.77185877492271, -2.710990616463335],\n            [35.610236308119404, -3.449883225671188],\n            [35.5534261567553, -3.7038326347645305],\n            [34.80044299251402, -3.7056906919566326]\n          ]\n       
 ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-07-20T08:10:56Z\",\n        \"platform\": \"sentinel-2b\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"data_coverage\": 87.34,\n        \"view:off_nadir\": 0,\n        \"eo:cloud_cover\": 14.84,\n        \"proj:epsg\": 32736,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2B_MSIL2A_20200720T074619_N0214_R135_T36MYB_20200720T112901\",\n        \"created\": \"2020-09-19T10:48:10.455Z\",\n        \"updated\": \"2020-09-19T10:48:10.455Z\",\n        \"sentinel:valid_cloud_cover\": true,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:data_coverage\": 87.34\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/20/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/20/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": 
\"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/20/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; 
profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n         
 \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2B_36MYB_20200720_0_L2A\"\n        },\n        {\n          \"rel\": \"canonical\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200720_0_L2A/S2B_36MYB_20200720_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"derived_from\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items/S2B_36MYB_20200720_0_L2A\",\n          \"title\": \"Source STAC Item\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    
{\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2B_36MYB_20200717_0_L2A\",\n      \"bbox\": [\n        35.36833009312432, -3.704348921313967, 35.78841174435426,\n        -2.7109586089556537\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [35.36833009312432, -3.704348921313967],\n            [35.407819165099255, -3.5245820359904383],\n            [35.588411026975066, -2.7113990960324563],\n            [35.785723330760796, -2.7109586089556537],\n            [35.78841174435426, -3.7031212858685087],\n            [35.36833009312432, -3.704348921313967]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-07-17T08:01:00Z\",\n        \"platform\": \"sentinel-2b\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"data_coverage\": 31.12,\n        \"view:off_nadir\": 0,\n        \"eo:cloud_cover\": 99.94,\n        \"proj:epsg\": 32736,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2B_MSIL2A_20200717T073619_N0214_R092_T36MYB_20200718T133139\",\n        \"created\": \"2020-08-28T01:04:04.443Z\",\n        \"updated\": \"2020-08-28T01:04:04.443Z\",\n        \"sentinel:valid_cloud_cover\": true,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:data_coverage\": 31.12\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/17/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; 
profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/17/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/17/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, 
-10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 
5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2B_36MYB_20200717_0_L2A\"\n        },\n        {\n          \"rel\": \"canonical\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200717_0_L2A/S2B_36MYB_20200717_0_L2A.json\",\n          \"type\": 
\"application/json\"\n        },\n        {\n          \"rel\": \"derived_from\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items/S2B_36MYB_20200717_0_L2A\",\n          \"title\": \"Source STAC Item\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"derived_from\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items/S2B_36MYB_20200717_0_L2A\",\n          \"title\": \"Source STAC Item\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2A_36MYB_20200715_0_L2A\",\n      \"bbox\": [\n        34.79870551983084, -3.7056906919566326, 35.762423439929975,\n        -2.7110123074852157\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [34.80044299251402, -3.7056906919566326],\n            [34.79870551983084, -2.712838434794184],\n            [35.762423439929975, -2.7110123074852157],\n            [35.58950948158126, -3.500550130276924],\n            [35.54379972231785, -3.7038604423827857],\n            [34.80044299251402, -3.7056906919566326]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-07-15T08:10:59Z\",\n        \"platform\": \"sentinel-2a\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        
\"data_coverage\": 86.39,\n        \"view:off_nadir\": 0,\n        \"eo:cloud_cover\": 98.19,\n        \"proj:epsg\": 32736,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2A_MSIL2A_20200715T074621_N0214_R135_T36MYB_20200715T120435\",\n        \"created\": \"2020-08-27T22:44:05.523Z\",\n        \"updated\": \"2020-08-27T22:44:05.523Z\",\n        \"sentinel:valid_cloud_cover\": true,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:data_coverage\": 86.39\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/15/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/15/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/15/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": 
\"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          
\"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          
\"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_36MYB_20200715_0_L2A\"\n        },\n        {\n          \"rel\": \"canonical\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200715_0_L2A/S2A_36MYB_20200715_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"derived_from\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items/S2A_36MYB_20200715_0_L2A\",\n          \"title\": \"Source STAC Item\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2A_36MYB_20200712_0_L2A\",\n      \"bbox\": [\n        35.36185349129008, -3.7043662836689135, 35.78841174435426,\n        -2.7109586089556537\n      ],\n      
\"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [35.36185349129008, -3.7043662836689135],\n            [35.527401739826665, -2.9456144416642576],\n            [35.579424700604456, -2.711418387850691],\n            [35.785723330760796, -2.7109586089556537],\n            [35.78841174435426, -3.7031212858685087],\n            [35.36185349129008, -3.7043662836689135]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-07-12T08:01:03Z\",\n        \"platform\": \"sentinel-2a\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"data_coverage\": 31.92,\n        \"view:off_nadir\": 0,\n        \"eo:cloud_cover\": 12.02,\n        \"proj:epsg\": 32736,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2A_MSIL2A_20200712T073621_N0214_R092_T36MYB_20200712T110016\",\n        \"created\": \"2020-08-18T09:03:03.858Z\",\n        \"updated\": \"2020-08-18T09:03:03.858Z\",\n        \"sentinel:valid_cloud_cover\": true,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:data_coverage\": 31.92\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/12/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        
\"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/12/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/12/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; 
application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          
\"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_36MYB_20200712_0_L2A\"\n        },\n        {\n          \"rel\": \"derived_from\",\n          \"href\": \"s3://cirrus-v0-data-1qm7gekzjucbq/sentinel-s2-l2a/36/M/YB/2020/7/S2A_36MYB_20200712_0_L2A/S2A_36MYB_20200712_0_L2A.json\",\n          \"title\": \"Source STAC Item\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": 
\"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2B_36MYB_20200710_0_L2A\",\n      \"bbox\": [\n        34.79870551983084, -3.7056906919566326, 35.77024131589214,\n        -2.7109943401109917\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [34.80044299251402, -3.7056906919566326],\n            [34.79870551983084, -2.712838434794184],\n            [35.77024131589214, -2.7109943401109917],\n            [35.60554409132611, -3.4622467746776224],\n            [35.55171700875178, -3.7038375795975336],\n            [34.80044299251402, -3.7056906919566326]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-07-10T08:10:56Z\",\n        \"platform\": \"sentinel-2b\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"data_coverage\": 87.17,\n        \"view:off_nadir\": 0,\n        \"eo:cloud_cover\": 10.17,\n        \"proj:epsg\": 32736,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2B_MSIL2A_20200710T074619_N0214_R135_T36MYB_20200710T115611\",\n        \"created\": \"2020-08-18T12:01:03.380Z\",\n        \"updated\": \"2020-08-18T12:01:03.380Z\",\n        \"sentinel:valid_cloud_cover\": true,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:data_coverage\": 87.17\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": 
\"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/10/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/10/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/10/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          
\"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; 
application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          
\"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2B_36MYB_20200710_0_L2A\"\n        },\n        {\n          \"rel\": \"derived_from\",\n          \"href\": 
\"s3://cirrus-v0-data-1qm7gekzjucbq/sentinel-s2-l2a/36/M/YB/2020/7/S2B_36MYB_20200710_0_L2A/S2B_36MYB_20200710_0_L2A.json\",\n          \"title\": \"Source STAC Item\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2B_36MYB_20200707_0_L2A\",\n      \"bbox\": [\n        35.363651831328085, -3.704361467477506, 35.78841174435426,\n        -2.7109586089556537\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [35.363651831328085, -3.704361467477506],\n            [35.58391733787192, -2.7114087514517324],\n            [35.785723330760796, -2.7109586089556537],\n            [35.78841174435426, -3.7031212858685087],\n            [35.363651831328085, -3.704361467477506]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-07-07T08:01:01Z\",\n        \"platform\": \"sentinel-2b\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"data_coverage\": 31.59,\n        \"view:off_nadir\": 0,\n        \"eo:cloud_cover\": 19.61,\n        \"proj:epsg\": 32736,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2B_MSIL2A_20200707T073619_N0214_R092_T36MYB_20200707T113322\",\n        \"created\": \"2020-09-18T22:19:41.465Z\",\n        
\"updated\": \"2020-09-18T22:19:41.465Z\",\n        \"sentinel:valid_cloud_cover\": true,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:data_coverage\": 31.59\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/7/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/7/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/7/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; 
application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": 
\"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n        
  \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2B_36MYB_20200707_0_L2A\"\n        },\n        {\n          \"rel\": \"canonical\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2B_36MYB_20200707_0_L2A/S2B_36MYB_20200707_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"derived_from\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items/S2B_36MYB_20200707_0_L2A\",\n          \"title\": \"Source STAC Item\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      \"id\": \"S2A_36MYB_20200705_0_L2A\",\n      \"bbox\": [\n        34.79870551983084, -3.7056906919566326, 35.762781954089746,\n        -2.7110114846428153\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [34.80044299251402, -3.7056906919566326],\n            [34.79870551983084, -2.712838434794184],\n            [35.762781954089746, -2.7110114846428153],\n            [35.543081537096796, -3.703862512777141],\n            [34.80044299251402, -3.7056906919566326]\n          ]\n        ]\n      },\n      \"properties\": {\n        
\"datetime\": \"2020-07-05T08:10:58Z\",\n        \"platform\": \"sentinel-2a\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"view:off_nadir\": 0,\n        \"eo:cloud_cover\": 12.46,\n        \"proj:epsg\": 32736,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2A_MSIL2A_20200705T074621_N0214_R135_T36MYB_20200705T111345\",\n        \"sentinel:data_coverage\": 86.27,\n        \"created\": \"2020-08-27T21:35:09.718Z\",\n        \"updated\": \"2020-08-27T21:35:09.718Z\",\n        \"sentinel:valid_cloud_cover\": true,\n        \"sentinel:utm_zone\": 36\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/5/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/5/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/5/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color 
image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 
9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          
\"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/B12.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_36MYB_20200705_0_L2A\"\n        },\n        {\n          \"rel\": \"canonical\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200705_0_L2A/S2A_36MYB_20200705_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"title\": \"Source STAC Item\",\n          \"rel\": \"derived_from\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items/S2A_36MYB_20200705_0_L2A\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    },\n    {\n      \"type\": \"Feature\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"stac_extensions\": [\"eo\", \"view\", \"proj\"],\n      
\"id\": \"S2A_36MYB_20200702_0_L2A\",\n      \"bbox\": [\n        35.356093300381986, -3.704381685556869, 35.78841174435426,\n        -2.7109586089556537\n      ],\n      \"geometry\": {\n        \"type\": \"Polygon\",\n        \"coordinates\": [\n          [\n            [35.356093300381986, -3.704381685556869],\n            [35.39966829397743, -3.506566766775803],\n            [35.57726786230264, -2.7114230081670594],\n            [35.785723330760796, -2.7109586089556537],\n            [35.78841174435426, -3.7031212858685087],\n            [35.356093300381986, -3.704381685556869]\n          ]\n        ]\n      },\n      \"properties\": {\n        \"datetime\": \"2020-07-02T08:01:04Z\",\n        \"platform\": \"sentinel-2a\",\n        \"constellation\": \"sentinel-2\",\n        \"instruments\": [\"msi\"],\n        \"gsd\": 10,\n        \"data_coverage\": 32.37,\n        \"view:off_nadir\": 0,\n        \"eo:cloud_cover\": 55.53,\n        \"proj:epsg\": 32736,\n        \"sentinel:latitude_band\": \"M\",\n        \"sentinel:grid_square\": \"YB\",\n        \"sentinel:sequence\": \"0\",\n        \"sentinel:product_id\": \"S2A_MSIL2A_20200702T073621_N0214_R092_T36MYB_20200702T105954\",\n        \"created\": \"2020-08-27T21:10:40.692Z\",\n        \"updated\": \"2020-08-27T21:10:40.692Z\",\n        \"sentinel:valid_cloud_cover\": true,\n        \"sentinel:utm_zone\": 36,\n        \"sentinel:data_coverage\": 32.37\n      },\n      \"collection\": \"sentinel-s2-l2a-cogs\",\n      \"assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l1c/tiles/36/M/YB/2020/7/2/0/preview.jpg\"\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": 
\"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/L2A_PVI.tif\",\n          \"proj:shape\": [343, 343],\n          \"proj:transform\": [320, 0, 699960, 0, -320, 9700000, 0, 0, 1]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/2/0/tileInfo.json\"\n        },\n        \"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"href\": \"https://roda.sentinel-hub.com/sentinel-s2-l2a/tiles/36/M/YB/2020/7/2/0/metadata.xml\"\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/TCI.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B01.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B02.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B03\": 
{\n          \"title\": \"Band 3 (green)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B03.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B04.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B05.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B06.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B07.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, 
-20, 9700000, 0, 0, 1]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B08.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B8A.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B09.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B11.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/B12.tif\",\n          \"proj:shape\": 
[5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/AOT.tif\",\n          \"proj:shape\": [1830, 1830],\n          \"proj:transform\": [60, 0, 699960, 0, -60, 9700000, 0, 0, 1]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/WVP.tif\",\n          \"proj:shape\": [10980, 10980],\n          \"proj:transform\": [10, 0, 699960, 0, -10, 9700000, 0, 0, 1]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/SCL.tif\",\n          \"proj:shape\": [5490, 5490],\n          \"proj:transform\": [20, 0, 699960, 0, -20, 9700000, 0, 0, 1]\n        }\n      },\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items/S2A_36MYB_20200702_0_L2A\"\n        },\n        {\n          \"rel\": \"canonical\",\n          \"href\": \"https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/36/M/YB/2020/7/S2A_36MYB_20200702_0_L2A/S2A_36MYB_20200702_0_L2A.json\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"derived_from\",\n          \"href\": 
\"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items/S2A_36MYB_20200702_0_L2A\",\n          \"title\": \"Source STAC Item\",\n          \"type\": \"application/json\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"collection\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" }\n      ]\n    }\n  ],\n  \"collections\": [\n    {\n      \"id\": \"sentinel-s2-l2a-cogs\",\n      \"stac_version\": \"1.0.0-beta.2\",\n      \"description\": \"Sentinel-2a and Sentinel-2b imagery, processed to Level 2A (Surface Reflectance) and converted to Cloud-Optimized GeoTIFFs\",\n      \"links\": [\n        {\n          \"rel\": \"self\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs\"\n        },\n        {\n          \"rel\": \"license\",\n          \"href\": \"https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice\"\n        },\n        {\n          \"rel\": \"about\",\n          \"href\": \"https://github.com/stac-utils/stac-sentinel\"\n        },\n        {\n          \"rel\": \"parent\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/\"\n        },\n        { \"rel\": \"root\", \"href\": \"https://earth-search.aws.element84.com/v0/\" },\n        {\n          \"rel\": \"items\",\n          \"href\": \"https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a-cogs/items\"\n        }\n      ],\n      \"stac_extensions\": [\"item-assets\"],\n      \"title\": \"Sentinel 2 L2A COGs\",\n      \"keywords\": [\"sentinel\", \"earth observation\", \"esa\"],\n      \"providers\": [\n        {\n          \"name\": \"ESA\",\n          \"roles\": [\"producer\"],\n        
  \"url\": \"https://earth.esa.int/web/guest/home\"\n        },\n        {\n          \"name\": \"Sinergise\",\n          \"roles\": [\"processor\"],\n          \"url\": \"https://registry.opendata.aws/sentinel-2/\"\n        },\n        {\n          \"name\": \"AWS\",\n          \"roles\": [\"host\"],\n          \"url\": \"http://sentinel-pds.s3-website.eu-central-1.amazonaws.com/\"\n        },\n        {\n          \"name\": \"Element 84\",\n          \"roles\": [\"processor\"],\n          \"url\": \"https://element84.com\"\n        }\n      ],\n      \"summaries\": {\n        \"platform\": [\"sentinel-2a\", \"sentinel-2b\"],\n        \"constellation\": [\"sentinel-2\"],\n        \"instruments\": [\"msi\"],\n        \"gsd\": [10],\n        \"view:off_nadir\": [0]\n      },\n      \"item_assets\": {\n        \"thumbnail\": {\n          \"title\": \"Thumbnail\",\n          \"type\": \"image/png\",\n          \"roles\": [\"thumbnail\"]\n        },\n        \"overview\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"overview\"],\n          \"gsd\": 10,\n          \"eo:bands\": [\n            {\n              \"name\": \"B04\",\n              \"common_name\": \"red\",\n              \"center_wavelength\": 0.6645,\n              \"full_width_half_max\": 0.038\n            },\n            {\n              \"name\": \"B03\",\n              \"common_name\": \"green\",\n              \"center_wavelength\": 0.56,\n              \"full_width_half_max\": 0.045\n            },\n            {\n              \"name\": \"B02\",\n              \"common_name\": \"blue\",\n              \"center_wavelength\": 0.4966,\n              \"full_width_half_max\": 0.098\n            }\n          ]\n        },\n        \"info\": {\n          \"title\": \"Original JSON metadata\",\n          \"type\": \"application/json\",\n          \"roles\": [\"metadata\"]\n        },\n        
\"metadata\": {\n          \"title\": \"Original XML metadata\",\n          \"type\": \"application/xml\",\n          \"roles\": [\"metadata\"]\n        },\n        \"visual\": {\n          \"title\": \"True color image\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"overview\"],\n          \"gsd\": 10,\n          \"eo:bands\": [\n            {\n              \"name\": \"B04\",\n              \"common_name\": \"red\",\n              \"center_wavelength\": 0.6645,\n              \"full_width_half_max\": 0.038\n            },\n            {\n              \"name\": \"B03\",\n              \"common_name\": \"green\",\n              \"center_wavelength\": 0.56,\n              \"full_width_half_max\": 0.045\n            },\n            {\n              \"name\": \"B02\",\n              \"common_name\": \"blue\",\n              \"center_wavelength\": 0.4966,\n              \"full_width_half_max\": 0.098\n            }\n          ]\n        },\n        \"B01\": {\n          \"title\": \"Band 1 (coastal)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 60,\n          \"eo:bands\": [\n            {\n              \"name\": \"B01\",\n              \"common_name\": \"coastal\",\n              \"center_wavelength\": 0.4439,\n              \"full_width_half_max\": 0.027\n            }\n          ]\n        },\n        \"B02\": {\n          \"title\": \"Band 2 (blue)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 10,\n          \"eo:bands\": [\n            {\n              \"name\": \"B02\",\n              \"common_name\": \"blue\",\n              \"center_wavelength\": 0.4966,\n              \"full_width_half_max\": 0.098\n            }\n          ]\n        },\n        \"B03\": {\n          \"title\": \"Band 3 (green)\",\n          
\"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 10,\n          \"eo:bands\": [\n            {\n              \"name\": \"B03\",\n              \"common_name\": \"green\",\n              \"center_wavelength\": 0.56,\n              \"full_width_half_max\": 0.045\n            }\n          ]\n        },\n        \"B04\": {\n          \"title\": \"Band 4 (red)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 10,\n          \"eo:bands\": [\n            {\n              \"name\": \"B04\",\n              \"common_name\": \"red\",\n              \"center_wavelength\": 0.6645,\n              \"full_width_half_max\": 0.038\n            }\n          ]\n        },\n        \"B05\": {\n          \"title\": \"Band 5\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 20,\n          \"eo:bands\": [\n            {\n              \"name\": \"B05\",\n              \"center_wavelength\": 0.7039,\n              \"full_width_half_max\": 0.019\n            }\n          ]\n        },\n        \"B06\": {\n          \"title\": \"Band 6\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 20,\n          \"eo:bands\": [\n            {\n              \"name\": \"B06\",\n              \"center_wavelength\": 0.7402,\n              \"full_width_half_max\": 0.018\n            }\n          ]\n        },\n        \"B07\": {\n          \"title\": \"Band 7\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 20,\n          \"eo:bands\": [\n            {\n              \"name\": \"B07\",\n              \"center_wavelength\": 0.7825,\n              \"full_width_half_max\": 0.028\n 
           }\n          ]\n        },\n        \"B08\": {\n          \"title\": \"Band 8 (nir)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 10,\n          \"eo:bands\": [\n            {\n              \"name\": \"B08\",\n              \"common_name\": \"nir\",\n              \"center_wavelength\": 0.8351,\n              \"full_width_half_max\": 0.145\n            }\n          ]\n        },\n        \"B8A\": {\n          \"title\": \"Band 8A\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 20,\n          \"eo:bands\": [\n            {\n              \"name\": \"B8A\",\n              \"center_wavelength\": 0.8648,\n              \"full_width_half_max\": 0.033\n            }\n          ]\n        },\n        \"B09\": {\n          \"title\": \"Band 9\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 60,\n          \"eo:bands\": [\n            {\n              \"name\": \"B09\",\n              \"center_wavelength\": 0.945,\n              \"full_width_half_max\": 0.026\n            }\n          ]\n        },\n        \"B11\": {\n          \"title\": \"Band 11 (swir16)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 20,\n          \"eo:bands\": [\n            {\n              \"name\": \"B11\",\n              \"common_name\": \"swir16\",\n              \"center_wavelength\": 1.6137,\n              \"full_width_half_max\": 0.143\n            }\n          ]\n        },\n        \"B12\": {\n          \"title\": \"Band 12 (swir22)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"],\n          \"gsd\": 20,\n          \"eo:bands\": [\n            {\n 
             \"name\": \"B12\",\n              \"common_name\": \"swir22\",\n              \"center_wavelength\": 2.22024,\n              \"full_width_half_max\": 0.242\n            }\n          ]\n        },\n        \"AOT\": {\n          \"title\": \"Aerosol Optical Thickness (AOT)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"]\n        },\n        \"WVP\": {\n          \"title\": \"Water Vapour (WVP)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"]\n        },\n        \"SCL\": {\n          \"title\": \"Scene Classification Map (SCL)\",\n          \"type\": \"image/tiff; application=geotiff; profile=cloud-optimized\",\n          \"roles\": [\"data\"]\n        }\n      },\n      \"extent\": {\n        \"spatial\": { \"bbox\": [[-180, -90, 180, 90]] },\n        \"temporal\": { \"interval\": [[\"2015-06-27T10:25:31.456000Z\", null]] }\n      },\n      \"license\": \"proprietary\"\n    }\n  ],\n  \"links\": []\n}\n"
  },
  {
    "path": "intake/readers/tests/cats/test_sql.py",
    "content": "import os\n\nimport pandas as pd\nimport pytest\n\nfrom intake.readers import catalogs, datatypes, readers\n\npytest.importorskip(\"pytest_postgresql\")\npytest.importorskip(\"psycopg\")\n\n\n@pytest.fixture  # uses pytest-postgresql\ndef postgres_with_data(postgresql):\n    \"\"\"Check main postgresql fixture.\"\"\"\n    cur = postgresql.cursor()\n    cur.execute(\n        \"create table t_random as select s, md5(random()::text) from generate_Series(1,50) s;\"\n    )\n    postgresql.commit()\n    cur.close()\n    return int(cur._conn.pgconn.port)  # this is the one I found to be dynamic\n\n\n@pytest.mark.skipif(os.name == \"nt\", reason=\"`postgresql` does not work on Windows\")\ndef test_pg_pandas(postgres_with_data):\n    pytest.importorskip(\"psycopg2\")\n    pytest.importorskip(\"sqlalchemy\")\n\n    data = datatypes.SQLQuery(\n        conn=f\"postgresql://postgres@127.0.0.1:{postgres_with_data}/tests\",\n        query=\"t_random\",\n    )\n    reader = readers.PandasSQLAlchemy(data)\n    out = reader.read()\n    assert len(out) == 50\n    out = reader.discover()\n    assert len(out) == 10\n\n\n@pytest.mark.skipif(os.name == \"nt\", reason=\"`postgresql` does not work on Windows\")\ndef test_pg_duck_with_pandas_input(postgres_with_data):\n    data = datatypes.SQLQuery(\n        conn=f\"postgresql://postgres@127.0.0.1:{postgres_with_data}/tests\",\n        query=\"t_random\",\n    )\n    reader = readers.DuckSQL(data)\n    out = reader.read()\n    assert len(out) == 50\n    out = reader.discover()\n    assert len(out) == 10\n\n\n@pytest.fixture\ndef sqlite_with_data(tmpdir):\n    \"\"\"Check main postgresql fixture.\"\"\"\n    pytest.importorskip(\"sqlalchemy\", minversion=\"2\")\n    import sqlite3\n\n    fn = f\"{tmpdir}/test.db\"\n    cnx = sqlite3.connect(fn)\n    df = pd.DataFrame({\"a\": [\"hi\", \"ho\"] * 1000})\n    df.to_sql(name=\"oi\", con=cnx)\n    return fn\n\n\ndef test_sqlite_pandas(sqlite_with_data):\n    
pytest.importorskip(\"pandas\", minversion=\"2\", reason=\"Not working on earlier version of pandas\")\n    data = datatypes.SQLQuery(conn=f\"sqlite:///{sqlite_with_data}\", query=\"oi\")\n    reader = readers.PandasSQLAlchemy(data)\n    out = reader.read()\n    assert len(out) == 2000\n    out = reader.discover()\n    assert len(out) == 10\n\n\ndef test_sqlite_duck_with_pandas_input(sqlite_with_data):\n    data = datatypes.SQLQuery(conn=f\"sqlite:///{sqlite_with_data}\", query=\"oi\")\n    reader = readers.DuckSQL(data)\n    out = reader.read()\n    assert len(out) == 2000\n    out = reader.discover()\n    assert len(out) == 10\n\n\ndef test_pandas_duck_pandas(sqlite_with_data):\n    pytest.importorskip(\"pandas\", minversion=\"2\", reason=\"Not working on earlier version of pandas\")\n    data = datatypes.SQLQuery(conn=f\"sqlite:///{sqlite_with_data}\", query=\"oi\")\n    reader = readers.PandasSQLAlchemy(data)\n    comment_dict = {\"args\": [1, \"2\"], \"kwargs\": {\"true\": False}}\n    reader2 = reader.PandasToDuck(\"out\", comment=str(comment_dict))\n    reader3 = reader2.DuckToPandas()\n    out = reader3.read()\n    assert out[:2].to_dict() == {\"a\": {0: \"hi\", 1: \"ho\"}, \"index\": {0: 0, 1: 1}}\n\n    reader = datatypes.SQLQuery({}, \"SELECT comment FROM duckdb_tables();\").to_reader(\n        reader=\"DuckSQL\"\n    )\n    assert reader.read().fetchall() == [(str(comment_dict),)]\n\n\ndef test_cat(sqlite_with_data):\n    pytest.importorskip(\"pandas\", minversion=\"2\", reason=\"Not working on earlier version of pandas\")\n    data = datatypes.Service(url=f\"sqlite:///{sqlite_with_data}\")\n    reader = catalogs.SQLAlchemyCatalog(data)\n    cat = reader.read()\n    assert list(cat.data) == [\"oi\"]\n    out = cat[\"oi\"].to_reader(outtype=\"pandas\").read()\n    assert len(out) == 2000\n"
  },
  {
    "path": "intake/readers/tests/cats/test_stac.py",
    "content": "import os\n\nimport pytest\n\nimport intake.readers.datatypes\n\nhere = os.path.dirname(os.path.abspath(__file__))\ncat_url = os.path.join(here, \"stac_data\", \"1.0.0\", \"catalog\", \"catalog.json\")\nsimple_item_url = os.path.join(here, \"stac_data\", \"1.0.0\", \"collection\", \"simple-item.json\")\npytest.importorskip(\"pystac\")\n\n\ndef test_1():\n    data = intake.readers.datatypes.STACJSON(cat_url)\n    cat = data.to_reader(reader=\"StacCatalog\").read()\n    assert \"test\" in cat\n    cat2 = cat.test.read()\n    assert isinstance(cat2, intake.entry.Catalog)\n    assert cat2.metadata[\"description\"] == \"child catalog\"\n\n\ndef test_bands():\n    data = intake.readers.datatypes.STACJSON(simple_item_url)\n    list_of_bands = [\"B02\", \"B03\"]\n    reader = data.to_reader_cls(reader=\"Bands\")(data, list_of_bands).read()\n    assert isinstance(reader.kwargs[\"data\"], intake.readers.datatypes.TIFF)\n    assert len(reader.kwargs[\"data\"].url) == 2\n"
  },
  {
    "path": "intake/readers/tests/cats/test_thredds.py",
    "content": "import pytest\n\nimport intake.readers\n\npytest.importorskip(\"siphon\")\npytest.importorskip(\"xarray\")\npytest.importorskip(\"h5netcdf\")\n\n\ndef test_1():\n    req = pytest.importorskip(\"requests\")\n    u = \"https://psl.noaa.gov/thredds/catalog.xml\"\n    try:\n        req.head(u)\n        assert req.ok\n    except:\n        pytest.xfail(\"server down\")\n    data = intake.readers.datatypes.THREDDSCatalog(url=u)\n    ds = (\n        data.to_reader()\n        .THREDDSCatToMergedDataset(\n            path=\"Datasets/ncep.reanalysis.dailyavgs/surface/air.sig995.194*.nc\"\n        )\n        .read()\n    )\n    assert \"1948-01-01\" in str(ds.time.min())\n    assert \"1949-12-31\" in str(ds.time.max())\n"
  },
  {
    "path": "intake/readers/tests/cats/test_tiled.py",
    "content": "import shlex\nimport subprocess\nimport time\n\nimport pytest\n\nimport intake.readers.datatypes\n\ntiled = pytest.importorskip(\"tiled\")\n\n\n@pytest.fixture()\ndef tiled_server():\n    t0 = time.time()\n    cmd = \"tiled serve demo --port 8901\"\n    fail = False\n    P = subprocess.Popen(\n        shlex.split(cmd),\n        stdout=subprocess.DEVNULL,\n        stderr=subprocess.PIPE,\n        bufsize=1,\n        text=True,\n    )\n    while True:\n        line = P.stderr.readline()\n        if \"api_key=\" in line:\n            url = line.lstrip().rstrip()\n            # fails if another tiled server is already running\n            yield url\n            break\n        time.sleep(0.01)\n        if time.time() - t0 > 5:\n            fail = True\n            break\n    P.kill()\n    P.wait()\n    if fail:\n        raise RuntimeError(\"tiled fixture did not start\")\n\n\ndef test_catalog_workflow(tiled_server):\n    from tiled.queries import FullText\n\n    data = intake.readers.datatypes.TiledService(tiled_server)\n    node = data.to_reader(\"tiled\")\n    cat = intake.entry.Catalog()\n    cat[\"cat\"] = (\n        node.TiledSearch(query=FullText(\"dog\"))\n        .TiledSearch(query=FullText(\"red\"))\n        .TiledNodeToCatalog\n    )\n    cat.to_yaml_file(\"memory://cat.yaml\")\n    cat2 = intake.entry.Catalog.from_yaml_file(\"memory://cat.yaml\")\n    with intake.conf.set(allow_pickle=True):\n        # tiled query instances are pickled\n        scat = cat2[\"cat\"].read()\n    assert \"short_table\" in scat\n"
  },
  {
    "path": "intake/readers/tests/test_basic.py",
    "content": "import os\n\nfrom intake.readers import datatypes, readers, entry\n\nhere = os.path.dirname(__file__)\ntestdir = os.path.abspath(os.path.join(here, \"..\", \"..\", \"catalog/tests\"))\n\n\ndef test1():\n    data = datatypes.CSV(url=f\"{testdir}/entry1_1.csv\")\n    reader = readers.PandasCSV(data)\n    assert reader.doc()\n    out = reader.read()\n    assert list(out.columns) == [\"name\", \"score\", \"rank\"]\n\n\ndef test_recommend_filetype():\n    assert datatypes.Parquet in datatypes.recommend(url=\"myfile.parq\")\n    assert datatypes.Parquet in datatypes.recommend(head=b\"PAR1\")\n    assert all(\n        p in datatypes.recommend(mime=\"text/yaml\", head=False)\n        for p in {datatypes.YAMLFile, datatypes.CatalogFile, datatypes.Text}\n    )\n\n\ndef test_recommend_reader():\n    pp = datatypes.Parquet(\"\")\n    rec = readers.recommend(pp)\n    assert all(p not in rec[\"importable\"] for p in rec[\"not_importable\"])\n    assert all(p not in rec[\"not_importable\"] for p in rec[\"importable\"])\n    assert all(\n        p in rec[\"importable\"] + rec[\"not_importable\"]\n        for p in {readers.PandasParquet, readers.AwkwardParquet, readers.DaskParquet}\n    )\n    pp = datatypes.CSV(\"\")\n    assert readers.PandasCSV in readers.recommend(pp)[\"importable\"]\n\n\ndef test_data_metadata():\n    cat = entry.Catalog()\n    cat[\"d\"] = datatypes.BaseData(metadata={\"oi\": \"io\"})\n    cat.get_entity(\"d\").metadata.update(blag=0)\n    out = cat[\"d\"]\n    assert out.metadata == {\"oi\": \"io\", \"blag\": 0}\n"
  },
  {
    "path": "intake/readers/tests/test_consistency.py",
    "content": "import pytest\nimport intake\nfrom intake.readers.utils import subclasses\nfrom intake.readers.readers import FileReader\nfrom intake import BaseConverter\nfrom intake.readers.convert import SameType\n\n\n@pytest.mark.parametrize(\"cls\", subclasses(intake.BaseReader))\ndef test_readers(cls):\n    assert isinstance(cls.imports, set)\n    assert all(isinstance(_, str) for _ in cls.imports)\n\n    assert isinstance(cls.implements, set)\n    assert all(issubclass(_, intake.BaseData) for _ in cls.implements)\n\n    assert isinstance(cls.func, str)\n    assert cls.func.count(\":\") == 1\n    assert cls.func_doc is None or cls.func_doc.count(\":\") == 1\n    assert isinstance(cls.output_instance, str) or cls.output_instance is None\n    if cls.other_funcs:\n        assert isinstance(cls.other_funcs, set) and all(isinstance(c, str) for c in cls.other_funcs)\n\n\n@pytest.mark.parametrize(\"cls\", subclasses(intake.BaseData))\ndef test_data(cls):\n    assert isinstance(cls.filepattern, str)\n    assert isinstance(cls.mimetypes, str)\n    assert isinstance(cls.structure, set) and all(isinstance(s, str) for s in cls.structure)\n    assert isinstance(cls.magic, set) and all(isinstance(m, (bytes, tuple)) for m in cls.magic)\n    assert isinstance(cls.contains, set) and all(isinstance(c, str) for c in cls.contains)\n\n\n@pytest.mark.parametrize(\"cls\", subclasses(FileReader))\ndef test_filereaders(cls):\n    assert isinstance(cls.url_arg, str)\n\n\n@pytest.mark.parametrize(\"cls\", subclasses(BaseConverter))\ndef test_converters(cls):\n    assert all(isinstance(s, str) and (s.count(\":\") == 1 or s == \".*\") for s in cls.instances)\n    assert all(\n        s is SameType or isinstance(s, str) and (s.count(\":\") == 1 or s == \".*\")\n        for s in cls.instances.values()\n    )\n"
  },
  {
    "path": "intake/readers/tests/test_dict.py",
    "content": "import intake.readers\nfrom intake.readers import entry\n\n\ndef test_yaml_roundtrip():\n    cat = entry.Catalog()\n    cat[\"one\"] = intake.readers.BaseReader(intake.BaseData(), output_instance=\"blah\")\n    cat.to_yaml_file(\"memory://cat.yaml\")\n    cat2 = entry.Catalog.from_yaml_file(\"memory://cat.yaml\")\n    assert cat2.user_parameters.pop(\"CATALOG_DIR\")\n    assert cat2.user_parameters.pop(\"STORAGE_OPTIONS\") == {}\n    assert cat.data == cat2.data\n    assert list(cat.entries) == list(cat2.entries)\n    assert cat2[\"one\"].output_instance == \"blah\"\n"
  },
  {
    "path": "intake/readers/tests/test_errors.py",
    "content": "import pytest\n\nimport intake\n\n\ndef test_func_ser():\n    class A:\n        def get(self):\n            def inner():\n                return self.x\n\n            return inner\n\n    func = A().get()\n    reader = intake.readers.convert.GenericFunc(data=func)\n    cat = intake.Catalog()\n    with pytest.raises(RuntimeError):\n        cat.add_entry(reader)\n"
  },
  {
    "path": "intake/readers/tests/test_reader.py",
    "content": "import tempfile\n\nimport pytest\n\nimport intake\n\n\ndef test_reader_from_call():\n    import pandas as pd\n\n    df = pd.DataFrame(\n        {\n            \"col1\": [\"a\", \"b\"],\n            \"col2\": [1.0, 3.0],\n        },\n        columns=[\"col1\", \"col2\"],\n    )\n    with tempfile.NamedTemporaryFile(delete=False) as fp:\n        df.to_csv(fp.name)\n        fp.close()\n        reader = intake.reader_from_call(\"df = pd.read_csv(fp.name)\")\n        read_df = reader.read()\n        assert all(read_df.col1 == df.col1)\n        assert all(read_df.col2 == df.col2)\n\n\n@pytest.fixture()\ndef xarray_dataset():\n    xr = pytest.importorskip(\"xarray\")\n    import numpy as np\n    import pandas as pd\n\n    temperature = 15 + 8 * np.random.randn(2, 3, 4)\n    lon = [-99.83, -99.32]\n    lat = [42.25, 42.21]\n    instruments = [\"manufac1\", \"manufac2\", \"manufac3\"]\n    time = pd.date_range(\"2014-09-06\", periods=4)\n    return xr.Dataset(\n        data_vars=dict(\n            temperature=([\"loc\", \"instrument\", \"time\"], temperature),\n        ),\n        coords=dict(\n            lon=(\"loc\", lon),\n            lat=(\"loc\", lat),\n            instrument=instruments,\n            time=time,\n        ),\n    )\n\n\ndef test_xarray_pattern(tmpdir, xarray_dataset):\n    import numpy as np\n    from intake.readers.readers import XArrayPatternReader\n\n    if np.__version__.split(\".\") > [\"2\"]:\n        pytest.skip(\"HDF does not yet support numpy 2\")\n    pytest.importorskip(\"h5netcdf\")\n    path1 = f\"{tmpdir}/1.nc\"\n    path2 = f\"{tmpdir}/2.nc\"\n    xarray_dataset.to_netcdf(path1)\n    xarray_dataset.to_netcdf(path2)\n\n    data = intake.datatypes.HDF5(\"%s/{part}.nc\" % tmpdir)\n    reader = XArrayPatternReader(data)\n    ds = reader.read()\n\n    assert ds.part.values.tolist() == [\"1\", \"2\"]\n    assert ds.temperature.shape == (2, 2, 3, 4)\n\n    data = intake.datatypes.HDF5(\"%s/{part:d}.nc\" % tmpdir)\n    reader = 
XArrayPatternReader(data)\n    ds = reader.read()\n\n    assert ds.part.values.tolist() == [1, 2]\n\n\ndef test_xarray_dataset_remote_url_glob_str(tmpdir, xarray_dataset):\n    \"\"\"Test opening HDF5 data with Xarray via remote URL with glob.\n\n    We're using tar archive to create a remote URL, since this was the\n    case that raised Issue #879.\n    \"\"\"\n    from pathlib import Path\n    import shutil\n\n    from intake.readers.readers import XArrayDatasetReader\n\n    pytest.importorskip(\"h5netcdf\")\n\n    root_dir = Path(tmpdir)\n\n    path = root_dir / \"test.nc\"\n    xarray_dataset.to_netcdf(path)\n\n    # make archive.tar.gz in tmpdir\n    tarname = f\"{tmpdir}/archive\"\n    tarpath = shutil.make_archive(tarname, \"gztar\", root_dir=root_dir, base_dir=\"test.nc\")\n\n    data_url = \"tar://*.nc::\" + tarpath\n    data = intake.datatypes.HDF5(data_url)\n    reader = XArrayDatasetReader(data)\n    ds = reader.read()\n\n    # check that result is not empty\n    assert ds.sizes.get(\"time\", 0) > 0\n\n\n@pytest.fixture\ndef icechunk_xr_repo(tmpdir):\n    xr = pytest.importorskip(\"xarray\")\n    icechunk = pytest.importorskip(\"icechunk\")\n    pd = pytest.importorskip(\"pandas\")\n    import numpy as np\n\n    np.random.seed(0)\n    temperature = 15 + 8 * np.random.randn(2, 3, 4)\n    precipitation = 10 * np.random.rand(2, 3, 4)\n    lon = [-99.83, -99.32]\n    lat = [42.25, 42.21]\n    instruments = [\"manufac1\", \"manufac2\", \"manufac3\"]\n    time = pd.date_range(\"2014-09-06\", periods=4)\n    reference_time = pd.Timestamp(\"2014-09-05\")\n    ds = xr.Dataset(\n        data_vars=dict(\n            temperature=([\"loc\", \"instrument\", \"time\"], temperature),\n            precipitation=([\"loc\", \"instrument\", \"time\"], precipitation),\n        ),\n        coords=dict(\n            lon=(\"loc\", lon),\n            lat=(\"loc\", lat),\n            instrument=instruments,\n            time=time,\n            reference_time=reference_time,\n    
    ),\n        attrs=dict(description=\"Weather related data.\"),\n    )\n    storage = icechunk.local_filesystem_storage(tmpdir.strpath)\n    repo = icechunk.Repository.create(storage)\n    session = repo.writable_session(\"main\")\n    store = session.store\n    ds.to_zarr(store, consolidated=False)\n    session.commit(\"Initial commit\")\n    return tmpdir.strpath\n\n\ndef test_icechunk(icechunk_xr_repo):\n    data = intake.readers.datatypes.IcechunkRepo(\n        \"local_filesystem\", storage_options={\"path\": icechunk_xr_repo}, ref=\"main\"\n    )\n    reader = intake.readers.XArrayDatasetReader(data)\n    ds = reader.read()\n    assert (~ds.temperature.isnull()).all()\n"
  },
  {
    "path": "intake/readers/tests/test_search.py",
    "content": "from intake.readers.entry import Catalog, ReaderDescription\nfrom intake.readers.readers import BaseReader\nfrom intake.readers.search import Importable, Text\n\n\nclass NotImportable(BaseReader):\n    # TODO: this will show up in the class hierarchy throughout testing - make a temporary mock?\n    #  see https://stackoverflow.com/a/52428851/3821154\n    imports = {\"unknown_package\"}\n\n\ndef test_1():\n    cat = Catalog()\n    cat[\"en1\"] = ReaderDescription(\"intake.readers.readers:BaseReader\", kwargs={\"allow_me\": True})\n    cat[\"en2\"] = ReaderDescription(\"intake.readers.readers:BaseReader\", kwargs={\"nope\": True})\n    cat[\"en3\"] = ReaderDescription(\n        \"intake.readers.tests.test_search:NotImportable\", kwargs={\"allow_me\": True}\n    )\n\n    # simple text\n    cat2 = cat.search(\"allow\")\n    assert \"en1\" in cat2\n    assert \"en2\" not in cat2\n    assert \"en3\" in cat2\n    assert cat2[\"en3\"] == cat[\"en3\"]\n\n    # single term\n    cat2 = cat.search(Importable())\n    assert \"en1\" in cat2\n    assert \"en2\" in cat2\n    assert \"en3\" not in cat2\n\n    # expression\n    cat2 = cat.search(Importable() & Text(\"allow\"))\n    assert \"en1\" in cat2\n    assert \"en2\" not in cat2\n    assert \"en3\" not in cat2\n"
  },
  {
    "path": "intake/readers/tests/test_up.py",
    "content": "import pytest\n\n\ndef test_basic():\n    from intake.readers import user_parameters as up\n\n    p = up.SimpleUserParameter(default=1, dtype=int)\n    pars = {\"k\": [\"{p}\", 1]}\n    out = up.set_values({\"p\": p}, pars)\n    assert out == {\"k\": [1, 1]}\n\n    pars = {\"k\": [\"{p}\", 1], \"p\": 2}\n    out = up.set_values({\"p\": p}, pars)\n    assert out == {\"k\": [2, 1]}\n\n    # extra space here results in list member being string formatted\n    pars = {\"k\": [\" {p}\", 1], \"p\": 2}\n    out = up.set_values({\"p\": p}, pars)\n    assert out == {\"k\": [\" 2\", 1]}\n\n    with pytest.raises(TypeError):\n        # supplied None as a value to int parameter\n        pars = {\"k\": [\"{p}\", 1], \"p\": None}\n        up.set_values({\"p\": p}, pars)\n\n\ndef test_named_options():\n    from intake.readers import user_parameters as up\n\n    p = up.NamedOptionsUserParameter({\"a\": \"athing\", \"b\": \"bthing\"}, default=\"b\")\n    pars = {\"k\": [\"{p}\", 1]}\n    out = up.set_values({\"p\": p}, pars)\n    assert out == {\"k\": [\"bthing\", 1]}\n\n    pars = {\"k\": [\"{p}\", 1], \"p\": \"a\"}\n    out = up.set_values({\"p\": p}, pars)\n    assert out == {\"k\": [\"athing\", 1]}\n"
  },
  {
    "path": "intake/readers/tests/test_utils.py",
    "content": "import pytest\n\nfrom intake.readers.utils import LazyDict, PartlyLazyDict\n\n\nclass OnlyOkeKey(LazyDict):\n    def __getitem__(self, item):\n        if item == 5:\n            return 5\n        raise KeyError\n\n    def __iter__(self):\n        return iter(range(10))\n\n\ndef test_lazy_dict():\n    ld = OnlyOkeKey()\n    assert list(ld) == list(range(10))\n    assert 5 in ld\n    assert ld[5] == 5\n    with pytest.raises(KeyError):\n        ld[4]\n\n    pld = PartlyLazyDict({12: 2}, ld)\n    assert set(pld) == {12} | set(range(10))\n    assert 12 in pld\n    assert 5 in pld\n    assert 0 in pld\n    assert pld[12] == 2\n    assert pld[5] == 5\n    with pytest.raises(KeyError):\n        pld[0]\n"
  },
  {
    "path": "intake/readers/tests/test_workflows.py",
    "content": "import os\n\nimport fsspec\nimport pytest\n\nimport intake.readers\nfrom intake.readers import convert, readers, utils, entry\n\npd = pytest.importorskip(\"pandas\")\nbindata = b\"apple,beet,carrot\\n\" + b\"a,1,0.1\\nb,2,0.2\\nc,3,0.3\\n\" * 100\n\n\n@pytest.fixture()\ndef dataframe_file():\n    m = fsspec.filesystem(\"memory\")\n    m.pipe(\"/data\", bindata)\n    return \"memory://data\"\n\n\n@pytest.fixture()\ndef df(dataframe_file):\n    return pd.read_csv(dataframe_file)\n\n\ndef test_pipelines_in_catalogs(dataframe_file, df):\n    data = intake.readers.datatypes.CSV(url=dataframe_file)\n    reader = intake.readers.PandasCSV(data)\n    reader2 = reader[[\"apple\", \"beet\"]].set_index(keys=\"beet\")\n    cat = intake.entry.Catalog()\n    cat[\"mydata\"] = reader2\n\n    assert cat.mydata.read().equals(df[[\"apple\", \"beet\"]].set_index(keys=\"beet\"))\n    assert cat.mydata.discover().equals(df[[\"apple\", \"beet\"]].set_index(\"beet\")[:10])\n\n    cat[\"eq\"] = reader.equals(other=reader)\n    assert reader.equals(other=reader).read() is True\n    assert cat.eq.read() is True\n\n    with pytest.raises(NotImplementedError):\n        cat.delete(\"eq\", recursive=True)\n    cat.delete(\"eq\")\n    assert \"eq\" not in cat\n\n\ndef test_pipeline_steps(dataframe_file, df):\n    data = intake.readers.datatypes.CSV(url=dataframe_file)\n    reader = intake.readers.PandasCSV(data)\n    with pytest.raises(AttributeError):\n        # cannot auto on private methods; could still apply\n        out = reader._nonexistent()\n        breakpoint()\n\n    reader2 = reader[[\"apple\", \"beet\"]].set_index(keys=\"beet\")\n    stepper = reader2.read_stepwise()\n    assert isinstance(stepper, convert.PipelineExecution)\n    assert stepper.data is None\n    assert stepper.next[0] == 0\n    assert isinstance(stepper.step(), convert.PipelineExecution)\n    assert stepper.next[0] == 1\n    assert stepper.data is not None\n    assert isinstance(stepper.step(), 
convert.PipelineExecution)\n    out = stepper.step()\n    assert out.equals(df[[\"apple\", \"beet\"]].set_index(keys=\"beet\"))\n\n    stepper = reader2.read_stepwise(breakpoint=1)\n    assert stepper.next[0] == 1\n    assert stepper.data is not None\n    out = stepper.cont()\n    assert out.equals(df[[\"apple\", \"beet\"]].set_index(keys=\"beet\"))\n\n\ndef test_parameters(dataframe_file, monkeypatch):\n    data = intake.readers.datatypes.CSV(url=dataframe_file)\n    reader = readers.PandasCSV(data)\n    reader2 = reader[[\"apple\", \"beet\"]].set_index(keys=\"beet\")\n    ent = reader2.to_entry()\n    ent.extract_parameter(name=\"index_key\", value=\"beet\")\n    assert ent.user_parameters[\"index_key\"].default == \"beet\"\n\n    assert str(ent.to_dict()).count(\"{index_key}\") == 2  # once in select, once in set_index\n    assert utils.descend_to_path(\"steps.2.2.keys\", ent.kwargs) == \"{index_key}\"\n\n    assert ent.to_reader() == reader2\n\n    cat = entry.Catalog()\n    cat.add_entry(reader2)\n    datadesc = list(cat.data.values())[0]\n    datadesc.extract_parameter(name=\"protocol\", value=\"memory:\")\n    assert datadesc.kwargs[\"url\"] == dataframe_file.replace(\"memory:\", \"{protocol}\")\n    datadesc.user_parameters[\"protocol\"].set_default(\"env(TEMP_TEST)\")\n    monkeypatch.setenv(\"TEMP_TEST\", \"memory:\")\n    out = datadesc.to_data()\n    assert out == data\n\n\ndef test_namespace(dataframe_file):\n    data = intake.readers.datatypes.CSV(url=dataframe_file)\n    reader = readers.PandasCSV(data)\n    assert \"np\" in reader._namespaces\n    assert reader.apply(getattr, \"beet\").np.max().read() == 3\n\n\ncalls = 0\n\n\ndef fails(x):\n    global calls\n    if calls < 2:\n        calls += 1\n        raise RuntimeError\n    return x\n\n\ndef test_retry(dataframe_file):\n    from intake.readers.readers import Retry\n\n    data = intake.readers.datatypes.CSV(url=dataframe_file)\n    reader = readers.PandasCSV(data)\n    pipe = 
Retry(reader.apply(fails), allowed_exceptions=(ValueError,))\n    cat = entry.Catalog()\n    cat[\"ret1\"] = pipe\n\n    pipe = Retry(reader.apply(fails), allowed_exceptions=(RuntimeError,))\n    cat[\"ret2\"] = pipe\n\n    with pytest.raises(RuntimeError):\n        cat[\"ret1\"].read()\n    assert calls == 1\n    assert cat[\"ret2\"].read() is not None\n    assert calls > 1\n\n\ndef dir_non_empty(d):\n    import os\n\n    return os.path.exists(d) and os.path.isdir(d) and bool(os.listdir(d))\n\n\ndef test_custom_cache(dataframe_file, tmpdir, df):\n    from intake.readers.readers import Condition, PandasCSV, PandasParquet, FileExistsReader\n\n    fn = f\"{tmpdir}/file.parquet\"\n    data = intake.readers.datatypes.CSV(url=dataframe_file)\n    part = PandasCSV(data)\n\n    output = part.PandasToParquet(url=fn).transform(PandasParquet)\n    data2 = intake.readers.datatypes.Parquet(url=fn)\n    cached = PandasParquet(data=data2)\n    reader2 = Condition(cached, if_false=output, condition=FileExistsReader(data2))\n\n    assert os.listdir(tmpdir) == []\n    out = reader2.read()\n    assert os.listdir(tmpdir)\n\n    assert df.equals(out)\n\n    m = fsspec.filesystem(\"memory\")\n    m.rm(dataframe_file)\n\n    with pytest.raises(IOError):\n        # file has gone\n        part.read()\n\n    # but condition still picks read from cache\n    out = reader2.read()\n    assert df.equals(out)\n\n\ndef test_cat_mapper(dataframe_file):\n    # setup\n    cat = intake.Catalog()\n    data = intake.readers.datatypes.CSV(url=dataframe_file)\n    cat[\"one\"] = readers.PandasCSV(data)\n    cat[\"two\"] = readers.PandasCSV(data, usecols=[0])\n    cat.to_yaml_file(\"memory://cat.yaml\")\n\n    reader = intake.readers.YAMLCatalogReader(\"memory://cat.yaml\")\n    reader2 = reader.transform.CatalogMapper(getattr, \"shape\", transform=False)\n\n    cat2 = reader2.read()\n    assert \"one\" in cat2 and \"two\" in cat2\n    result1 = cat2[\"one\"].read()\n    assert result1 == (300, 3)\n    
result2 = cat2[\"two\"].read()\n    assert result2 == (300, 1)\n"
  },
  {
    "path": "intake/readers/transform.py",
    "content": "\"\"\"Manipulate data: functions that change the data but not the container type\n\"\"\"\nfrom __future__ import annotations\n\nimport intake\nfrom intake.readers.convert import BaseConverter, SameType\nfrom intake.readers.utils import one_to_one\n\n\nclass DataFrameColumns(BaseConverter):\n    instances = one_to_one({\"pandas:DataFrame\", \"dask.dataframe:DataFrame\"})\n    func = \"pandas:DataFrame.loc\"\n\n    def run(self, x, columns, **_):\n        return x[columns]\n\n\nclass XarraySel(BaseConverter):\n    instances = one_to_one({\"xarray:Dataset\", \"xarray:DataArray\"})\n    func = \"xarray:Dataset.sel\"\n\n    def run(self, x, indexers, **_):\n        return x.sel(indexers)\n\n\nclass THREDDSCatToMergedDataset(BaseConverter):\n    instances = {\"intake.readers.catalogs:THREDDSCatalog\": \"xarray:Dataset\"}\n\n    def run(self, cat, path, driver=\"h5netcdf\", xarray_kwargs=None, concat_kwargs=None, **_):\n        \"\"\"Merges multiple datasets into a single datasets.\n\n        Recreates the merged-dataset functionality of intake-thredds\n\n        This source takes a THREDDS URL and a path to descend down, and calls the\n        combine function on all of the datasets found.\n\n        Parameters\n        ----------\n        url : str\n            Location of server\n        path : str, list of str\n            Subcats to follow; include glob characters (*, ?) in here for matching.\n        driver : str\n            Select driver to access data. 
Choose from 'netcdf' and 'opendap'.\n        xarray_kwargs: dict\n            kwargs to be passed to xr.open_dataset\n        concat_kwargs: dict\n            kwargs to be passed to xr.concat() filled by files opened by xr.open_dataset\n            previously\n        \"\"\"\n        import fnmatch\n\n        import xarray as xr\n\n        path = path.split(\"/\") if isinstance(path, str) else path\n        if driver not in [\"pydap\", \"h5netcdf\"]:\n            raise ValueError\n        xarray_kwargs = xarray_kwargs or {}\n        xarray_kwargs[\"engine\"] = driver\n        for i, part in enumerate(path):\n            if \"*\" not in part and \"?\" not in part:\n                cat = cat[part]\n            else:\n                break\n        path = \"/\".join(path)\n        cat = cat.read()\n        data = []\n        suffix = {\"pydap\": \"_DAP\", \"h5netcdf\": \"_CDF\"}[driver]\n        for name in list(cat):\n            if fnmatch.fnmatch(name[:-4], path) and name[-4:] == suffix:\n                data.append(cat[name].read(**xarray_kwargs))\n        if concat_kwargs:\n            return xr.concat(data, **concat_kwargs)\n        else:\n            return xr.combine_by_coords(data, combine_attrs=\"override\")\n\n\nclass PysparkColumns(BaseConverter):\n    instances = {\"pyspark.sql:DataFrame\": \"pyspark.sql:DataFrame\"}\n\n    def run(self, x, columns, **_):\n        return x.select(columns)\n\n\nclass Method(BaseConverter):\n    \"\"\"Call named method on object\n\n    Assumes output type is the same as input.\n    \"\"\"\n\n    instances = {\".*\": SameType}\n\n    def run(self, x, *args, method_name: str = \"\", **kw):\n        method = getattr(x, method_name)\n        if callable(method):\n            return method(*args, **kw)\n        else:\n            return method\n\n\nclass GetItem(BaseConverter):\n    \"\"\"Equivalent of x[item]\n\n    Assumes output type is the same as input.\n    \"\"\"\n\n    instances = {\".*\": SameType}\n    func = 
\"operator:getitem\"\n\n    def _read(self, item, data=None):\n        return data[item]\n\n\ndef identity(x):\n    return x\n\n\nclass CatalogMapper(BaseConverter):\n    instances = {\"intake.readers.entry:Catalog\": \"intake.readers.entry:Catalog\"}\n\n    def run(\n        self,\n        in_cat: intake.Catalog,\n        func,\n        *args,\n        transform=True,\n        name_arg=None,\n        read=False,\n        **kwargs,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        func: function to apply to each entry (as callable or string equivalent)\n        transform: do we expect this to be a named transform that intake\n            already knows about?\n        name_arg: if given, pass the entry name to the action to be\n            performed using this as the kwarg name\n        read: if True, execute the pipeline produced. This might be used\n            where the pipeline output is itself another reader.\n        \"\"\"\n        out = intake.Catalog()\n        for name in in_cat.entries:\n            if name_arg:\n                kwargs[name_arg] = name\n            if transform:\n                pipe = in_cat[name].__getattr__(func)(*args, **kwargs)\n            else:\n                if isinstance(func, str):\n                    func = intake.import_name(func)\n                pipe = in_cat[name].apply(func, *args, **kwargs)\n            if read:\n                out[name] = pipe.read()\n            else:\n                out[name] = pipe\n        return out\n"
  },
  {
    "path": "intake/readers/user_parameters.py",
    "content": "\"\"\"\nParametrization of data/reader entries, as they appear in Catalogs\n\nParameters can be used to template values across readers, wither to indicate\nchoices that a user wishes to make at run time, or to require the user to fill\nin details such as credentials. Providing options in this way is a simpler\nexperience for a user than replicating a reader/pipeline with different options.\n\nIn a catalog, user parameters can exist at the global scope, to be used anywhere,\nas a part of the data description, to be inherited by all readers of that data,\nor as part of the reader-specific descriptions. The value can be set in-place,\nor provided during ``read()``.\n\"\"\"\nfrom __future__ import annotations\n\nimport builtins\nimport os\nimport re\nfrom typing import Any, Iterable\n\nfrom intake import import_name, conf\nfrom intake.readers.utils import FormatWithPassthrough, SecurityError, Tokenizable\n\n\nclass BaseUserParameter(Tokenizable):\n    \"\"\"The base class allows for any default without checking/coercing\"\"\"\n\n    def __init__(self, default, description=\"\"):\n        self.default = default  #: the value to use without user input\n        self.description = description  #: what is the function of this parameter\n\n    def __repr__(self):\n        dic = {k: v for k, v in self.__dict__.items() if not k.startswith(\"_\")}\n        return f\"{type(self).__name__}, {self.description}\\n{dic}\"\n\n    def set_default(self, value):\n        \"\"\"Change the default, if it validates\"\"\"\n        value = self.coerce(value)\n        if self.validate(value):\n            self.default = value\n        else:\n            raise ValueError(\"Could not validate %s with %s\", value, self)\n\n    def with_default(self, value):\n        \"\"\"A new instance with different default, if it validates\n\n        (original object is left unchanged)\n        \"\"\"\n        import copy\n\n        up = copy.copy(self)\n        up.set_default(value)\n        
return up\n\n    def coerce(self, value):\n        \"\"\"Change given type to one that matches this parameter's intent\"\"\"\n        return value\n\n    def _validate(self, value):\n        return True\n\n    def validate(self, value) -> bool:\n        \"\"\"Is the given value allowed by this parameter?\n\n        Exceptions are treated as False\n        \"\"\"\n        try:\n            return self._validate(value)\n        except (TypeError, ValueError):\n            return False\n\n    def to_dict(self):\n        dic = super().to_dict()\n        dic[\"cls\"] = self.qname()\n        return dic\n\n\nclass SimpleUserParameter(BaseUserParameter):\n    \"\"\"This class is enough for simple type coercion.\"\"\"\n\n    def __init__(self, dtype: type = object, **kw):\n        self.dtype = dtype.__name__\n        if self.dtype not in dir(builtins) or not isinstance(dtype, type):\n            raise ValueError(\"Only supports classes from the builtins module\")\n        super().__init__(**kw)\n\n    @property\n    def _dtype(self):\n        return getattr(builtins, self.dtype)\n\n    def coerce(self, value):\n        if not isinstance(value, self._dtype):\n            return self._dtype(value)  # works for dtype like str, int, list\n        return value\n\n    def _validate(self, value):\n        return isinstance(value, self._dtype)\n\n\nclass OptionsUserParameter(SimpleUserParameter):\n    \"\"\"One choice out of a given allow list\"\"\"\n\n    def __init__(self, options, dtype=object, **kw):\n        super().__init__(dtype=dtype, **kw)\n        self.options = {self.coerce(o) for o in options}\n\n    def _validate(self, value):\n        return self.coerce(value) in self.options\n\n\nclass NamedOptionsUserParameter(SimpleUserParameter):\n    \"\"\"One choice out of a given allow dictionary accessed by its keys\n\n    In this case, `dtype` is the types of the dictionary values, and\n    we have a separate \"keytype\" for the keys, str by default\n    \"\"\"\n\n    def 
__init__(self, options, default, dtype=object, keytype=str, **kw):\n        super().__init__(default=options[default], dtype=dtype, **kw)\n        self.keytype = keytype\n        self.options = options\n\n    def coerce(self, value):\n        return self.options.get(value, None)\n\n    def _validate(self, value):\n        if not isinstance(value, self.keytype):\n            value = self.keytype(value)\n        return value in self.options.values()\n\n\nclass MultiOptionUserParameter(OptionsUserParameter):\n    \"\"\"Multiple choices out of a given allow list/tuple\n\n    In this case, dtype is the type of each member of the list\n    \"\"\"\n\n    def __init__(self, options: list | tuple, dtype=object, **kw):\n        super().__init__(options=options, dtype=dtype, **kw)\n\n    def coerce_one(self, value):\n        return super().coerce(value)\n\n    def coerce(self, value):\n        return [self.coerce_one(v) for v in value]\n\n    def _validate(self, value):\n        return isinstance(value, (list, tuple)) and all(v in self.options for v in value)\n\n\nclass BoundedNumberUserParameter(SimpleUserParameter):\n    \"\"\"A number within a range bound\"\"\"\n\n    def __init__(self, dtype=float, max_value=None, min_value=None, **kw):\n        super().__init__(dtype=dtype, **kw)\n        self.max = max_value\n        self.min = min_value\n\n    def _validate(self, value):\n        out = True\n        if self.max:\n            out = out and self.max > value\n        if self.min:\n            out = out and self.min < value\n        return out\n\n\n# TODO: Date type and generic functions of user_parameters like date(value).day\n\ntemplates = {}\n\n\nclass NoMatch(ValueError):\n    ...\n\n\ndef register_template(name):\n    def wrapper(func):\n        regex = re.compile(f\"{name}[(]([^)]+)[)]\")\n        templates[regex] = func\n\n        def go(text):\n            m = regex.match(text)\n            if m:\n                return func(m)\n            raise NoMatch\n\n        
return go\n\n    return wrapper\n\n\ntemplate_env = re.compile(r\"env[(]([^)]+)[)]\")\ntemplate_data = re.compile(r\"data[(]([^)]+)[)]\")\ntemplate_func = re.compile(r\"func[(]([^)]+)[)]\")\n\n\n@register_template(r\"env\")\ndef env(match, up):\n    \"\"\"Value from an environment variable\"\"\"\n    return os.getenv(match.groups()[0])\n\n\n@register_template(r\"data\")\ndef data(match, up):\n    \"\"\"The value from reading a dataset\n\n    Used in pipelines to point to the outputs of upstream readers\n    \"\"\"\n    # TODO: this might never be called, since Catalog._rehydrate does this job\n    from intake.readers.convert import Pipeline\n    from intake.readers.entry import ReaderDescription\n\n    var = match.groups()[0]\n    if \",\" in var:\n        var, part = var.split(\",\")\n    else:\n        part = None\n    thing = up[var.strip()]\n    if isinstance(thing, ReaderDescription):\n        thing = thing.to_reader(user_parameters=up)\n        if part and isinstance(thing, Pipeline):\n            thing = thing.first_n_stages(int(part))\n\n    return thing\n\n\n@register_template(r\"import\")\n@register_template(r\"func\")\ndef imp(match, up):\n    \"\"\"The result of importing the string, an arbitrary python object\n\n    Format of the input string is like \"{import(package.module:object)}\"\n    \"\"\"\n    if not conf[\"allow_import\"]:\n        from intake.readers.utils import SecurityError\n\n        raise SecurityError(\"Arbitrary imports are not allowed by the Intake config\")\n    return import_name(match.groups()[0])\n\n\n@register_template(r\"pickle64\")\ndef unpickle(match, up):\n    if not conf[\"allow_pickle\"]:\n        raise SecurityError(\"Unpickling is disallowed by the Intake config\")\n    import base64\n    import pickle\n\n    return pickle.loads(base64.b64decode(match.groups()[0].encode()))\n\n\ndef _set_values(up, arguments):\n    if isinstance(arguments, dict):\n        return {k: _set_values(up, v) for k, v in 
arguments.copy().items()}\n    elif isinstance(arguments, str) and arguments.startswith(\"{\") and arguments.endswith(\"}\"):\n        arg = arguments[1:-1]\n        if arg in up:\n            return up[arguments[1:-1]]\n        else:\n            for k, v in templates.items():\n                m = re.match(k, arg)\n                if m:\n                    return v(m, up)\n    if isinstance(arguments, str):\n        # missing env keys become empty strings\n        envdict = {f\"env({k})\": os.getenv(k) for k in template_env.findall(arguments)}\n        data = FormatWithPassthrough(**up)\n        data.update(envdict)\n        try:\n            out = arguments.format_map(data)  # missing keys remain unformatted, but don't raise\n        except ValueError:\n            # in case this is a string with genuine \"{\"s\n            out = arguments\n        return out\n    elif isinstance(arguments, Iterable):\n        return type(arguments)([_set_values(up, v) for v in arguments])\n    return arguments\n\n\ndef set_values(user_parameters: dict[str, BaseUserParameter], arguments: dict[str, Any]):\n    \"\"\"Walk kwargs and set the value and types of any parameters found\n\n    If one of arguments matches the name of a user_parameter, it will set the value of that\n    parameter before proceeding.\n    \"\"\"\n    up = user_parameters.copy()\n    for k, v in arguments.copy().items():\n        if k in user_parameters:\n            u = up[k]\n            if isinstance(u, BaseUserParameter):\n                up[k] = up[k].with_default(v)\n            else:\n                up[k] = v\n            arguments.pop(k)\n    for k, v in up.copy().items():\n        # v can be a literal DataDescription (from a reader) rather than a UP\n        if isinstance(getattr(v, \"default\", None), str):\n            for templ, func in templates.items():\n                m = re.match(templ, v.default)\n                if m:\n                    up[k] = up[k].with_default(func(m, up))\n           
 # m = template_env.match(v.default)\n            # if m:\n            #     up[k] = up[k].with_default(os.getenv(m.groups()[0]))\n            # m = template_func.match(v.default)\n            # if m:\n            #     var = m.groups()[0]\n            #     up[k] = up[k].with_default(import_name(var))\n\n    return _set_values(\n        {k: (u.default if isinstance(u, BaseUserParameter) else u) for k, u in up.items()},\n        arguments,\n    )\n"
  },
  {
    "path": "intake/readers/utils.py",
    "content": "from __future__ import annotations\n\nimport importlib.metadata\nimport numbers\nimport re\nimport typing\nfrom functools import lru_cache as cache\nfrom hashlib import md5\nfrom itertools import zip_longest\nfrom typing import Any, Iterable, Mapping\n\nfrom intake import import_name\n\n\nclass SecurityError(RuntimeError):\n    \"\"\"The given operation is disabled in the Intake config\"\"\"\n\n\ndef subclasses(cls: type) -> set:\n    \"\"\"Find all direct and indirect subclasses\n\n    Most recently created and most specialised classes come first\n    \"\"\"\n    out = set()\n    for cl in reversed(cls.__subclasses__()):\n        # TODO: if cls.check_imports exists and returns False, do we descend?\n        out |= subclasses(cl)\n        out.add(cl)\n    return out\n\n\ndef merge_dicts(*dicts: dict) -> dict:\n    \"\"\"Deep-merge dictionary values, latest value wins\n\n    Examples\n    --------\n    >>> merge_dicts({\"a\": {\"a\": 0, \"b\": 1}}, {\"a\": {\"a\": 1}, \"b\": 1})\n    {\"a\": {\"a\": 1, \"b\": 1}}, \"b\": 1)\n\n    >>> merge_dicts({\"a\": [None, True]}, {\"a\": [False, None]})\n    {\"a\": [False, True}\n    \"\"\"\n    if isinstance(dicts[0], dict):\n        out = {}\n        for dic in dicts:\n            for k, v in dic.items():\n                if k in out and isinstance(v, Iterable) and not isinstance(v, (bytes, str)):\n                    # deep-merge nested dicts\n                    out[k] = merge_dicts(out[k], v)\n                else:\n                    out[k] = v\n    elif isinstance(dicts[0], Iterable) and not isinstance(dicts[0], (bytes, str)):\n        stuff = []\n        for values in zip_longest(*(d for d in dicts if d is not None)):\n            stuff.append(merge_dicts(*values))\n\n        out = type(dicts[0])(stuff)\n    else:\n        out = next((d for d in dicts if d is not None), None)\n\n    return out\n\n\ndef nested_keys_to_dict(kw: dict[str, Any]) -> dict:\n    \"\"\"Nest keys of the form 
\"field.subfield.item\" into dicts\n\n    Examples\n    --------\n    >>> nested_keys_to_dict({\"field\": 0, \"deeper.field\": 1, \"deeper.other\": 2, \"deep.est.field\": 3})\n    {'field': 0, 'deeper': {'field': 1, 'other': 2}, 'deep': {'est': {'field': 3}}}\n\n    >>>  nested_keys_to_dict({\"deeper.1.field\": 1, \"list.1.1.1\": True, \"list.1.0\": False})\n    {'deeper': [None, {\"field\": 1}], \"list\": [None, [False, [None, True]]]}\n    \"\"\"\n    out = {}\n    for k, v in kw.items():\n        bits = k.split(\".\")\n        o = out\n        for bit, bit2 in zip(bits[:-1], bits[1:]):\n            if bit2.isnumeric():\n                bit2 = int(bit2)\n                newpart = [None] * (int(bit2) + 1)\n            else:\n                newpart = {}\n            if isinstance(o, dict):\n                o = o.setdefault(bit, newpart)\n            else:\n                if o[int(bit)] is None:\n                    o[int(bit)] = newpart\n                o = o[int(bit)]\n        bit = bits[-1]\n        if bit.isnumeric():\n            bit = int(bit)\n        o[bit] = v\n    return out\n\n\nfunc_or_method = re.compile(r\"<(function|method) ([^ ]+) at 0x[0-9a-f]+>\")\n\n\ndef find_funcs(val, tokens={}):\n    \"\"\"Walk nested dict/iterables, replacing functions with string package.mod:func form\"\"\"\n    import base64\n    import pickle\n\n    from intake.readers import BaseData, BaseReader\n\n    if isinstance(val, dict):\n        return {k: find_funcs(v, tokens=tokens) for k, v in val.items()}\n    elif isinstance(val, (str, bytes)):\n        return val\n    elif isinstance(val, Iterable):\n        # list, tuple, set-like\n        return type(val)([find_funcs(v, tokens=tokens) for v in val])\n    if isinstance(val, (BaseReader, BaseData)):\n        ent = val.to_entry()\n        find_funcs(ent, tokens)\n        tok = ent.token\n        tokens[tok] = val\n        return \"{data(%s)}\" % tok\n    if isinstance(val, Tokenizable):\n        return val.to_dict()\n    
elif callable(val):\n        name = \"{func(%s)}\" % f\"{val.__module__}:{val.__name__}\"\n        if \"<locals>\" in name or \"__main__\" in name or getattr(val, \"__closure__\", False):\n            raise RuntimeError(\"Cannot store dynamically defined function: %s\", val)\n        return name\n    elif val is None or isinstance(val, (numbers.Number, BaseData, BaseReader)):\n        return val\n    else:\n        return \"{pickle64(%s)}\" % base64.b64encode(pickle.dumps(val)).decode()\n\n\nclass LazyDict(Mapping):\n    \"\"\"Subclass this to make lazy dictionaries, where getting values can be expensive\"\"\"\n\n    _keys = None\n\n    def __getitem__(self, item):\n        raise NotImplementedError\n\n    def __len__(self):\n        return len(self.keys())\n\n    def keys(self):\n        if self._keys is None:\n            self._keys = set(self)\n        return self._keys\n\n    def __contains__(self, item):\n        return item in self.keys()\n\n    def __iter__(self):\n        raise NotImplementedError\n\n\nclass PartlyLazyDict(LazyDict):\n    \"\"\"A dictionary-like, where some components may be lazy\"\"\"\n\n    def __init__(self, *mappings):\n        self.dic = {}\n        self.lazy = []\n        for mapping in mappings:\n            if isinstance(mapping, LazyDict):\n                self.lazy.append(mapping)\n            else:\n                self.dic.update(mapping)\n\n    def keys(self):\n        out = set(self.dic)\n        for mapping in self.lazy:\n            out.update(mapping)\n        return out\n\n    def __len__(self):\n        return len(self.keys())\n\n    def __iter__(self):\n        return iter(self.keys())\n\n    def __getitem__(self, item):\n        if item in self.dic:\n            return self.dic[item]\n        for mapping in self.lazy:\n            if item in mapping:\n                return mapping[item]\n        raise KeyError(item)\n\n    def __setitem__(self, key, value):\n        self.dic[key] = value\n\n    def update(self, 
data):\n        if isinstance(data, LazyDict):\n            self.lazy.append(data)\n        else:\n            self.dic.update(data)\n\n    def copy(self):\n        return PartlyLazyDict(self.dic.copy(), *self.lazy)\n\n\nclass FormatWithPassthrough(dict):\n    \"\"\"When calling .format(), use this to replace only those keys that are found\"\"\"\n\n    def __getitem__(self, item):\n        try:\n            return super().__getitem__(item)\n        except KeyError:\n            return \"{%s}\" % item\n\n\n@cache\ndef check_imports(*imports: Iterable[str]) -> bool:\n    \"\"\"See if required packages are importable, but don't import them\"\"\"\n    import sys\n\n    try:\n        for package in imports:\n            if package:\n                package in sys.modules or importlib.metadata.distribution(package)\n        return True\n    except (ImportError, ModuleNotFoundError, NameError):\n        return False\n\n\nclass Completable:\n    \"\"\"Helper mixin for classes with dynamic tab completion\"\"\"\n\n    @classmethod\n    @cache\n    def check_imports(cls):\n        return check_imports(*getattr(cls, \"imports\"))\n\n    @staticmethod\n    def tab_completion_fixer(item):\n        # just make this a function?\n        if item in {\n            \"_ipython_key_completions_\",\n            \"_ipython_display_\",\n            \"__wrapped__\",\n            \"_ipython_canary_method_should_not_exist_\",\n            \"_render_traceback_\",\n        }:\n            raise AttributeError\n        if item.startswith(\"_repr_\"):\n            raise AttributeError\n\n\nclass Tokenizable(Completable):\n    \"\"\"Provides reliable hash/eq support to classes that hold dict attributes\n\n    The convention is, that attributes starting with _ are not included in the\n    hash and so can be mutated. 
Changing a non-_ attribute should only be done\n    when making a new instance.\n    \"\"\"\n\n    _tok = None\n    _avoid = {\"metadata\"}\n\n    def _dic_for_comp(self):\n        # TODO: we don't consider metadata part of the token. Any others?\n        #  Do we just want to exclude others?\n        return {\n            k: find_funcs(v)\n            for k, v in self.__dict__.items()\n            if not k.startswith(\"_\") and k not in self._avoid\n        }\n\n    def _token(self):\n        dic = self._dic_for_comp()\n        dictxt = func_or_method.sub(r\"\\2\", str(dic))\n        return md5(f\"{self.qname()}|{dictxt}\".encode()).hexdigest()[:16]\n\n    @property\n    def token(self):\n        \"\"\"Token is computed from all non-_ attributes and then cached.\n\n        Even if those attributes are mutated, the token will not change, but the\n        resultant might or might not beequal to the original.\n        \"\"\"\n        if self._tok is None:\n            self._tok = self._token()\n        return self._tok\n\n    def __hash__(self):\n        \"\"\"Hash depends on class name and all non-_* attributes\"\"\"\n        return int(self.token, 16)\n\n    def __eq__(self, other):\n        return type(self) == type(other) and (\n            self.token == other.token or self.to_dict() == other.to_dict()\n        )\n\n    @classmethod\n    def qname(cls):\n        \"\"\"package.module:class name of this class, makes str for import_name\"\"\"\n        return f\"{cls.__module__}:{cls.__name__}\"\n\n    def to_dict(self):\n        \"\"\"Dictionary representation of the instances contents\"\"\"\n        return to_dict(self)\n\n    def pprint(self):\n        \"\"\"Produce nice text formatting of the instance's contents\"\"\"\n        from pprint import pp\n\n        pp(self.to_dict())\n\n    @classmethod\n    def from_dict(cls, data):\n        \"\"\"Recreate instance from the results of to_dict()\"\"\"\n        data = data.copy()\n        if \"cls\" in data:\n           
 cls = import_name(data.pop(\"cls\"))\n        obj = object.__new__(cls)\n        obj.__dict__.update(data)  # walk data\n        return obj\n\n\ndef to_dict(thing):\n    \"\"\"Serialise deep structure into JSON-like hierarchy, invoking to_dict() on intake instances\"\"\"\n    if isinstance(thing, dict):\n        return {k: to_dict(v) for k, v in thing.items()}\n    elif isinstance(thing, (tuple, list)):\n        return [to_dict(v) for v in thing]\n    elif isinstance(thing, Tokenizable):\n        return {k: to_dict(v) for k, v in thing.__dict__.items() if not k.startswith(\"_\")}\n    else:\n        return thing\n\n\ndef make_cls(cls: str | type, kwargs: dict):\n    \"\"\"Recreate class instance from class/kwargs pair\"\"\"\n    if isinstance(cls, str):\n        cls = import_name(cls)\n    return cls(**kwargs)\n\n\ndef descend_to_path(path: str | list, kwargs: dict | list | tuple, name: str = \"\"):\n    \"\"\"Find the value at the location `path` in the deeply nested dict `kwargs`\n\n    If a name is given, replace that value by \"{name}\" - used by parameter\n    extraction.\n    \"\"\"\n    if isinstance(path, str):\n        path = path.split(\".\")\n    part = path.pop(0)\n    if part.isnumeric():\n        part = int(part)\n    if path:\n        return descend_to_path(path, kwargs[part], name)\n    out = kwargs[part]\n    if name:\n        kwargs[part] = \"{%s}\" % name\n    return out\n\n\ndef extract_by_path(path: str, cls: type, name: str, kwargs: dict, **kw) -> tuple:\n    \"\"\"Walk kwargs, replacing dotted keys in path by UserParameter template\"\"\"\n    value = descend_to_path(path, kwargs, name)\n    up = cls(default=value, **kw)\n    return merge_dicts(kwargs, nested_keys_to_dict({path: \"{%s}\" % name})), up\n\n\ndef _by_value(val, up, name):\n    if isinstance(val, dict):\n        return {k: _by_value(v, up, name) for k, v in val.items()}\n    elif isinstance(val, Iterable) and not isinstance(val, (str, bytes)):\n        return 
type(val)([_by_value(v, up, name) for v in val])\n    elif isinstance(val, str) and isinstance(up.default, str) and up.default in val:\n        return val.replace(up.default, \"{%s}\" % name)\n    else:\n        try:\n            if isinstance(val, (str, bytes)) and up.default in val:\n                return val.replace(up.default, \"{%s}\" % name)\n            if val == up.default:\n                return \"{%s}\" % name\n        except (TypeError, ValueError):\n            pass\n        return val\n\n\ndef extract_by_value(value: Any, cls: type, name: str, kwargs: dict, **kw) -> tuple:\n    \"\"\"Walk kwargs, replacing given value with UserParameter placeholder\"\"\"\n    up = cls(default=value, **kw)\n    kw = _by_value(kwargs, up, name)\n    return kw, up\n\n\ndef replace_values(val, needle, replace):\n    \"\"\"Find `needle` in the given values and replace with `replace`\n\n    Useful for removing sensitive values from kwargs and replacing with functions\n    like \"{env(...)}\".\n    \"\"\"\n    if isinstance(val, dict):\n        return {k: replace_values(v, needle, replace) for k, v in val.items()}\n    elif isinstance(val, Iterable) and not isinstance(val, (str, bytes)):\n        return type(val)([replace_values(v, needle, replace) for v in val])\n    try:\n        if val == needle:\n            return replace\n        elif isinstance(val, (str, bytes)):\n            return val.replace(needle, replace)\n    except (ValueError, TypeError):\n        pass\n    return val\n\n\ndef one_to_one(it: Iterable) -> dict:\n    return {_: _ for _ in it}\n\n\ndef all_to_one(it: Iterable, one: Any) -> dict:\n    return {_: one for _ in it}\n\n\ns1 = re.compile(\"(.)([A-Z][a-z]+)\")\ns2 = re.compile(\"([a-z0-9])([A-Z])\")\n\n\n@cache\ndef camel_to_snake(name: str) -> str:\n    # https://stackoverflow.com/a/1176023/3821154\n    name = s1.sub(r\"\\1_\\2\", name)\n    return s2.sub(r\"\\1_\\2\", name).lower()\n\n\n@cache\ndef snake_to_camel(name: str) -> str:\n    # 
https://stackoverflow.com/a/1176023/3821154\n    return \"\".join(word.title() for word in name.split(\"_\"))\n\n\ndef pattern_to_glob(pattern: str) -> str:\n    \"\"\"\n    Convert a path-as-pattern into a glob style path\n\n    Uses the pattern's indicated number of '?' instead of '*' where an int was specified.\n\n    Parameters\n    ----------\n    pattern : str\n        Path as pattern optionally containing format_strings\n\n    Returns\n    -------\n    glob_path : str\n        Path with int format strings replaced with the proper number of '?' and '*' otherwise.\n\n    Examples\n    --------\n    >>> pattern_to_glob('{year}/{month}/{day}.csv')\n    '*/*/*.csv'\n    >>> pattern_to_glob('{year:4}/{month:2}/{day:2}.csv')\n    '????/??/??.csv'\n    >>> pattern_to_glob('data/{year:4}{month:02}{day:02}.csv')\n    'data/????????.csv'\n    >>> pattern_to_glob('data/*.csv')\n    'data/*.csv'\n    \"\"\"\n    # https://github.com/intake/intake/issues/776#issuecomment-1917737732 by @JessicaS11\n    from string import Formatter\n\n    fmt = Formatter()\n    glob_path = \"\"\n    for literal_text, field_name, format_specs, _ in fmt.parse(format_string=pattern):\n        glob_path += literal_text\n        if field_name and (glob_path != \"*\"):\n            try:\n                glob_path += \"?\" * int(format_specs)\n            except ValueError:\n                glob_path += \"*\"\n    return glob_path\n\n\ndef safe_dict(x):\n    \"\"\"Make a dict or list-like int a form you can JSON serialize\"\"\"\n    if isinstance(x, str):\n        return x\n    if isinstance(x, typing.Mapping):\n        return {k: safe_dict(v) for k, v in x.items()}\n    if isinstance(x, typing.Iterable):\n        return [safe_dict(v) for v in x]\n    return str(x)\n\n\ndef port_in_use(host, port=None):\n    import socket\n\n    if isinstance(host, tuple):\n        host, port = host\n    elif isinstance(host, str) and host.startswith(\"http\"):\n        from urllib.parse import urlparse\n\n        
parsed = urlparse(host)\n        host = parsed.hostname\n        if parsed.port is None:\n            port = port\n\n    if (port is None) or (host is None):\n        raise ValueError(f\"host={host} and port={port} is not valid.\")\n\n    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n    try:\n        s.connect((host, int(port)))\n        s.shutdown(2)\n        return True\n    except:  # noqa: E722\n        return False\n\n\ndef find_free_port():\n    import socket\n    from contextlib import closing\n\n    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:\n        s.bind((\"\", 0))\n        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)\n        return s.getsockname()[1]\n\n\ndef _is_tok(s: str) -> bool:\n    \"\"\"Check if a string is a valid token\"\"\"\n    if len(s) == 16 and not s.isupper():\n        try:\n            int(s, 16)\n            return True\n        except ValueError:\n            return False\n    return False\n"
  },
  {
    "path": "intake/source/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport logging\nfrom collections.abc import MappingView\n\nfrom intake.source.base import DataSource\nfrom intake.source.discovery import drivers\n\nlogger = logging.getLogger(\"intake\")\n\n\nclass DriverRegistry(MappingView):\n    \"\"\"Dict of driver: DataSource class\n\n    If the value object is a EntryPoint or str, will load it when accesses, which\n    does the import.\n    \"\"\"\n\n    def __init__(self, drivers_source=drivers):\n        self.drivers = drivers_source\n\n    def __getitem__(self, item):\n        it = self.drivers.enabled_plugins()[item]\n        if hasattr(it, \"load\"):\n            return it.load()\n        if isinstance(it, str):\n            return import_name(it)\n        elif issubclass(it, DataSource):\n            return it\n        raise ValueError\n\n    def __iter__(self):\n        return iter(self.drivers.enabled_plugins())\n\n    def keys(self):\n        return list(self)\n\n    def __len__(self):\n        return len(self.drivers.enabled_plugins())\n\n    def __repr__(self):\n        return \"\"\"<Intake driver registry>\"\"\"\n\n    def __contains__(self, item):\n        return item in self.keys()\n\n\nregistry = DriverRegistry()\nregister_driver = drivers.register_driver\nunregister_driver = drivers.unregister_driver\n\n# A set of fully-qualified package.module.Class mappings\nclasses = {}\n\n\ndef import_name(name):\n    import importlib\n\n    if \":\" in name:\n        if name.count(\":\") > 1:\n            raise ValueError(\"Cannot decipher name to import: %s\", name)\n        mod, rest = name.split(\":\")\n        bit = importlib.import_module(mod)\n        for part in 
rest.split(\".\"):\n            bit = getattr(bit, part)\n        return bit\n    else:\n        mod, cls = name.rsplit(\".\", 1)\n        module = importlib.import_module(mod)\n        return getattr(module, cls)\n\n\ndef get_plugin_class(name):\n    if name in registry:\n        return registry[name]\n    if \".\" not in name:\n        logger.debug('Plugin name \"%s\" not known' % name)\n        return None\n    if name not in classes:\n        try:\n            classes[name] = import_name(name)\n        except (KeyError, NameError, ImportError):\n            logger.debug('Failed to import \"%s\"' % name)\n    return classes.get(name, None)\n"
  },
  {
    "path": "intake/source/base.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\"\"\"\nBase classes for Data Loader interface\n\"\"\"\n\nfrom yaml import dump\n\nfrom ..utils import DictSerialiseMixin, pretty_describe\n\n\nclass Schema(dict):\n    def __getattr__(self, item):\n        return self[item]\n\n\nclass NoEntry(AttributeError):\n    pass\n\n\nclass DataSourceBase(DictSerialiseMixin):\n    \"\"\"An object which can produce data\n\n    This is the base class for all Intake plugins, including catalogs and\n    remote (server) data objects. To produce a new plugin commonly involves\n    subclassing this definition and overriding some or all of the methods.\n\n    This class is not useful in itself, most methods raise NotImplemented.\n    \"\"\"\n\n    name = None\n    version = None\n    container = None\n    partition_access = False\n    description = None\n    dtype = None\n    shape = None\n    npartitions = 0\n    _schema = None\n    on_server = False\n    cat = None  # the cat from which this source was made\n    _entry = None\n    metadata = {}\n\n    def __init__(self, storage_options=None, metadata=None):\n        # default data\n        self.metadata = metadata or {}\n        if isinstance(self.metadata, dict) and storage_options is None:\n            storage_options = self._captured_init_kwargs.get(\"storage_options\", {})\n        self.storage_options = storage_options\n\n    def _get_schema(self):\n        \"\"\"Subclasses should return an instance of base.Schema\"\"\"\n        raise NotImplementedError\n\n    def _get_partition(self, i):\n        \"\"\"Subclasses should return a container object for this partition\n\n        This function will never be called with an out-of-range 
value for i.\n        \"\"\"\n        raise NotImplementedError\n\n    def __eq__(self, other):\n        return (\n            type(self) is type(other)\n            and self._captured_init_args == other._captured_init_args\n            and self._captured_init_kwargs == other._captured_init_kwargs\n        )\n\n    def __hash__(self):\n        return hash((type(self), str(self._captured_init_args), str(self._captured_init_kwargs)))\n\n    def _close(self):\n        \"\"\"Subclasses should close all open resources\"\"\"\n        raise NotImplementedError\n\n    def _load_metadata(self):\n        \"\"\"load metadata only if needed\"\"\"\n        if self._schema is None:\n            self._schema = self._get_schema()\n            self.dtype = self._schema.dtype\n            self.shape = self._schema.shape\n            self.npartitions = self._schema.npartitions\n            self.metadata.update(self._schema.extra_metadata)\n\n    def _yaml(self):\n        import inspect\n\n        kwargs = self._captured_init_kwargs.copy()\n        meta = kwargs.pop(\"metadata\", self.metadata) or {}\n        kwargs.update(\n            dict(\n                zip(\n                    inspect.signature(self.__init__).parameters,\n                    self._captured_init_args,\n                )\n            )\n        )\n        data = {\n            \"sources\": {\n                self.name: {\n                    \"driver\": self.classname,\n                    \"description\": self.description or \"\",\n                    \"metadata\": meta,\n                    \"args\": kwargs,\n                }\n            }\n        }\n        return data\n\n    def yaml(self):\n        \"\"\"Return YAML representation of this data-source\n\n        The output may be roughly appropriate for inclusion in a YAML\n        catalog. 
This is a best-effort implementation\n        \"\"\"\n        data = self._yaml()\n        return dump(data, default_flow_style=False)\n\n    def _ipython_display_(self):\n        \"\"\"Display the entry as a rich object in an IPython session.\"\"\"\n        from IPython.display import display\n\n        data = self._yaml()[\"sources\"]\n        contents = dump(data, default_flow_style=False)\n        display(\n            {\"application/yaml\": contents, \"text/plain\": pretty_describe(contents)},\n            metadata={\"application/json\": {\"root\": self.name}},\n            raw=True,\n        )\n\n    def __repr__(self):\n        return self.yaml()\n\n    @property\n    def is_persisted(self):\n        \"\"\"The base class does not interact with persistence\"\"\"\n        return False\n\n    @property\n    def has_been_persisted(self):\n        \"\"\"The base class does not interact with persistence\"\"\"\n        return False\n\n    def _get_cache(self, urlpath):\n        \"\"\"The base class does not interact with caches\"\"\"\n        return [urlpath]\n\n    def discover(self):\n        \"\"\"Open resource and populate the source attributes.\"\"\"\n        self._load_metadata()\n\n        return dict(\n            dtype=self.dtype,\n            shape=self.shape,\n            npartitions=self.npartitions,\n            metadata=self.metadata,\n        )\n\n    def read(self):\n        \"\"\"Load entire dataset into a container and return it\"\"\"\n        if not self.partition_access or self.npartitions == 1:\n            return self._get_partition(0)\n        else:\n            raise NotImplementedError\n\n    def read_chunked(self):\n        \"\"\"Return iterator over container fragments of data source\"\"\"\n        self._load_metadata()\n        for i in range(self.npartitions):\n            yield self._get_partition(i)\n\n    def read_partition(self, i):\n        \"\"\"Return a part of the data corresponding to i-th partition.\n\n        By default, 
assumes i should be an integer between zero and npartitions;\n        override for more complex indexing schemes.\n        \"\"\"\n        self._load_metadata()\n        if i < 0 or i >= self.npartitions:\n            raise IndexError(\"%d is out of range\" % i)\n\n        return self._get_partition(i)\n\n    def to_dask(self):\n        \"\"\"Return a dask container for this data source\"\"\"\n        raise NotImplementedError\n\n    def to_spark(self):\n        \"\"\"Provide an equivalent data object in Apache Spark\n\n        The mapping of python-oriented data containers to Spark ones will be\n        imperfect, and only a small number of drivers are expected to be able\n        to produce Spark objects. The standard arguments may be translated,\n        unsupported or ignored, depending on the specific driver.\n\n        This method requires the package intake-spark\n        \"\"\"\n        raise NotImplementedError\n\n    @property\n    def entry(self):\n        if self._entry is None:\n            raise NoEntry(\"Source was not made from a catalog entry\")\n        return self._entry\n\n    def configure_new(self, **kwargs):\n        \"\"\"Create a new instance of this source with altered arguments\n\n        Enables the picking of options and re-evaluating templates from any\n        user-parameters associated with\n        this source, or overriding any of the init arguments.\n\n        Returns a new data source instance. 
The instance will be recreated from\n        the original entry definition in a catalog **if** this source was originally\n        created from a catalog.\n        \"\"\"\n        if self._entry is not None:\n            kw = {k: v for k, v in self._captured_init_kwargs.items() if k in self._passed_kwargs}\n            kw.update(kwargs)\n            obj = self._entry(**kw)\n            obj._entry = self._entry\n            return obj\n        else:\n            kw = self._captured_init_kwargs.copy()\n            kw.update(kwargs)\n            return type(self)(*self._captured_init_args, **kw)\n\n    __call__ = get = configure_new  # compatibility aliases\n\n    def describe(self):\n        \"\"\"Description from the entry spec\"\"\"\n        return self.entry.describe()\n\n    def close(self):\n        \"\"\"Close open resources corresponding to this data source.\"\"\"\n        self._close()\n\n    # Boilerplate to make this object also act like a context manager\n    def __enter__(self):\n        self._load_metadata()\n        return self\n\n    def __exit__(self, exc_type, exc_value, traceback):\n        self.close()\n\n\nclass DataSource(DataSourceBase):\n    \"\"\"A Data Source with all optional functionality\n\n    When subclassed, child classes will have the base data source functionality,\n    plus caching, plotting and persistence abilities.\n    \"\"\"\n\n    pass\n\n\nclass PatternMixin:\n    ...\n"
  },
  {
    "path": "intake/source/csv.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\nfrom __future__ import annotations\n\nfrom intake.source.base import DataSource\nfrom intake.readers.datatypes import CSV\nfrom intake.readers.readers import DaskCSV, PandasCSV, SparkDataFrame\n\n\nclass CSVSource(DataSource):\n    \"\"\"Read CSV files into dataframes\n\n    Backward compatibility for V1 catalogs.\n    \"\"\"\n\n    name = \"csv\"\n    container = \"dataframe\"\n\n    def __init__(self, urlpath, storage_options=None, metadata=None, **kwargs):\n        super().__init__(metadata=metadata)\n        self.data = CSV(url=urlpath, storage_options=storage_options, metadata=metadata)\n        self.kwargs = kwargs\n\n    def discover(self):\n        return PandasCSV(self.data).discover(**self.kwargs)\n\n    def to_dask(self):\n        return DaskCSV(self.data).read(**self.kwargs)\n\n    def read(self):\n        return PandasCSV(self.data).read(**self.kwargs)\n\n    def to_spark(self):\n        return SparkDataFrame(self.data).read(**self.kwargs)\n"
  },
  {
    "path": "intake/source/derived.py",
    "content": "from copy import deepcopy\nfrom functools import lru_cache, partial\nfrom textwrap import dedent\n\nfrom .. import open_catalog\nfrom ..catalog.exceptions import CatalogException\nfrom . import import_name\nfrom .base import DataSource, Schema\n\n\ndef _kwargs_string(kwargs_dict):\n    return \", \".join([f\"{k}={v}\" for k, v in kwargs_dict.items()])\n\n\nclass PipelineStepError(CatalogException):\n    pass\n\n\nclass MissingTargetError(CatalogException):\n    def __init__(self, source, step_index, method, target):\n        self.message = f\"{source} step {step_index} {method}: {target} is not listed in the targets key of this pipeline.\"\n        super().__init__(self.message)\n\n\ncached_cats = lru_cache(10)(open_catalog)\n\n\ndef get_source(target, cat, kwargs, cat_kwargs):\n    if \":\" in target:\n        caturl, target = target.rsplit(\":\", 1)\n        cat = cached_cats(caturl, **cat_kwargs)\n    if cat:\n        return cat[target].configure_new(**kwargs)\n    # for testing only\n    return target  # pragma: no cover\n\n\nclass AliasSource(DataSource):\n    \"\"\"Refer to another named source, unmodified\n\n    The purpose of an Alias is to be able to refer to other source(s) in the\n    same catalog or an external catalog, perhaps leaving the choice of which\n    target to load up to the user. This source makes no sense outside of a catalog.\n\n    The \"target\" for an aliased data source will normally be a string. In the\n    simple case, it is the name of a data source in the same catalog. 
However,\n    we use the syntax \"catalog:source\" to refer to sources in other catalogs,\n    where the part before \":\" will be passed to intake.open_catalog,\n    together with any keyword arguments from cat_kwargs.\n\n    In this case, the output of the target source is not modified, but this\n    class acts as a prototype 'derived' source for processing the output of\n    some standard driver.\n\n    After initial discovery, the source's container and other details will be\n    updated from the target; initially, the AliasSource container is not\n    any standard.\n    \"\"\"\n\n    container = \"other\"\n    version = 2\n    name = \"alias\"\n\n    def __init__(self, target, mapping=None, metadata=None, kwargs=None, cat_kwargs=None):\n        \"\"\"\n\n        Parameters\n        ----------\n        target: str\n            Name of the source to load, must be a key in the same catalog\n        mapping: dict or None\n            If given, use this to map the string passed as ``target`` to\n            entries in the catalog\n        metadata: dict or None\n            Extra metadata to associate\n        kwargs: passed on to the target\n        cat_kwargs: passed on to the target catalog if target is in\n            another catalog\n        \"\"\"\n        super(AliasSource, self).__init__(metadata)\n        self.target = target\n        self.mapping = mapping or {target: target}\n        self.kwargs = kwargs or {}\n        self.cat_kwargs = cat_kwargs or {}\n        self.metadata = metadata\n        self.source = None\n\n    def _get_source(self):\n        if self.cat is None:\n            raise ValueError(\"AliasSource cannot be used outside a catalog\")\n        if self.source is None:\n            target = self.mapping[self.target]\n            self.source = get_source(target, self.cat, self.kwargs, self.cat_kwargs)\n            self.metadata = self.source.metadata.copy()\n            self.container = self.source.container\n            
self.partition_access = self.source.partition_access\n            self.description = self.source.description\n\n    def discover(self):\n        self._get_source()\n        return self.source.discover()\n\n    def read(self):\n        self._get_source()\n        return self.source.read()\n\n    def read_partition(self, i):\n        self._get_source()\n        return self.source.read_partition(i)\n\n    def read_chunked(self):\n        self._get_source()\n        return self.source.read_chunked()\n\n    def to_dask(self):\n        self._get_source()\n        return self.source.to_dask()\n\n\ndef first(targets, cat, kwargs, cat_kwargs):\n    \"\"\"A target chooser that simply picks the first from the given list\n\n    This is the default, particularly for the case of only one element in\n    the list\n    \"\"\"\n    targ = targets[0]\n    return get_source(targ, cat, kwargs.get(targ, {}), cat_kwargs)\n\n\ndef first_discoverable(targets, cat, kwargs, cat_kwargs):\n    \"\"\"A target chooser: the first target for which discover() succeeds\n\n    This may be useful where some drivers are not importable, or some\n    sources can be available only sometimes.\n    \"\"\"\n    for t in targets:\n        try:\n            s = get_source(t, cat, kwargs.get(t, {}), cat_kwargs)\n            s.discover()\n            return s\n        except Exception:\n            pass\n    raise RuntimeError(\"No targets succeeded at discover()\")\n\n\nclass DerivedSource(DataSource):\n    \"\"\"Base source deriving from another source in the same catalog\n\n    Target picking and parameter validation are performed here, but\n    you probably want to subclass from one of the more specific\n    classes like ``DataFrameTransform``.\n    \"\"\"\n\n    input_container = \"other\"  # no constraint\n    container = \"other\"  # to be filled in per instance at access time\n    required_params = []  # list of kwargs that must be present\n    optional_params = {}  # optional kwargs with defaults\n\n   
 def __init__(\n        self,\n        targets,\n        target_chooser=first,\n        target_kwargs=None,\n        cat_kwargs=None,\n        container=None,\n        metadata=None,\n        **kwargs,\n    ):\n        \"\"\"\n\n        Parameters\n        ----------\n        targets: list of string or DataSources\n            If string(s), refer to entries of the same catalog as this Source\n        target_chooser: function to choose between targets\n            function(targets, cat) -> source, or a fully-qualified dotted string pointing\n            to it\n        target_kwargs: dict of dict with keys matching items of targets\n        cat_kwargs: to pass to intake.open_catalog, if the target is in\n            another catalog\n        container: str (optional)\n            Assumed output container, if known/different from input\n\n        [Note: the exact form of target_kwargs and cat_kwargs may be\n        subject to change]\n        \"\"\"\n        self.targets = targets\n        self._chooser = target_chooser if callable(target_chooser) else import_name(target_chooser)\n        self._kwargs = target_kwargs or {}\n        self._source = None\n        self._params = kwargs\n        self._cat_kwargs = cat_kwargs or {}\n        if container:\n            self.container = container\n        self._validate_params()\n        super().__init__(metadata=metadata)\n\n    def _validate_params(self):\n        \"\"\"That all required params are present and that optional types match\"\"\"\n        assert set(self.required_params) - set(self._params) == set()\n        for par, val in self.optional_params.items():\n            if par not in self._params:\n                self._params[par] = val\n\n    def _pick(self):\n        \"\"\"Pick the source from the given targets\"\"\"\n        self._source = self._chooser(self.targets, self.cat, self._kwargs, self._cat_kwargs)\n        if self.input_container != \"other\":\n            assert self._source.container == 
self.input_container\n\n        self.metadata[\"target\"] = self._source.metadata\n        if self.container is None:\n            self.container = self._source.container\n\n\nclass GenericTransform(DerivedSource):\n    required_params = [\"transform\", \"transform_kwargs\"]\n    optional_params = {\"allow_dask\": True}\n    \"\"\"\n    Perform an arbitrary function to transform an input\n\n        transform: function to perform transform\n            function(container_object) -> output, or a fully-qualified dotted string pointing\n            to it\n        transform_kwargs: dict\n            The keys are names of kwargs to pass to the transform function. Values are either\n            concrete values to pass; or param objects which can be made into widgets (but\n            must have a default value) - or a spec to be able to make these objects.\n        allow_dask: bool (optional, default True)\n            Whether to_dask() is expected to work, which will in turn call the\n            target's to_dask()\n    \"\"\"\n\n    def _validate_params(self):\n        super()._validate_params()\n        transform = self._params[\"transform\"]\n        self._transform = transform if callable(transform) else import_name(transform)\n\n    def _get_schema(self):\n        \"\"\"We do not know the schema of a generic transform\"\"\"\n        self._pick()\n        return Schema()\n\n    def to_dask(self):\n        self._get_schema()\n        if not self._params[\"allow_dask\"]:\n            raise ValueError(\n                \"This transform is not compatible with Dask\" \"because it has use_dask=False\"\n            )\n        return self._transform(self._source.to_dask(), **self._params[\"transform_kwargs\"])\n\n    def read(self):\n        self._get_schema()\n        return self._transform(self._source.read(), **self._params[\"transform_kwargs\"])\n\n\nclass DataFrameTransform(GenericTransform):\n    \"\"\"Transform where the input and output are both Dask-compatible 
dataframes\n\n    This derives from GenericTransform, and you must supply ``transform`` and\n    any ``transform_kwargs``.\n    \"\"\"\n\n    input_container = \"dataframe\"\n    container = \"dataframe\"\n    optional_params = {}\n    _df = None\n\n    def to_dask(self):\n        if self._df is None:\n            self._pick()\n            self._df = self._transform(self._source.to_dask(), **self._params[\"transform_kwargs\"])\n        return self._df\n\n    def _get_schema(self):\n        \"\"\"load metadata only if needed\"\"\"\n        self.to_dask()\n        return Schema(\n            dtype=self._df.dtypes,\n            shape=(None, len(self._df.columns)),\n            npartitions=self._df.npartitions,\n            metadata=self.metadata,\n        )\n\n    def read(self):\n        return self.to_dask().compute()\n\n\nclass Columns(DataFrameTransform):\n    \"\"\"Simple dataframe transform to pick columns\n\n    Given as an example of how to make a specific dataframe transform.\n    Note that you could use DataFrameTransform directly, by writing a\n    function to choose the columns instead of a method as here.\n    \"\"\"\n\n    input_container = \"dataframe\"\n    container = \"dataframe\"\n    required_params = [\"columns\"]\n\n    def __init__(self, columns, **kwargs):\n        \"\"\"\n        columns: list of labels (usually str) or slice\n            Columns to choose from the target dataframe\n        \"\"\"\n        # this class requires \"columns\", but DataFrameTransform\n        # uses \"transform_kwargs\", which we don't need since we use a method for the\n        # transform\n        kwargs.update(transform=self.pick_columns, columns=columns, transform_kwargs={})\n        super().__init__(**kwargs)\n\n    def pick_columns(self, df):\n        return df[self._params[\"columns\"]]\n\n\nclass DataFramePipeline(DataFrameTransform):\n    \"\"\"Apply a sequence of transformations over a DataFrame\n\n    The sequence of steps is defined as a 
list-of-dicts under\n    the key \"steps\". Each step requires a \"method\" key and\n    can optionally take \"kwargs\".\n\n    loc/iloc are not supported, but query is recommended\n    for filtering.\n\n    A special method called 'cols' enables column selection\n    and takes the kwarg 'columns', which can be a single value\n    or a list of column names.\n\n    merge, join, and assign will look for a source defined in\n    the targets key of the pipeline.\n\n    A special method called 'concat' will perform dd.concat\n    operations where the list of dataframes to concat is\n    provided in the kwarg 'dfs'.\n\n    If a method cannot be found as an attribute of the output\n    of the previous step, the pipeline will attempt to import\n    the method and apply it.\n\n    Here's an example of performing a groupby, selecting two\n    columns and computing the mean.\n\n    by_origin:\n      driver: intake.source.derived.DataFramePipeline\n      args:\n        targets:\n          - auto\n        steps:\n          - method: groupby\n            kwargs:\n              by: origin\n          - method: cols\n            kwargs:\n              columns:\n                - hp\n                - mpg\n          - method: mean\n    \"\"\"\n\n    input_container = \"dataframe\"\n    container = \"dataframe\"\n    required_params = [\"steps\"]\n\n    def __init__(self, steps, **kwargs):\n        kwargs.update(transform=self.pipeline, steps=steps, transform_kwargs={})\n        super().__init__(**kwargs)\n        self._sources = {}\n\n    def _get_sources(self):\n        for target in self.targets:\n            s = get_source(target, self.cat, self._kwargs, self._cat_kwargs).to_dask()\n            self._sources[target] = s\n\n    def pipeline(self, df):\n        \"\"\"Apply pipeline steps\"\"\"\n        if not self._sources:\n            self._get_sources()\n\n        for idx, step in enumerate(self._params[\"steps\"]):\n            method = step[\"method\"]\n            kwargs = 
deepcopy(step.get(\"kwargs\", {}))\n\n            if callable(method):\n                func = partial(method, df)\n\n            elif method in (\"loc\", \"iloc\"):\n                msg = dedent(\n                    \"\"\"\\\n                    iloc and loc are not supported. To select one or more columns use\n\n                    - method: cols\n                      kwargs:\n                        columns: <single value or or list>\n\n                    To filter use the the query method\n\n                    - method: query\n                      kwargs:\n                        arg: A > 2\n                      \"\"\"\n                )\n                raise ValueError(msg)\n\n            # this will catch accessor methods like .dt.<method> and .str.<method>\n            elif method.count(\".\") == 1 and hasattr(df, method.split(\".\")[0]):\n                sel, f = method.split(\".\")\n                mod = getattr(df, sel)\n                func = getattr(mod, f)\n\n            elif method == \"cols\":\n                columns = kwargs.pop(\"columns\")\n                func = partial(df.__getitem__, columns)\n\n            elif method == \"assign\":\n                to_assign = {}\n                for col, value in kwargs.items():\n                    if value in self.targets:\n                        to_assign[col] = self._sources[value]\n                    else:\n                        to_assign[col] = value\n                kwargs = to_assign\n                func = df.assign\n\n            elif method == \"join\":\n                other = kwargs[\"other\"]\n                if not isinstance(other, list):\n                    other = [other]\n\n                kwargs[\"other\"] = []\n                for s in other:\n                    if s in self.targets:\n                        kwargs[\"other\"].append(self._sources[s])\n                    else:\n                        raise MissingTargetError(self.name, idx + 1, method, s)\n                
func = df.join\n\n            elif method == \"merge\":\n                right = kwargs[\"right\"]\n                source = self._sources.get(right)\n                if source is None:\n                    raise MissingTargetError(self.name, idx + 1, method, right)\n                else:\n                    kwargs[\"right\"] = source\n                func = df.merge\n\n            elif method in (\"apply\", \"transform\"):\n                kwargs_func = kwargs.pop(\"func\")\n                f = kwargs_func if callable(kwargs_func) else import_name(kwargs_func)\n                func = partial(getattr(df, method), f)\n\n            elif method == \"concat\":\n                objs = kwargs[\"dfs\"]\n                kwargs[\"dfs\"] = [self._sources[s] for s in objs]\n                func = import_name(\"dask.dataframe.concat\")\n\n            else:\n                try:\n                    func = getattr(df, method)\n                except AttributeError:\n                    func = partial(import_name(method), df)\n\n            try:\n                df = func(**kwargs)\n            except Exception as e:\n                original_kwargs = step.get(\"kwargs\", {})\n                s = _kwargs_string(original_kwargs)\n                msg = dedent(\n                    f\"\"\"\\\n                    DataFramePipeline source {self.name} step {idx+1} failed\n                    {method}({s})\n\n                    {repr(e)}\n                    \"\"\"\n                )\n                raise PipelineStepError(msg) from e\n\n        return df\n"
  },
  {
    "path": "intake/source/discovery.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom importlib.metadata import entry_points\nimport logging\nimport re\nimport warnings\n\nfrom ..config import conf\n\nlogger = logging.getLogger(\"intake\")\n\n\nclass DriverSouces:\n    \"\"\"\n    Handles the various ways in which drivers can be known to Intake\n    \"\"\"\n\n    def __init__(self, config=None, do_scan=None):\n        \"\"\"\n\n        Parameters\n        ----------\n        config: intake.config.Config instance, optional\n            If not given, will use ``intake.config.conf`` singleton\n        do_scan: bool or None\n            Whether to scan packages with names ``intake_*`` for valid drivers.\n            This is for backward compatibility only. 
If not given, value comes\n            from the config key \"package_scan\", default False.\n        \"\"\"\n        self.conf = config or conf\n        self.do_scan = do_scan\n        self._scanned = None\n        self._entrypoints = None\n        self._registered = None\n        self._disabled = None\n\n    @property\n    def package_scan(self):\n        return self.conf.get(\"package_scan\", False) if self.do_scan is None else self.do_scan\n\n    @package_scan.setter\n    def package_scan(self, val):\n        self.conf[\"package_scan\"] = val\n\n    def from_entrypoints(self):\n        if self._entrypoints is None:\n            eps = entry_points()\n            if hasattr(eps, \"select\"):  # Python 3.10+ / importlib_metadata >= 3.9.0\n                specs = eps.select(group=\"intake.drivers\")\n            else:\n                specs = eps.get(\"intake.drivers\", [])\n            # must cache this, since lookup if fairly slow and we do this a lot\n            self._entrypoints = list(specs)\n        return self._entrypoints\n\n    def from_conf(self):\n        return []\n\n    def __setitem__(self, key, value):\n        super(DriverSouces, self).__setitem__(key, value)\n        self.save()\n\n    def __delitem__(self, key):\n        super(DriverSouces, self).__delitem__(key)\n        self.save()\n\n    @property\n    def scanned(self):\n        return {}\n\n    def disabled(self):\n        if self._disabled is None:\n            self._disabled = {k for k, v in self.conf.get(\"drivers\", {}).items() if v is False}\n\n        return self._disabled\n\n    def registered(self):\n        # priority order (decreasing): runtime, config, entrypoints, package scan\n        if self._registered is None:\n            out = {}\n            if self.package_scan:\n                out.update(self.scanned)\n\n            for ep in self.from_entrypoints() + self.from_conf():\n                out[ep.name] = ep\n\n            self._registered = out\n\n        return 
self._registered\n\n    def enabled_plugins(self):\n        return {k: v for k, v in self.registered().items() if k not in self.disabled()}\n\n    def register_driver(self, name, value, clobber=False, do_enable=False):\n        \"\"\"Add runtime driver definition to list of registered drivers\n\n        (drivers in global scope with corresponding ``intake.open_*`` function)\n\n        Parameters\n        ----------\n        name: str\n            Name of the driver\n        value: str, entrypoint or class\n            Pointer to the implementation\n        clobber: bool\n            If True, perform the operation even if the driver exists\n        do_enable: bool\n            If True, unset the disabled flag for this driver\n        \"\"\"\n        name = _normalize(name)\n        if name in self.registered() and not clobber:\n            raise ValueError(f\"Driver {name} already enabled\")\n        if name in self.disabled():\n            if do_enable:\n                self.enable(name, value)\n            else:\n                logger.warning(f\"Adding driver {name}, but it is disabled\")\n\n        self.registered()[name] = value\n\n    def unregister_driver(self, name):\n        \"\"\"Remove runtime registered driver\"\"\"\n        name = _normalize(name)\n        self.registered().pop(name)\n\n    def enable(self, name, driver=None):\n        \"\"\"\n        Explicitly assign a driver to a name, or remove ban\n\n        Updates the associated config, which will be persisted\n\n        Parameters\n        ----------\n        name : string\n            As in ``'zarr'``\n        driver : string\n            Dotted object name, as in ``'intake_xarray.xzarr.ZarrSource'``.\n            If None, simply remove driver disable flag, if it is found\n        \"\"\"\n        config = self.conf\n        if \"drivers\" not in config:\n            config[\"drivers\"] = {}\n        if driver:\n            config[\"drivers\"][name] = driver\n        elif 
config[\"drivers\"].get(name) is False:\n            del config[\"drivers\"][name]\n        if name in self.disabled():\n            self.disabled().remove(name)\n        config.save()\n\n    def disable(self, name):\n        \"\"\"Disable a driver by name.\n\n        Updates the associated config, which will be persisted\n\n        Parameters\n        ----------\n        name : string\n            As in ``'zarr'``\n        \"\"\"\n        name = _normalize(name)\n        config = self.conf\n        if \"drivers\" not in config:\n            config[\"drivers\"] = {}\n        config[\"drivers\"][name] = False\n        config.save()\n        self.disabled().add(name)\n\n\ndrivers = DriverSouces()\n\n\ndef _load_entrypoint(entrypoint):\n    \"\"\"\n    Call entrypoint.load() and, if it fails, raise context-specific errors.\n    \"\"\"\n    try:\n        return entrypoint.load()\n    except ImportError as err:\n        raise ConfigurationError(\n            f\"Failed to load {entrypoint.name} driver because module \"\n            f\"{entrypoint.module_name} could not be imported.\"\n        ) from err\n    except AttributeError as err:\n        raise ConfigurationError(\n            f\"Failed to load {entrypoint.name} driver because no object \"\n            f\"named {entrypoint.object_name} could be found in the module \"\n            f\"{entrypoint.module_name}.\"\n        ) from err\n\n\ndef _normalize(name):\n    if not name.isidentifier():\n        # primitive name normalization\n        name = re.sub(\"[-=~^&|@+]\", \"_\", name)\n    if not name.isidentifier():\n        warnings.warn('Invalid Intake plugin name \"%s\" found.', name, stacklevel=2)\n\n    return name\n\n\nclass ConfigurationError(Exception):\n    pass\n"
  },
  {
    "path": "intake/source/jsonfiles.py",
    "content": "import contextlib\nimport json\nfrom itertools import islice\n\nfrom intake.source.base import DataSource\n\n\nclass JSONFileSource(DataSource):\n    \"\"\"\n    Read JSON files as a single dictionary or list\n\n    The files can be local or remote. Extra parameters for encoding, etc.,\n    go into ``storage_options``.\n    \"\"\"\n\n    name = \"json\"\n    version = \"0.0.1\"\n    container = \"python\"\n\n    def __init__(\n        self,\n        urlpath: str,\n        text_mode: bool = True,\n        text_encoding: str = \"utf8\",\n        compression: str = None,\n        read: bool = True,\n        metadata: dict = None,\n        storage_options: dict = None,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        urlpath : str\n            Target file. Can include protocol specified (e.g., \"s3://\").\n        text_mode : bool\n            Whether to open the file in text mode, recoding binary\n            characters on the fly\n        text_encoding : str\n            If text_mode is True, apply this encoding. UTF* is by far the most\n            common\n        compression : str or None\n            If given, decompress the file with the given codec on load. 
Can\n            be something like \"zip\", \"gzip\", \"bz2\", or to try to guess from the\n            filename, 'infer'\n        storage_options: dict\n            Options to pass to the file reader backend, including text-specific\n            encoding arguments, and parameters specific to the remote\n            file-system driver, if using.\n        \"\"\"\n        from fsspec.utils import compressions\n\n        VALID_COMPRESSIONS = list(compressions.values()) + [\"infer\"]\n\n        self._urlpath = urlpath\n        self._storage_options = storage_options or {}\n        self._dataframe = None\n        self._file = None\n        self.compression = compression\n        if compression is not None:\n            if compression not in VALID_COMPRESSIONS:\n                raise ValueError(\n                    f\"Compression value {compression} must be one of {VALID_COMPRESSIONS}\"\n                )\n        self.mode = \"rt\" if text_mode else \"rb\"\n        self.encoding = text_encoding\n        self._read = read\n\n        super(JSONFileSource, self).__init__(metadata=metadata)\n\n    def read(self):\n        import fsspec\n\n        urlpath = self._get_cache(self._urlpath)[0]\n        with fsspec.open(\n            urlpath,\n            mode=self.mode,\n            encoding=self.encoding,\n            compression=self.compression,\n            **self._storage_options,\n        ) as f:\n            return json.load(f)\n\n    def _load_metadata(self):\n        pass\n\n    def _get_schema(self):\n        pass\n\n\nclass JSONLinesFileSource(DataSource):\n    \"\"\"\n    Read a JSONL (https://jsonlines.org/) file and return a list of objects,\n    each being valid json object (e.g. 
a dictionary or list)\n    \"\"\"\n\n    name = \"jsonl\"\n    version = \"0.0.1\"\n    container = \"python\"\n\n    def __init__(\n        self,\n        urlpath: str,\n        text_mode: bool = True,\n        text_encoding: str = \"utf8\",\n        compression: str = None,\n        read: bool = True,\n        metadata: dict = None,\n        storage_options: dict = None,\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        urlpath : str\n            Target file. Can include protocol specified (e.g., \"s3://\").\n        text_mode : bool\n            Whether to open the file in text mode, recoding binary\n            characters on the fly\n        text_encoding : str\n            If text_mode is True, apply this encoding. UTF* is by far the most\n            common\n        compression : str or None\n            If given, decompress the file with the given codec on load. Can\n            be something like \"zip\", \"gzip\", \"bz2\", or to try to guess from the\n            filename, 'infer'.\n        storage_options: dict\n            Options to pass to the file reader backend, including text-specific\n            encoding arguments, and parameters specific to the remote\n            file-system driver, if using.\n        \"\"\"\n        from fsspec.utils import compressions\n\n        VALID_COMPRESSIONS = list(compressions.values()) + [\"infer\"]\n\n        self._urlpath = urlpath\n        self._storage_options = storage_options or {}\n        self._dataframe = None\n        self._file = None\n        self.compression = compression\n        if compression is not None:\n            if compression not in VALID_COMPRESSIONS:\n                raise ValueError(\n                    f\"Compression value {compression} must be one of {VALID_COMPRESSIONS}\"\n                )\n        self.mode = \"rt\" if text_mode else \"rb\"\n        self.encoding = text_encoding\n        self._read = read\n        super().__init__(metadata=metadata)\n\n    
@contextlib.contextmanager\n    def _open(self):\n        \"\"\"\n        Yields an fsspec.OpenFile object\n        \"\"\"\n        import fsspec\n\n        urlpath = self._get_cache(self._urlpath)[0]\n        with fsspec.open(\n            urlpath,\n            mode=self.mode,\n            encoding=self.encoding,\n            compression=self.compression,\n            **self._storage_options,\n        ) as f:\n            yield f\n\n    def read(self):\n        with self._open() as f:\n            return list(map(json.loads, f))\n\n    def head(self, nrows: int = 100):\n        \"\"\"\n        return the first `nrows` lines from the file\n        \"\"\"\n        with self._open() as f:\n            return list(map(json.loads, islice(f, nrows)))\n\n    def _load_metadata(self):\n        pass\n\n    def _get_schema(self):\n        pass\n"
  },
  {
    "path": "intake/source/npy.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom .base import DataSource\nfrom intake.readers.datatypes import NumpyFile\nfrom intake.readers.readers import DaskNPYStack, NumpyReader\n\n\nclass NPySource(DataSource):\n    \"\"\"Read numpy binary files into an array\n\n    Prototype source showing example of working with arrays\n\n    Each file becomes one or more partitions, but partitioning within a file\n    is only along the largest dimension, to ensure contiguous data.\n    \"\"\"\n\n    container = \"ndarray\"\n    name = \"numpy\"\n    version = \"0.0.1\"\n    partition_access = True\n\n    def __init__(self, path, storage_options=None, metadata=None):\n        \"\"\"\n        The parameters dtype and shape will be determined from the first\n        file, if not given.\n\n        Parameters\n        ----------\n        path: str or list of str\n            Location of data file(s), possibly including glob and protocol\n            information\n        storage_options: dict\n            Passed to file-system backend.\n        \"\"\"\n        self.data = NumpyFile(url=path, storage_options=storage_options, metadata=metadata)\n        self.metadata = metadata\n\n    def to_dask(self):\n        return DaskNPYStack(self.data).read()\n\n    def read(self):\n        return NumpyReader(self.data).read()\n"
  },
  {
    "path": "intake/source/tests/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n"
  },
  {
    "path": "intake/source/tests/alias.yaml",
    "content": "sources:\n  csvs:\n    driver: textfiles\n    args:\n      urlpath: '{{ CATALOG_DIR }}/*.csv'\n  yamls:\n    driver: textfiles\n    args:\n      urlpath: '{{ CATALOG_DIR }}/*.yaml'\n  alias1:\n    driver: intake.source.derived.AliasSource\n    args:\n      mapping:\n        first: csvs\n        second: yamls\n      target: first\n  alias2:\n    driver: intake.source.derived.AliasSource\n    args:\n      target: csvs\n"
  },
  {
    "path": "intake/source/tests/cached.yaml",
    "content": "sources:\n  calvert:\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/calvert_uk.zip'\n    cache:\n      - type: compressed\n        argkey: urlpath\n  calvert_infer:\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/calvert_uk.zip'\n    cache:\n      - type: compressed\n        argkey: urlpath\n        decomp: infer\n  calvert_badkey:\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/calvert_uk.zip'\n    cache:\n      - type: compressed\n        argkey: urlpath\n        decomp: unknown\n  calvert_filter:\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/calvert_uk_filter.tar.gz'\n    cache:\n      - type: compressed\n        argkey: urlpath\n        regex_filter: '.*calvert_uk_research2017_nodes.csv'\n  dirs:\n    driver: textfiles\n    args:\n      urlpath: '{{ CATALOG_DIR }}/main'\n    cache:\n      - type: dir\n        argkey: urlpath\n        depth: 2\n  dat_data:\n    driver: textfiles\n    args:\n      urlpath: 'dat://66ef52101a2543e1721c901e84d2dd7a758c94283b8501d34a691abefe3fb3d6/*.json'\n      decoder: json.loads\n    cache:\n      - type: dat\n"
  },
  {
    "path": "intake/source/tests/data.zarr/.zarray",
    "content": "{\n    \"chunks\": [\n        10\n    ],\n    \"compressor\": {\n        \"blocksize\": 0,\n        \"clevel\": 5,\n        \"cname\": \"lz4\",\n        \"id\": \"blosc\",\n        \"shuffle\": 1\n    },\n    \"dtype\": \"<i8\",\n    \"fill_value\": 0,\n    \"filters\": null,\n    \"order\": \"C\",\n    \"shape\": [\n        10\n    ],\n    \"zarr_format\": 2\n}\n"
  },
  {
    "path": "intake/source/tests/der.yaml",
    "content": "sources:\n  base:\n    driver: csv\n    args:\n      urlpath: \"{{CATALOG_DIR}}/sample1.csv\"\n  cols:\n    driver: intake.source.derived.Columns\n    args:\n      targets:\n        - base\n      columns:\n        - score\n        - rank\n"
  },
  {
    "path": "intake/source/tests/footer_csvs/sample_fewfooters.csv",
    "content": "name,score,rank\nAlice,100.5,1\nBob,50.3,2\nCharlie,25,3\nEve,25,3\n1. This is footer #1\n2. This is another footer row, with a comma\n"
  },
  {
    "path": "intake/source/tests/footer_csvs/sample_manyfooters.csv",
    "content": "name,score,rank\nAlice,100.5,1\nBob,50.3,2\nCharlie,25,3\nEve,25,3\n1. This is footer #1\n2. This is another footer row, with a comma\n3. This a  third footer row. # With a hash\n4. And another one.\n5. And finally a last footer, to make sure we have more footers than data rows.\n"
  },
  {
    "path": "intake/source/tests/footer_csvs/sample_nofooters.csv",
    "content": "name,score,rank\nAlice,100.5,1\nBob,50.3,2\nCharlie,25,3\nEve,25,3\n"
  },
  {
    "path": "intake/source/tests/pipeline.yaml",
    "content": "sources:\n  df:\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/sample1.csv'\n  df1:\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/sample2_1.csv'\n  df2:\n    driver: csv\n    args:\n      urlpath: '{{ CATALOG_DIR }}/sample2_2.csv'\n\n  error_iloc:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps: [{method: iloc}]\n\n  error_loc:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps: [{method: loc}]\n\n  one_column:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps:\n        - method: cols\n          kwargs:\n            columns: name\n\n  two_columns:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps:\n        - method: cols\n          kwargs:\n            columns:\n              - name\n              - rank\n\n  accessor:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps:\n        - method: cols\n          kwargs:\n            columns: name\n        - method: str.lower\n\n  failed:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps:\n        - method: cols\n          kwargs: {columns: nope}\n\n  assign:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df, accessor]\n      steps:\n        - method: assign\n          kwargs:\n            lower: accessor\n\n  assign_value:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps:\n        - method: assign\n          kwargs:\n            new_col: value\n\n  concat_rows:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df, df1]\n      steps:\n        - method: concat\n          kwargs:\n            dfs: [df, df1]\n\n  concat_cols:\n    driver: intake.source.derived.DataFramePipeline\n    
args:\n      targets: [df, df1]\n      steps:\n        - method: concat\n          kwargs:\n            dfs: [df, df1]\n            axis: columns\n\n  merged:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df, df1]\n      steps:\n        - method: merge\n          kwargs:\n            right: df1\n            \"on\": rank\n\n  merge_fail:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps:\n        - method: merge\n          kwargs:\n            right: df1\n            \"on\": rank\n\n  join1:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df, df1]\n      steps:\n        - method: join\n          kwargs:\n            other: df1\n            rsuffix: 1\n\n  join_list:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df, df1, df2]\n      steps:\n        - method: join\n          kwargs:\n            other: [df1]\n            rsuffix: 1\n\n  join_fail:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps:\n        - method: join\n          kwargs:\n            other: df1\n            rsuffix: 1\n\n  func:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps:\n        - method: cols\n          kwargs: {columns: score}\n        - method: intake.source.tests.util.zscore\n\n  apply:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps:\n        - method: cols\n          kwargs:\n            columns: name\n        - method: apply\n          kwargs:\n            func: intake.source.tests.util.reverse\n\n  groupby_apply:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps:\n        - method: groupby\n          kwargs:\n            by: rank\n        - method: cols\n          kwargs:\n            columns: [score]\n        - method: apply\n          
kwargs:\n            func: numpy.sum\n            meta: {score: float}\n\n  groupby_transform:\n    driver: intake.source.derived.DataFramePipeline\n    args:\n      targets: [df]\n      steps:\n        - method: groupby\n          kwargs:\n            by: rank\n        - method: transform\n          kwargs:\n            func: builtins.len\n"
  },
  {
    "path": "intake/source/tests/plugin_searchpath/collision_foo/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom intake.source.base import DataSource\n\n\nclass FooPlugin(DataSource):\n    name = \"foo\"\n    version = \"0.1\"\n    container = \"dataframe\"\n    partition_access = False\n"
  },
  {
    "path": "intake/source/tests/plugin_searchpath/collision_foo2/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom intake.source.base import DataSource\n\n\nclass FooPlugin(DataSource):\n    name = \"foo\"\n    version = \"0.1\"\n    container = \"dataframe\"\n    partition_access = False\n"
  },
  {
    "path": "intake/source/tests/plugin_searchpath/driver_with_entrypoints/__init__.py",
    "content": "class SomeTestDriver:\n    ...\n"
  },
  {
    "path": "intake/source/tests/plugin_searchpath/driver_with_entrypoints-0.1.dist-info/entry_points.txt",
    "content": "[intake.drivers]\nsome_test_driver = driver_with_entrypoints:SomeTestDriver\n"
  },
  {
    "path": "intake/source/tests/plugin_searchpath/intake_foo/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom intake.source.base import DataSource\n\n\nclass FooPlugin(DataSource):\n    name = \"foo\"\n    version = \"0.1\"\n    container = \"dataframe\"\n    partition_access = False\n\n    def __init__(self, **kwargs):\n        pass\n"
  },
  {
    "path": "intake/source/tests/plugin_searchpath/not_intake_foo/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom intake.source.base import DataSource\n\n\nclass FooPlugin(DataSource):\n    name = \"otherfoo\"\n    version = \"0.1\"\n    container = \"dataframe\"\n    partition_access = False\n\n    def __init__(self, **kwargs):\n        pass\n"
  },
  {
    "path": "intake/source/tests/sample1.csv",
    "content": "name,score,rank\nAlice,100.5,1\nBob,50.3,2\nCharlie,25,3\nEve,25,3\n"
  },
  {
    "path": "intake/source/tests/sample2_1.csv",
    "content": "name,score,rank\nAlice1,100.5,1\nBob1,50.3,2\nCharlie1,25,3\nEve1,25,3\n"
  },
  {
    "path": "intake/source/tests/sample2_2.csv",
    "content": "name,score,rank\nAlice2,100.5,1\nBob2,50.3,2\nCharlie2,25,3\nEve2,25,3\n"
  },
  {
    "path": "intake/source/tests/sample3_2.csv",
    "content": "name,score,rank\nAlice3,100.5,1\nBob3,50.3,2\nCharlie3,25,3\nEve3,25,3\n"
  },
  {
    "path": "intake/source/tests/sources.yaml",
    "content": "sources:\n  zarr1:\n    driver: ndzarr\n    args:\n      urlpath: \"{{CATALOG_DIR}}/data.zarr\"\n  sometext:\n    driver: textfiles\n    args:\n      urlpath: \"{{CATALOG_DIR}}/*.py\"\n"
  },
  {
    "path": "intake/source/tests/test_base.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport pickle\nfrom collections import defaultdict\n\nimport numpy as np\nimport pandas as pd\nimport pytest\n\nimport intake.source\nimport intake.source.base as base\nimport intake.source.derived as der\n\n\ndef test_datasource_base_method_exceptions():\n    # Unimplemented methods should raise exceptions\n    d = base.DataSource()\n\n    for method_name, args in [\n        (\"_get_schema\", []),\n        (\"_get_partition\", [1]),\n        (\"_close\", []),\n    ]:\n        method = getattr(d, method_name)\n        with pytest.raises(NotImplementedError):\n            method(*args)\n\n\ndef test_name():\n    d = base.DataSource()\n    assert d.classname == \"intake.source.base.DataSource\"\n    assert isinstance(hash(d), int)\n\n\ndef test_datasource_base_context_manager():\n    # Base data source should raise a \"need to implement\" exception when it\n    # enters the context manager (which loads the schema)\n\n    with pytest.raises(NotImplementedError):\n        with base.DataSource():\n            pass\n\n\nclass MockDataSourceDataFrame(base.DataSource):\n    \"\"\"Mock Data Source subclass that returns dataframe containers\n\n    Used to verify that the base DataSource class logic works for dataframes.\n    \"\"\"\n\n    container = \"dataframe\"\n    name = \"mock\"\n\n    def __init__(self, a, b):\n        self.a = a\n        self.b = b\n\n        self.call_count = defaultdict(lambda: 0)\n\n        super(MockDataSourceDataFrame, self).__init__(metadata=dict(a=1, b=2))\n        self.npartitions = 2\n\n    def _get_schema(self):\n        self.call_count[\"_get_schema\"] += 1\n\n        return base.Schema(\n      
      dtype=np.dtype([(\"x\", np.int64), (\"y\", np.int64)]),\n            shape=(6,),\n            npartitions=2,\n            extra_metadata=dict(c=3, d=4),\n        )\n\n    def _get_partition(self, i):\n        self.call_count[\"_get_partition\"] += 1\n\n        if i == 0:\n            return pd.DataFrame({\"x\": [1, 2, 3], \"y\": [10, 20, 30]})\n        elif i == 1:\n            return pd.DataFrame({\"x\": [4, 5, 6], \"y\": [40, 50, 60]})\n        else:\n            raise Exception(\"This should never happen\")\n\n    def read(self):\n        return pd.concat([self._get_partition(i) for i in range(self.npartitions)])\n\n    def to_dask(self):\n        import dask\n        import dask.dataframe as dd\n\n        return dd.from_delayed(\n            [dask.delayed(self._get_partition)(i) for i in range(self.npartitions)]\n        )\n\n    def _close(self):\n        self.call_count[\"_close\"] += 1\n\n\n@pytest.fixture\ndef source_dataframe():\n    return MockDataSourceDataFrame(a=1, b=2)\n\n\ndef test_datasource_discover(source_dataframe):\n    r = source_dataframe.discover()\n\n    assert source_dataframe.container == \"dataframe\"\n\n    row_dtype = np.dtype([(\"x\", np.int64), (\"y\", np.int64)])\n    assert r == {\n        \"dtype\": row_dtype,\n        \"shape\": (6,),\n        \"npartitions\": 2,\n        \"metadata\": dict(a=1, b=2, c=3, d=4),\n    }\n\n    # check attributes have been set\n    assert source_dataframe.dtype == row_dtype\n    assert source_dataframe.shape == (6,)\n    assert source_dataframe.npartitions == 2\n    assert source_dataframe.metadata == dict(a=1, b=2, c=3, d=4)\n\n    # check that _get_schema is only called once\n    assert source_dataframe.call_count[\"_get_schema\"] == 1\n    source_dataframe.discover()\n    assert source_dataframe.call_count[\"_get_schema\"] == 1\n\n\ndef check_df(data):\n    pd.testing.assert_frame_equal(\n        data.reset_index(drop=True),\n        pd.DataFrame({\"x\": [1, 2, 3, 4, 5, 6], \"y\": [10, 20, 
30, 40, 50, 60]}),\n        check_index_type=False,\n    )\n\n\ndef test_datasource_read(source_dataframe):\n    data = source_dataframe.read()\n\n    check_df(data)\n\n\ndef check_df_parts(parts):\n    assert len(parts) == 2\n    pd.testing.assert_frame_equal(parts[0], pd.DataFrame({\"x\": [1, 2, 3], \"y\": [10, 20, 30]}))\n    pd.testing.assert_frame_equal(parts[1], pd.DataFrame({\"x\": [4, 5, 6], \"y\": [40, 50, 60]}))\n\n\ndef test_datasource_read_chunked(source_dataframe):\n    parts = [p for p in source_dataframe.read_chunked()]\n\n    check_df_parts(parts)\n\n\ndef test_datasource_read_partition(source_dataframe):\n    source_dataframe.discover()\n\n    parts = [source_dataframe.read_partition(i) for i in range(source_dataframe.npartitions)]\n\n    check_df_parts(parts)\n\n\ndef test_datasource_read_partition_out_of_range(source_dataframe):\n    with pytest.raises(IndexError):\n        source_dataframe.read_partition(-1)\n\n    with pytest.raises(IndexError):\n        source_dataframe.read_partition(2)\n\n\ndef test_datasource_to_dask(source_dataframe):\n    ddf = source_dataframe.to_dask()\n\n    check_df(ddf.compute())\n\n\ndef test_datasource_close(source_dataframe):\n    source_dataframe.close()\n\n    assert source_dataframe.call_count[\"_close\"] == 1\n\n\ndef test_datasource_context_manager(source_dataframe):\n    with source_dataframe:\n        pass\n\n    assert source_dataframe.call_count[\"_close\"] == 1\n\n\ndef test_datasource_pickle(source_dataframe):\n    pkl = pickle.dumps(source_dataframe)\n    new_obj = pickle.loads(pkl)\n\n    check_df(new_obj.read())\n\n\nclass MockDataSourcePython(base.DataSource):\n    \"\"\"Mock Data Source subclass that returns Python list containers\n\n    Used to verify that the base DataSource class logic works for Python lists.\n    \"\"\"\n\n    container = \"python\"\n    name = \"mock\"\n\n    def __init__(self, a, b):\n        self.a = a\n        self.b = b\n\n        self.call_count = defaultdict(lambda: 
0)\n\n        super(MockDataSourcePython, self).__init__(metadata=dict(a=1, b=2))\n        self.npartitions = 2\n\n    def _get_schema(self):\n        self.call_count[\"_get_schema\"] += 1\n\n        return base.Schema(dtype=None, shape=(4,), npartitions=2, extra_metadata=dict(c=3, d=4))\n\n    def _get_partition(self, i):\n        self.call_count[\"_get_partition\"] += 1\n\n        if i == 0:\n            return [{\"x\": \"foo\", \"y\": \"bar\"}, {\"x\": \"foo\", \"y\": \"bar\", \"z\": \"baz\"}]\n        elif i == 1:\n            return [{\"x\": 1}, {}]\n        else:\n            raise Exception(\"This should never happen\")\n\n    def read(self):\n        return sum([self._get_partition(i) for i in range(self.npartitions)], [])\n\n    def to_dask(self):\n        import dask\n        import dask.bag as db\n\n        return db.from_delayed(\n            [dask.delayed(self._get_partition)(i) for i in range(self.npartitions)]\n        )\n\n    def _close(self):\n        self.call_count[\"_close\"] += 1\n\n\n@pytest.fixture\ndef source_python():\n    return MockDataSourcePython(a=1, b=2)\n\n\ndef test_datasource_python_discover(source_python):\n    r = source_python.discover()\n\n    assert source_python.container == \"python\"\n\n    assert r == {\n        \"dtype\": None,\n        \"shape\": (4,),\n        \"npartitions\": 2,\n        \"metadata\": dict(a=1, b=2, c=3, d=4),\n    }\n\n    # check attributes have been set\n    assert source_python.dtype is None\n    assert source_python.shape == (4,)\n    assert source_python.npartitions == 2\n    assert source_python.metadata == dict(a=1, b=2, c=3, d=4)\n\n    # check that _get_schema is only called once\n    assert source_python.call_count[\"_get_schema\"] == 1\n    source_python.discover()\n    assert source_python.call_count[\"_get_schema\"] == 1\n\n\ndef test_datasource_python_read(source_python):\n    data = source_python.read()\n\n    assert data == [\n        {\"x\": \"foo\", \"y\": \"bar\"},\n        {\"x\": 
\"foo\", \"y\": \"bar\", \"z\": \"baz\"},\n        {\"x\": 1},\n        {},\n    ]\n\n\ndef test_datasource_python_to_dask(source_python):\n    db = list(source_python.to_dask())\n\n    assert db == [\n        {\"x\": \"foo\", \"y\": \"bar\"},\n        {\"x\": \"foo\", \"y\": \"bar\", \"z\": \"baz\"},\n        {\"x\": 1},\n        {},\n    ]\n\n\ndef test_yaml_method(source_python):\n    out = source_python.yaml()\n    assert \"mock\" in out  # the \"driver\"\n    assert \"metadata\" in out\n    assert \"a: 1\" in out\n\n\ndef test_alias_fail():\n    s = der.AliasSource(\"atarget\")\n    s.container == \"other\"\n    with pytest.raises(ValueError):\n        s.read()\n\n\ndef test_reconfigure():\n    s = MockDataSourcePython(a=1, b=2)\n    assert s.a == 1\n    s2 = s(a=3, b=4)\n    assert s2.a == 3\n    s3 = s2(a=4)\n    assert s3.a == 4\n    assert s3.b == 4\n\n\n@pytest.mark.parametrize(\n    \"data\",\n    [\n        (\"intake.source.import_name\", intake.source.import_name),\n        (\"intake.source:import_name\", intake.source.import_name),\n        (\"intake.source.DataSource\", intake.source.DataSource),\n        (\"intake.source:DataSource\", intake.source.DataSource),\n        (\"intake.source:base.DataSource\", intake.source.DataSource),\n    ],\n)\ndef test_import_name(data):\n    text, object = data\n    assert intake.source.import_name(text) == object\n"
  },
  {
    "path": "intake/source/tests/test_csv.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport os.path\nimport pickle\nimport posixpath\n\nimport pandas as pd\nimport pytest\n\nimport intake.source.csv as csv\nfrom intake.utils import make_path_posix\n\nfrom .util import verify_datasource_interface, verify_plugin_interface\n\n\n@pytest.fixture\ndef data_filenames():\n    basedir = make_path_posix(os.path.dirname(__file__))\n    return dict(\n        sample1=posixpath.join(basedir, \"sample1.csv\"),\n        sample2_1=posixpath.join(basedir, \"sample2_1.csv\"),\n        sample2_2=posixpath.join(basedir, \"sample2_2.csv\"),\n        sample2_all=posixpath.join(basedir, \"sample2_*.csv\"),\n        sample_pattern=posixpath.join(basedir, \"sample{num:d}_{dup:d}.csv\"),\n    )\n\n\n@pytest.fixture\ndef sample1_datasource(data_filenames):\n    return csv.CSVSource(data_filenames[\"sample1\"])\n\n\n@pytest.fixture\ndef sample2_datasource(data_filenames):\n    return csv.CSVSource(data_filenames[\"sample2_all\"])\n\n\n@pytest.fixture\ndef sample_pattern_datasource(data_filenames):\n    return csv.CSVSource(data_filenames[\"sample_pattern\"])\n\n\n@pytest.fixture\ndef sample_list_datasource(data_filenames):\n    return csv.CSVSource([data_filenames[\"sample2_1\"], data_filenames[\"sample2_2\"]])\n\n\n@pytest.fixture\ndef sample_list_datasource_with_glob(data_filenames):\n    return csv.CSVSource([data_filenames[\"sample1\"], data_filenames[\"sample2_all\"]])\n\n\n@pytest.fixture\ndef sample_list_datasource_with_path_as_pattern_str(data_filenames):\n    return csv.CSVSource(\n        [data_filenames[\"sample2_1\"], data_filenames[\"sample2_2\"]],\n        path_as_pattern=\"sample{num:d}_{dup:d}.csv\",\n    
)\n\n\n@pytest.fixture\ndef sample_pattern_datasource_with_cache(data_filenames):\n    metadata = {\n        \"cache\": [\n            {\n                \"argkey\": \"urlpath\",\n                \"regex\": make_path_posix(os.path.dirname(__file__)),\n                \"type\": \"file\",\n            }\n        ]\n    }\n    return csv.CSVSource(data_filenames[\"sample_pattern\"], metadata=metadata)\n\n\n@pytest.fixture\ndef footer_csv_dir():\n    return make_path_posix(os.path.join(os.path.dirname(__file__), \"footer_csvs\"))\n\n\n@pytest.fixture(params=[(\"sample_fewfooters.csv\", 2), (\"sample_manyfooters.csv\", 5)])\ndef sample_datasource_with_skipfooter(request, footer_csv_dir):\n    csv_filename, skipfooter = request.param\n    csv_path = posixpath.join(footer_csv_dir, csv_filename)\n    return csv.CSVSource(csv_path, csv_kwargs={\"skipfooter\": skipfooter})\n\n\ndef test_csv_plugin():\n    p = csv.CSVSource\n    assert isinstance(p.version, str)\n    assert p.container == \"dataframe\"\n    verify_plugin_interface(p)\n\n\ndef test_open(data_filenames):\n    d = csv.CSVSource(data_filenames[\"sample1\"])\n    assert d.container == \"dataframe\"\n    assert d.description is None\n    verify_datasource_interface(d)\n\n\ndef test_discover(sample1_datasource):\n    info = sample1_datasource.discover()\n\n    assert info[\"dtype\"] == {\"name\": \"object\", \"score\": \"float64\", \"rank\": \"int64\"}\n    # Do not know length without parsing CSV\n    assert info[\"shape\"] == (None, 3)\n    assert info[\"npartitions\"] == 1\n\n\ndef test_read_dask(sample1_datasource, data_filenames):\n    sample1_datasource._open_dask()\n    assert sample1_datasource._dask_df is not None\n\n    expected_df = pd.read_csv(data_filenames[\"sample1\"])\n    df = sample1_datasource.read()\n\n    assert expected_df.equals(df)\n\n\ndef test_read_pandas(sample1_datasource, data_filenames):\n    sample1_datasource._dask_df = None\n\n    expected_df = 
pd.read_csv(data_filenames[\"sample1\"])\n    df = sample1_datasource.read()\n\n    assert expected_df.equals(df)\n\n\ndef test_read_list(sample_list_datasource, data_filenames):\n    df_1 = pd.read_csv(data_filenames[\"sample2_1\"])\n    df_2 = pd.read_csv(data_filenames[\"sample2_2\"])\n    expected_df = pd.concat([df_1, df_2])\n\n    sample_list_datasource._open_dask()\n    assert sample_list_datasource._dask_df is not None\n    dask_df = sample_list_datasource.read()\n\n    assert expected_df.equals(dask_df)\n\n    sample_list_datasource._dask_df = None\n    pandas_df = sample_list_datasource.read()\n    assert sample_list_datasource._dask_df is None\n\n    assert expected_df.equals(pandas_df)\n\n\ndef test_read_list_with_glob(sample_list_datasource_with_glob, data_filenames):\n    df_1 = pd.read_csv(data_filenames[\"sample1\"])\n    df_2 = pd.read_csv(data_filenames[\"sample2_1\"])\n    df_3 = pd.read_csv(data_filenames[\"sample2_2\"])\n    expected_df = pd.concat([df_1, df_2, df_3])\n    assert len(sample_list_datasource_with_glob.files()) == 3\n\n    sample_list_datasource_with_glob._open_dask()\n    assert sample_list_datasource_with_glob._dask_df is not None\n    dask_df = sample_list_datasource_with_glob.read()\n\n    assert expected_df.equals(dask_df)\n\n    sample_list_datasource_with_glob._dask_df = None\n    pandas_df = sample_list_datasource_with_glob.read()\n    assert sample_list_datasource_with_glob._dask_df is None\n\n    assert expected_df.equals(pandas_df)\n\n\ndef test_read_chunked(sample1_datasource, data_filenames):\n    expected_df = pd.read_csv(data_filenames[\"sample1\"])\n\n    parts = list(sample1_datasource.read_chunked())\n    df = pd.concat(parts)\n\n    assert expected_df.equals(df)\n\n\ndef check_read_pattern_output(df, df_part):\n    # check that first partition has correct num and dup; which file\n    # it represents is not guaranteed\n    if df_part[\"name\"][0].endswith(\"1\"):\n        assert all(df_part.num == 2)\n        
assert all(df_part.dup == 1)\n    elif df_part[\"name\"][0].endswith(\"2\"):\n        assert all(df_part.num == 2)\n        assert all(df_part.dup == 2)\n    elif df_part[\"name\"][0].endswith(\"3\"):\n        assert all(df_part.num == 3)\n        assert all(df_part.dup == 2)\n\n    assert len(df.columns) == 5\n    assert \"num\" in df\n    assert \"dup\" in df\n    assert df.num.dtype == \"category\"\n    assert df.dup.dtype == \"category\"\n\n    names = [\"Alice\", \"Bob\", \"Charlie\", \"Eve\"]\n\n    file_1 = df[df[\"name\"].isin([\"{}1\".format(name) for name in names])]\n    assert all(file_1.num == 2)\n    assert all(file_1.dup == 1)\n\n    file_2 = df[df[\"name\"].isin([\"{}2\".format(name) for name in names])]\n    assert all(file_2.num == 2)\n    assert all(file_2.dup == 2)\n\n    file_3 = df[df[\"name\"].isin([\"{}3\".format(name) for name in names])]\n    assert all(file_3.num == 3)\n    assert all(file_3.dup == 2)\n\n\ndef test_read_pattern_dask(sample_pattern_datasource):\n    da = sample_pattern_datasource.to_dask()\n    assert set(da.num.cat.categories) == {2, 3}\n    assert set(da.dup.cat.categories) == {1, 2}\n    check_read_pattern_output(da.compute(), da.get_partition(0).compute())\n\n\ndef test_read_pattern_pandas(sample_pattern_datasource):\n    sample_pattern_datasource._dask_df = None\n    df = sample_pattern_datasource.read()\n    assert set(df.num.cat.categories) == {2, 3}\n    assert set(df.dup.cat.categories) == {1, 2}\n    df_part = sample_pattern_datasource._get_partition(0)\n    check_read_pattern_output(df, df_part)\n\n\ndef test_read_pattern_with_cache(sample_pattern_datasource_with_cache):\n    da = sample_pattern_datasource_with_cache.to_dask()\n    assert set(da.num.cat.categories) == {2, 3}\n    assert set(da.dup.cat.categories) == {1, 2}\n    check_read_pattern_output(da.compute(), da.get_partition(0).compute())\n\n\ndef test_read_pattern_with_path_as_pattern_str(sample_list_datasource_with_path_as_pattern_str):\n    da = 
sample_list_datasource_with_path_as_pattern_str.to_dask()\n    assert set(da.num.cat.categories) == {2}\n    assert set(da.dup.cat.categories) == {1, 2}\n    check_read_pattern_output(da.compute(), da.get_partition(0).compute())\n\n\ndef test_read_partition(sample2_datasource, data_filenames):\n    expected_df1 = pd.read_csv(data_filenames[\"sample2_1\"])\n    expected_df2 = pd.read_csv(data_filenames[\"sample2_2\"])\n\n    sample2_datasource.discover()\n    assert sample2_datasource.npartitions == 2\n\n    # Read partitions is opposite order\n    df2 = sample2_datasource.read_partition(1)\n    df1 = sample2_datasource.read_partition(0)\n\n    assert expected_df1.equals(df1)\n    assert expected_df2.equals(df2)\n\n\ndef test_to_dask(sample1_datasource, data_filenames):\n    dd = sample1_datasource.to_dask()\n    df = dd.compute()\n\n    expected_df = pd.read_csv(data_filenames[\"sample1\"])\n\n    assert expected_df.equals(df)\n\n\ndef test_plot(sample1_datasource):\n    pytest.importorskip(\"hvplot\")\n    import holoviews\n\n    p = sample1_datasource.plot()\n    assert isinstance(p, holoviews.NdOverlay)\n\n\ndef test_close(sample1_datasource, data_filenames):\n    sample1_datasource.close()\n    # Can reopen after close\n    df = sample1_datasource.read()\n    expected_df = pd.read_csv(data_filenames[\"sample1\"])\n\n    assert expected_df.equals(df)\n\n\ndef test_pickle(sample1_datasource):\n    pickled_source = pickle.dumps(sample1_datasource)\n    sample1_clone = pickle.loads(pickled_source)\n\n    expected_df = sample1_datasource.read()\n    df = sample1_clone.read()\n\n    assert expected_df.equals(df)\n\n\ndef test_skipfooter(sample_datasource_with_skipfooter, footer_csv_dir):\n    csv_nofooters = posixpath.join(footer_csv_dir, \"sample_nofooters.csv\")\n    expected_source = csv.CSVSource(csv_nofooters)\n    expected_schema = expected_source.discover()\n    assert expected_schema == sample_datasource_with_skipfooter.discover()\n    expected_df = 
expected_source.read()\n    df = sample_datasource_with_skipfooter.read()\n    assert expected_df.equals(df)\n"
  },
  {
    "path": "intake/source/tests/test_derived.py",
    "content": "import os\n\nimport pytest\n\nimport intake\nfrom intake.source.derived import MissingTargetError, PipelineStepError\n\ncatfile = os.path.join(\n    os.path.dirname(__file__), \"..\", \"..\", \"catalog\", \"tests\", \"catalog_alias.yml\"\n)\n\n\n@pytest.fixture\ndef pipe_cat():\n    catfile = os.path.join(os.path.dirname(__file__), \"..\", \"tests\", \"pipeline.yaml\")\n    return intake.open_catalog(catfile)\n\n\ndef test_columns():\n    cat = intake.open_catalog(catfile)\n    df1 = cat.input_data.read()\n    df2 = cat.derive_cols.read()\n    assert df1[[\"state\", \"slug\"]].equals(df2)\n\n\ndef _pick_columns(df, columns):\n    return df[columns]\n\n\ndef test_df_transform():\n    cat = intake.open_catalog(catfile)\n    df1 = cat.input_data.read()\n    df2 = cat.derive_cols_func.read()\n    assert df1[[\"state\", \"slug\"]].equals(df2)\n\n\ndef test_barebones():\n    cat = intake.open_catalog(catfile)\n    df1 = cat.input_data.read()\n    cat = intake.open_catalog(catfile)\n    assert cat.barebones.read() == len(df1)\n\n\ndef test_other_cat():\n    cat = intake.open_catalog(catfile)\n    df1 = cat.other_cat.read()\n    assert df1.columns.tolist() == [\"name\", \"score\"]\n\n\ndef test_pipeline_no_loc(pipe_cat):\n    with pytest.raises(ValueError):\n        pipe_cat.error_iloc.read()\n\n    with pytest.raises(ValueError):\n        pipe_cat.error_loc.read()\n\n\ndef test_pipeline_failed(pipe_cat):\n    with pytest.raises(PipelineStepError):\n        _ = pipe_cat.failed.read()\n\n\ndef test_pipeline_cols(pipe_cat):\n    name_col = pipe_cat.one_column.read()\n    assert name_col.to_list() == [\"Alice\", \"Bob\", \"Charlie\", \"Eve\"]\n\n    two_cols = pipe_cat.two_columns.read()\n    assert two_cols.values.tolist() == [\n        [\"Alice\", 1],\n        [\"Bob\", 2],\n        [\"Charlie\", 3],\n        [\"Eve\", 3],\n    ]\n\n\ndef test_pipeline_accessor(pipe_cat):\n    lower = pipe_cat.accessor.read()\n\n    assert lower.tolist() == [\"alice\", 
\"bob\", \"charlie\", \"eve\"]\n\n\ndef test_pipeline_assign(pipe_cat):\n    df = pipe_cat.assign.read()\n\n    assert \"lower\" in df\n    assert df[\"lower\"].tolist() == [\"alice\", \"bob\", \"charlie\", \"eve\"]\n\n\ndef test_pipeline_assign_value(pipe_cat):\n    df = pipe_cat.assign_value.read()\n\n    assert \"new_col\" in df\n    assert df[\"new_col\"].tolist() == [\"value\"] * 4\n\n\ndef test_pipeline_concat(pipe_cat):\n    concated_rows = pipe_cat.concat_rows.read()\n    assert concated_rows.shape == (8, 3)\n\n    concated_cols = pipe_cat.concat_cols.read()\n    assert concated_cols.shape == (4, 6)\n\n\ndef test_pipeline_merge(pipe_cat):\n    merged = pipe_cat.merged.read()\n    assert merged.columns.to_list() == [\n        \"name_x\",\n        \"score_x\",\n        \"rank\",\n        \"name_y\",\n        \"score_y\",\n    ]\n    assert merged.shape == (6, 5)\n\n\ndef test_pipeline_merge_fail(pipe_cat):\n    with pytest.raises(MissingTargetError):\n        _ = pipe_cat.merge_fail.read()\n\n\ndef test_pipeline_join(pipe_cat):\n    join1 = pipe_cat.join1.read()\n    assert join1.shape == (4, 6)\n    assert join1.columns.to_list() == [\n        \"name\",\n        \"score\",\n        \"rank\",\n        \"name1\",\n        \"score1\",\n        \"rank1\",\n    ]\n\n    join_list = pipe_cat.join_list.read()\n    assert join_list.shape == (4, 6)\n    assert join_list.columns.to_list() == [\n        \"name\",\n        \"score\",\n        \"rank\",\n        \"name1\",\n        \"score1\",\n        \"rank1\",\n    ]\n\n\ndef test_pipeline_join_fail(pipe_cat):\n    with pytest.raises(MissingTargetError):\n        _ = pipe_cat.join_fail.read()\n\n\ndef test_pipeline_func(pipe_cat):\n    from .util import zscore\n\n    df = pipe_cat.df.read()\n    z = zscore(df.score)\n\n    pipe = pipe_cat.func.read()\n    assert z.equals(pipe)\n\n\ndef test_pipeline_apply(pipe_cat):\n    from .util import reverse\n\n    df = pipe_cat.df.read()\n    s = pipe_cat.apply.read()\n\n    
assert s.equals(df.name.apply(reverse))\n\n\ndef test_groupby_apply(pipe_cat):\n    df = pipe_cat.df.read()\n    agg = pipe_cat.groupby_apply.read()\n\n    assert agg.score.equals(df.groupby(\"rank\")[\"score\"].sum())\n\n\ndef test_groupby_transform(pipe_cat):\n    lengths = pipe_cat.groupby_transform.read()\n\n    assert lengths.score.to_list() == [1, 1, 2, 2]\n\n\ndef test_pipeline_dask(pipe_cat):\n    df = pipe_cat.groupby_apply.read()\n    ddf = pipe_cat.groupby_apply.to_dask()\n\n    assert df.equals(ddf.compute())\n"
  },
  {
    "path": "intake/source/tests/test_discovery.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport os\nimport os.path\nimport shlex\nimport subprocess\nimport sys\n\nimport pytest\n\nimport intake\nfrom intake.source import discovery\n\n\n@pytest.fixture\ndef extra_pythonpath():\n    basedir = os.path.dirname(__file__)\n    extra_path = os.path.join(basedir, \"plugin_searchpath\")\n\n    # Put extra directory on the python path\n    sys.path.append(extra_path)\n\n    yield extra_path\n\n    # Return python path back to normal\n    sys.path.remove(extra_path)\n\n\ndef test_package_scan(extra_pythonpath, tmp_config_path):\n    \"This tests a non-public function.\"\n    # Default path (sys.path)\n    results = discovery._package_scan()\n    assert \"foo\" in results\n\n    # Explicit path\n    results = discovery._package_scan(path=[extra_pythonpath])\n    assert \"foo\" in results\n\n\ndef test_discover_cli(extra_pythonpath, tmp_config_path):\n    env = os.environ.copy()\n    env[\"INTAKE_CONF_FILE\"] = tmp_config_path\n    env[\"PYTHONPATH\"] = extra_pythonpath\n\n    # directory is not automatically scanned any more\n    subprocess.call(\n        shlex.split(\"intake drivers enable foo intake_foo.FooPlugin\"),\n        stderr=subprocess.STDOUT,\n        stdout=subprocess.PIPE,\n        env=env,\n    )\n\n    out = subprocess.check_output(\n        shlex.split(\"intake drivers list\"), stderr=subprocess.STDOUT, env=env\n    )\n\n    assert b\"foo\" in out\n    assert out.index(b\"Disabled\") > out.index(b\"foo\")\n\n    subprocess.check_output(\n        shlex.split(\"intake drivers disable foo\"), stderr=subprocess.STDOUT, env=env\n    )\n\n    out = subprocess.check_output(\n        shlex.split(\"intake 
drivers list\"), stderr=subprocess.STDOUT, env=env\n    )\n    assert b\"foo\" in out\n    assert out.index(b\"Disabled\") < out.index(b\"foo\")\n\n\ndef test_discover(extra_pythonpath, tmp_config_path):\n    drivers = intake.source.discovery.DriverSouces(do_scan=True)\n    with pytest.warns(PendingDeprecationWarning):\n        assert \"foo\" in drivers.scanned\n\n    registry = intake.source.DriverRegistry(drivers)\n    # Check that package scan (name-based) discovery worked.\n    assert \"foo\" in registry\n    registry[\"foo\"]()\n    # Check that entrypoints-based discovery worked.\n    assert \"some_test_driver\" in registry\n    registry[\"some_test_driver\"]()\n\n    # Now again, turning off the package scan.\n\n    drivers = intake.source.discovery.DriverSouces()\n    registry = intake.source.DriverRegistry(drivers)\n\n    # Check that package scan (name-based) discovery did *not* happen.\n    assert \"foo\" not in registry\n    # Check that entrypoints-based discovery worked.\n    assert \"some_test_driver\" in registry\n    registry[\"some_test_driver\"]()\n\n\ndef test_enable_and_disable(extra_pythonpath, tmp_config_path):\n    # Disable and then enable a package scan result.\n\n    try:\n        drivers = intake.source.discovery.DriverSouces(do_scan=True)\n        registry = intake.source.DriverRegistry(drivers)\n        assert \"foo\" in registry\n\n        drivers.disable(\"foo\")\n        with pytest.warns(PendingDeprecationWarning):\n            assert \"foo\" in discovery.drivers.scanned\n        assert \"foo\" not in registry\n\n        drivers.enable(\"foo\", \"intake_foo.FooPlugin\")\n        assert \"foo\" in registry\n    finally:\n        drivers.enable(\"foo\", \"intake_foo.FooPlugin\")\n\n    # Disable and then enable an entrypoint result.\n\n    try:\n        drivers.disable(\"some_test_driver\")\n        assert \"some_test_driver\" not in registry\n\n        drivers.enable(\"some_test_driver\", 
\"driver_with_entrypoints.SomeTestDriver\")\n        assert \"some_test_driver\" in registry\n    finally:\n        drivers.enable(\"some_test_driver\", \"driver_with_entrypoints.SomeTestDriver\")\n\n\ndef test_register_and_unregister(extra_pythonpath, tmp_config_path):\n    registry = intake.source.registry\n    assert \"bar\" not in registry\n    with pytest.raises(ImportError):\n        from intake import open_bar\n\n    intake.register_driver(\"bar\", \"intake_foo.FooPlugin\")\n    assert \"bar\" in registry\n    from intake import open_bar  # noqa\n\n    intake.unregister_driver(\"bar\")\n    assert \"bar\" not in registry\n\n    with pytest.raises(ImportError):\n        from intake import open_bar  # noqa\n\n\ndef test_discover_collision(extra_pythonpath, tmp_config_path):\n    with pytest.warns(UserWarning):\n        discovery._package_scan(plugin_prefix=\"collision_\")\n"
  },
  {
    "path": "intake/source/tests/test_json.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2021, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport json\nimport os\n\nimport pytest\nfrom fsspec import open_files\nfrom fsspec.utils import compressions\n\nfrom intake.source.jsonfiles import JSONFileSource, JSONLinesFileSource\n\nhere = os.path.abspath(os.path.dirname(__file__))\n\nEXTENSIONS = {compression: f\".{extension}\" for extension, compression in compressions.items()}\n\n\n@pytest.fixture(params=[None, \"gzip\", \"bz2\"])\ndef json_file(request, tmp_path) -> str:\n    data = {\"hello\": \"world\"}\n    file_path = str(tmp_path / \"1.json\")\n    file_path += EXTENSIONS.get(request.param, \"\")\n    with open_files([file_path], mode=\"wt\", compression=request.param)[0] as f:\n        f.write(json.dumps(data))\n    return file_path\n\n\n@pytest.fixture(params=[None, \"gzip\", \"bz2\"])\ndef jsonl_file(request, tmp_path) -> str:\n    data = [{\"hello\": \"world\"}, [1, 2, 3]]\n    file_path = str(tmp_path / \"1.jsonl\")\n    file_path += EXTENSIONS.get(request.param, \"\")\n    with open_files([file_path], mode=\"wt\", compression=request.param)[0] as f:\n        f.write(\"\\n\".join(json.dumps(row) for row in data))\n    return file_path\n\n\ndef test_jsonfile(json_file: str):\n    j = JSONFileSource(json_file, text_mode=True, compression=\"infer\")\n    out = j.read()\n    assert isinstance(out, dict)\n    assert out[\"hello\"] == \"world\"\n\n\ndef test_jsonfile_none(json_file: str):\n    try:\n        j = JSONFileSource(json_file, text_mode=True, compression=None)\n        out = j.read()\n        # compression=None should not raise exception for uncompressed files\n        assert json_file.endswith(\".json\")\n    except UnicodeDecodeError:\n        
# compression=None should raise exception for compressed files\n        assert not json_file.endswith(\".json\")\n        return\n    assert isinstance(out, dict)\n    assert out[\"hello\"] == \"world\"\n\n\ndef test_jsonfile_discover(json_file: str):\n    j = JSONFileSource(json_file, text_mode=True, compression=None)\n    schema = j.discover()\n    assert schema == {\"dtype\": None, \"shape\": None, \"npartitions\": 0, \"metadata\": {}}\n\n\ndef test_jsonlfile(jsonl_file: str):\n    j = JSONLinesFileSource(jsonl_file, compression=\"infer\")\n    out = j.read()\n    assert isinstance(out, list)\n\n    assert isinstance(out[0], dict)\n    assert out[0][\"hello\"] == \"world\"\n\n    assert isinstance(out[1], list)\n    assert out[1] == [1, 2, 3]\n\n\ndef test_jsonfilel_none(jsonl_file: str):\n    try:\n        j = JSONLinesFileSource(jsonl_file, compression=None)\n        out = j.read()\n        # compression=None should not raise exception for uncompressed files\n        assert jsonl_file.endswith(\".jsonl\")\n    except UnicodeDecodeError:\n        # compression=None should raise exception for compressed files\n        assert not jsonl_file.endswith(\".jsonl\")\n        return\n    assert isinstance(out, list)\n\n    assert isinstance(out[0], dict)\n    assert out[0][\"hello\"] == \"world\"\n\n    assert isinstance(out[1], list)\n    assert out[1] == [1, 2, 3]\n\n\ndef test_jsonfilel_discover(json_file: str):\n    j = JSONLinesFileSource(jsonl_file, compression=None)\n    schema = j.discover()\n    assert schema == {\"dtype\": None, \"shape\": None, \"npartitions\": 0, \"metadata\": {}}\n\n\ndef test_jsonl_head(jsonl_file: str):\n    j = JSONLinesFileSource(jsonl_file, compression=\"infer\")\n    out = j.head(1)\n    assert isinstance(out, list)\n    assert len(out) == 1\n    assert out[0][\"hello\"] == \"world\"\n"
  },
  {
    "path": "intake/source/tests/test_npy.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport os\nimport posixpath\n\nimport numpy as np\nimport pytest\n\nimport intake\n\nfrom ..npy import NPySource\n\nhere = os.path.abspath(os.path.dirname(__file__))\n\n\n@pytest.mark.parametrize(\"shape\", [(1,), (1, 1), (10,), (5, 2), (3, 3, 3)])\ndef test_one_file(tempdir, shape):\n    size = 1\n    for s in shape:\n        size *= s\n    data = np.random.randint(1, 100, size=size).reshape(shape)\n    fn = os.path.join(tempdir, \"out.npy\")\n    np.save(fn, data)\n    s = NPySource(fn)\n    out = s.read()\n    assert (out == data).all()\n    s = NPySource(fn, chunks=1)\n    out = s.read()\n    assert (out == data).all()\n    s = NPySource(fn, shape=shape, dtype=\"int\", chunks=1)\n    out = s.read()\n    assert (out == data).all()\n\n\n@pytest.mark.parametrize(\"shape\", [(1,), (1, 1), (10,), (5, 2), (3, 3, 3)])\ndef test_multi_file(tempdir, shape):\n    size = 1\n    for s in shape:\n        size *= s\n    data0 = np.random.randint(1, 100, size=size).reshape(shape)\n    fn0 = os.path.join(tempdir, \"out0.npy\")\n    np.save(fn0, data0)\n    data1 = np.random.randint(1, 100, size=size).reshape(shape)\n    fn1 = os.path.join(tempdir, \"out1.npy\")\n    np.save(fn1, data1)\n    data = np.stack([data0, data1])\n    fn = [fn0, fn1]\n\n    s = NPySource(fn)\n    out = s.read()\n    assert (out == data).all()\n    assert (s.to_dask().compute() == data).all()\n\n    s = NPySource(fn, chunks=1)\n    out = s.read()\n    assert (out == data).all()\n    assert (s.to_dask().compute() == data).all()\n\n    s = NPySource(fn, shape=shape, dtype=\"int\", chunks=1)\n    out = s.read()\n    assert (out == data).all()\n    da = 
s.to_dask()\n    assert (da.compute() == data).all()\n    schema = s.discover()\n    assert schema[\"shape\"] == data.shape\n    assert schema[\"npartitions\"] == da.npartitions\n\n    s = NPySource(os.path.join(tempdir, \"out*.npy\"))\n    out = s.read()\n    assert (out == data).all()\n    assert (s.to_dask().compute() == data).all()\n\n\ndef test_zarr_minimal():\n    pytest.importorskip(\"zarr\")\n    cat = intake.open_catalog(posixpath.join(here, \"sources.yaml\"))\n    s = cat.zarr1()\n    assert s.container == \"ndarray\"\n    assert s.read().tolist() == [73, 98, 46, 38, 20, 12, 31, 8, 89, 72]\n    assert s.npartitions == 1\n    assert s.dtype.kind == \"i\"\n    assert s.shape == (10,)\n    assert (s.read_partition((0,)) == s.read()).all()\n\n\ndef test_zarr_parts():\n    zarr = pytest.importorskip(\"zarr\")\n    out = {}\n    g = zarr.open(out, mode=\"w\")\n    z = g.create_dataset(\"data\", dtype=\"i4\", shape=(10, 10), chunks=(5, 5), compression=None)\n    z[:5, :5] = 1\n    z[:5, 5:] = 2\n    z[5:, :5] = 3\n    z[5:, 5:] = 4\n    source = intake.open_ndzarr(out, component=\"data\")\n\n    assert (source.read_partition((0, 0)) == 1).all()\n    assert (source.read_partition((1, 1)) == 4).all()\n\n    da = source.to_dask()\n    assert da.npartitions == 4\n    assert (da.compute() == z[:]).all()\n\n    g = zarr.open(out, mode=\"w\")\n    gg = g.create_group(\"inner\")\n    z = gg.create_dataset(\"data\", dtype=\"i4\", shape=(10, 10), chunks=(5, 5), compression=None)\n    z[:5, :5] = 1\n    z[:5, 5:] = 2\n    z[5:, :5] = 3\n    z[5:, 5:] = 4\n    source = intake.open_ndzarr(out, component=\"inner/data\")\n\n    assert (source.read_partition((0, 0)) == 1).all()\n    assert (source.read_partition((1, 1)) == 4).all()\n\n    da = source.to_dask()\n    assert da.npartitions == 4\n    assert (da.compute() == z[:]).all()\n"
  },
  {
    "path": "intake/source/tests/test_text.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport os\n\nimport pytest\nfrom fsspec import open_files\n\nimport intake\nfrom intake.source import import_name\nfrom intake.source.textfiles import TextFilesSource\n\nhere = os.path.abspath(os.path.dirname(__file__))\n\n\ndef test_textfiles(tempdir):\n    open(os.path.join(tempdir, \"1.txt\"), \"wt\").write(\"hello\\nworld\")\n    open(os.path.join(tempdir, \"2.txt\"), \"wt\").write(\"hello\\nworld\")\n    path = os.path.join(tempdir, \"*.txt\")\n    t = TextFilesSource(path)\n    t.discover()\n    assert t.npartitions == 2\n    assert t._get_partition(0) == t.to_dask().to_delayed()[0].compute()\n    out = t.read()\n    assert isinstance(out, list)\n    assert out[0] == \"hello\\n\"\n\n\n@pytest.mark.parametrize(\"comp\", [None, \"gzip\", \"bz2\"])\ndef test_complex_text(tempdir, comp):\n    dump, load, read = \"json.dumps\", \"json.loads\", True\n    dump = import_name(dump)\n    data = [{\"something\": \"simple\", \"and\": 0}] * 2\n    for f in [\"1.out\", \"2.out\"]:\n        fn = os.path.join(tempdir, f)\n        with open_files([fn], mode=\"wt\", compression=comp)[0] as fo:\n            if read:\n                fo.write(dump(data))\n            else:\n                dump(data, fo)\n    # that was all setup\n\n    path = os.path.join(tempdir, \"*.out\")\n    t = TextFilesSource(path, text_mode=True, compression=comp, decoder=load)\n    t.discover()\n    assert t.npartitions == 2\n    assert t._get_partition(0) == t.to_dask().to_delayed()[0].compute()\n    out = t.read()\n    assert isinstance(out, list)\n    assert out[0] == data[0]\n\n\n@pytest.mark.parametrize(\"comp\", [None, \"gzip\", 
\"bz2\"])\n@pytest.mark.parametrize(\n    \"pars\",\n    [\n        [\"msgpack.pack\", \"msgpack.unpack\", False],\n        [\"msgpack.packb\", \"msgpack.unpackb\", True],\n        [\"pickle.dump\", \"pickle.load\", False],\n        [\"pickle.dumps\", \"pickle.loads\", True],\n    ],\n)\ndef test_complex_bytes(tempdir, comp, pars):\n    dump, load, read = pars\n    dump = import_name(dump)\n    # using bytestrings means not needing extra en/decode argument to msgpack\n    data = [{b\"something\": b\"simple\", b\"and\": 0}] * 2\n    for f in [\"1.out\", \"2.out\"]:\n        fn = os.path.join(tempdir, f)\n        with open_files([fn], mode=\"wb\", compression=comp)[0] as fo:\n            if read:\n                fo.write(dump(data))\n            else:\n                dump(data, fo)\n    # that was all setup\n\n    path = os.path.join(tempdir, \"*.out\")\n    t = TextFilesSource(path, text_mode=False, compression=comp, decoder=load, read=read)\n    t.discover()\n    assert t.npartitions == 2\n    assert t._get_partition(0) == t.to_dask().to_delayed()[0].compute()\n    out = t.read()\n    assert isinstance(out, list)\n    assert out[0] == data[0]\n\n\ndef test_text_persist(temp_cache):\n    cat = intake.open_catalog(os.path.join(here, \"sources.yaml\"))\n    s = cat.sometext()\n    s2 = s.persist()\n    assert s.read() == s2.read()\n\n\ndef test_text_export(temp_cache):\n    import tempfile\n\n    outdir = tempfile.mkdtemp()\n    cat = intake.open_catalog(os.path.join(here, \"sources.yaml\"))\n    s = cat.sometext()\n    out = s.export(outdir)\n    fn = os.path.join(outdir, \"cat.yaml\")\n    with open(fn, \"w\") as f:\n        f.write(out.yaml())\n    cat = intake.open_catalog(fn)\n    s2 = cat[s.name]()\n    assert s.read() == s2.read()\n"
  },
  {
    "path": "intake/source/tests/test_tiled.py",
    "content": "import shlex\nimport subprocess\nimport time\n\nimport pytest\n\nimport intake\n\npytest.importorskip(\"tiled\")\n\nimport httpx  # required by tiled, so will be here\n\n\n@pytest.fixture()\ndef server():\n    cmd = shlex.split(\"tiled serve pyobject --public tiled.examples.generated:tree\")\n    P = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)\n    url = \"http://localhost:8000\"\n    timeout = 20\n    while True:\n        try:\n            r = httpx.get(url)\n            if r.status_code == 200:\n                break\n        except:\n            pass\n        timeout -= 0.1\n        if timeout < 0:\n            P.terminate()\n            out = P.communicate()\n            raise RuntimeError(\"timeout waiting for Tiled server\\n%s\", out)\n        time.sleep(0.1)\n    yield url + \"/api\"\n    P.terminate()\n    P.wait()\n\n\ndef test_simple(server):\n    cat = intake.open_tiled_cat(server)\n    out = cat.tiny_image.read()\n    assert out.shape\n    assert out.all()\n"
  },
  {
    "path": "intake/source/tests/test_utils.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n"
  },
  {
    "path": "intake/source/tests/util.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\n\ndef verify_plugin_interface(plugin):\n    assert isinstance(plugin.version, str)\n    assert isinstance(plugin.container, str)\n    assert isinstance(plugin.partition_access, bool)\n\n\ndef verify_datasource_interface(source):\n    for attr in [\n        \"container\",\n        \"description\",\n        \"dtype\",\n        \"shape\",\n        \"npartitions\",\n        \"metadata\",\n    ]:\n        assert hasattr(source, attr)\n\n    for method in [\n        \"discover\",\n        \"read\",\n        \"read_chunked\",\n        \"read_partition\",\n        \"to_dask\",\n        \"close\",\n    ]:\n        assert hasattr(source, method)\n\n\ndef zscore(s):\n    return (s - s.mean()) / s.std(ddof=0)\n\n\ndef reverse(s):\n    return s[::-1]\n"
  },
  {
    "path": "intake/source/textfiles.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom . import base\nfrom intake.readers.datatypes import Text\nfrom intake.readers.readers import SparkText, FileByteReader\n\n\nclass TextFilesSource(base.DataSource):\n    \"\"\"Read textfiles as sequence of lines\n\n    Prototype of sources reading sequential data.\n\n    Takes a set of files, and returns an iterator over the text in each of them.\n    The files can be local or remote. Extra parameters for encoding, etc.,\n    go into ``storage_options``.\n    \"\"\"\n\n    name = \"textfiles\"\n    version = \"0.0.1\"\n    container = \"python\"\n    partition_access = True\n\n    def __init__(\n        self,\n        urlpath,\n        text_mode=True,\n        text_encoding=\"utf8\",\n        compression=None,\n        decoder=None,\n        metadata=None,\n        storage_options=None,\n    ):\n        \"\"\"\n\n        Parameters\n        ----------\n        urlpath : str or list(str)\n            Target files. Can be a glob-path (with \"*\") and include protocol\n            specified (e.g., \"s3://\"). Can also be a list of absolute paths.\n        text_mode : bool\n            Whether to open the file in text mode, recoding binary\n            characters on the fly\n        text_encoding : str\n            If text_mode is True, apply this encoding. UTF* is by far the most\n            common\n        compression : str or None\n            If given, decompress the file with the given codec on load. Can\n            be something like \"gzip\", \"bz2\", or to try to guess from the filename,\n            'infer'\n        decoder : function, str or None\n            Use this to decode the contents of files. 
If None, you will get\n            a list of lines of text/bytes. If a function, it must operate on\n            an open file-like object or a bytes/str instance, and return a\n            list\n        storage_options: dict\n            Options to pass to the file reader backend, including text-specific\n            encoding arguments, and parameters specific to the remote\n            file-system driver, if using.\n        \"\"\"\n        if compression:\n            storage_options[\"compression\"] = compression\n        self.data = Text(url=urlpath, storage_options=storage_options, metadata=metadata)\n        self.metadata = metadata\n        self.kwargs = dict(text_mode=text_mode, text_encoding=text_encoding, decoder=decoder)\n\n    def read(self):\n        reader = FileByteReader(self.data)\n        if self.kwargs[\"text_mode\"]:\n            if self.kwargs[\"decoder\"]:\n                reader = reader.apply(self.kwargs[\"decoder\"])\n            else:\n                reader = reader.apply(bytes.decode, encoding=self.kwargs[\"text_encoding\"])\n        return reader.read()\n\n    def to_spark(self):\n        return SparkText(self.data).read()\n"
  },
  {
    "path": "intake/source/tiled.py",
    "content": "from intake.catalog import Catalog\nfrom intake.source import DataSource\n\n\nclass TiledCatalog(Catalog):\n    \"\"\"View Tiled server as a catalog\n\n    See the documentation for setting up such a server at\n    https://blueskyproject.io/tiled/\n\n    A tiled server may contain sources of dataframe, array or xarray type.\n    This driver exposes the full tree as exposed by the server, but you\n    can also specify the sub-path of that tree.\n    \"\"\"\n\n    name = \"tiled_cat\"\n\n    def __init__(self, server, path=None):\n        \"\"\"\n\n        Parameters\n        ----------\n        server: str or tiled.client.node.Node\n            Location of tiles server. Usually of the form \"http[s]://address:port/\"\n            May include a path. If the protocol is \"tiled\", we assume HTTP\n            connection. Alternatively, can be a Node instance, already connected\n            to a server.\n        path: str (optional)\n            If given, restrict the catalog to this part of the server's catalog\n            tree. 
Equivalent to extending the server URL.\n        \"\"\"\n        from tiled.client import from_uri\n\n        self.path = path\n        if isinstance(server, str):\n            if server.startswith(\"tiled\"):\n                uri = server.replace(\"tiled\", \"http\", 1)\n            else:\n                uri = server\n            client = from_uri(uri, \"dask\")\n        else:\n            client = server\n            uri = server.uri\n        self.uri = uri\n        if path is not None:\n            client = client[path]\n        super().__init__(entries=client, name=\"tiled:\" + uri.split(\":\", 1)[1])\n\n    def search(self, query, type=\"text\"):\n        \"\"\"Full text search\n\n        Queries other than full text will be added later\n        \"\"\"\n        if type == \"text\":\n            from tiled.queries import FullText\n\n            q = FullText(query)\n        else:\n            raise NotImplementedError\n        return TiledCatalog.from_dict(self._entries.search(q), uri=self.uri, path=self.path)\n\n    def __getitem__(self, item):\n        from tiled.client.node import Node\n\n        node = self._entries[item]\n        if isinstance(node, Node):\n            return TiledCatalog(node)\n        else:\n            return TiledSource(uri=self.uri, path=item, instance=node)\n\n\ntypes = {\n    \"DaskArrayClient\": \"ndarray\",\n    \"DaskDataArrayClient\": \"xarray\",\n    \"DaskDatasetClient\": \"xarray\",\n    \"DaskVariableClient\": \"xarray\",\n    \"DaskDataFrameClient\": \"dataframe\",\n}\n\n\nclass TiledSource(DataSource):\n    \"\"\"A source on a Tiled server\n\n    The container type of this source is determined at runtime.\n    The attribute ``.instance`` gives access to the underlying Tiled\n    API, but most users will only call ``.to_dask()``.\n    \"\"\"\n\n    name = \"tiled\"\n\n    def __init__(self, uri=\"\", path=\"\", instance=None, metadata=None):\n        \"\"\"\n\n        Parameters\n        ----------\n        uri: str 
(optional)\n            Location of the server. If ``instance`` is given, this is\n            only used for the repr\n        path: str (optional)\n            Path of the data source within the server tree. If ``instance``\n            is given, this is only used for the repr\n        instance: tiled.client.node.Node (optional)\n            The tiled object pointing to the data source; normally created\n            by a ``TiledCatalog``\n        metadata: dict\n            Extra metadata for this source; metadata will also be provided\n            by the server.\n        \"\"\"\n        from tiled.client import from_uri\n\n        if instance is None:\n            instance = from_uri(uri, \"dask\")[path].read()\n        self.instance = instance\n        md = dict(instance.metadata)\n        if metadata:\n            md.update(metadata)\n        super().__init__(metadata=md)\n        self.name = path\n        self.container = types[type(self.instance).__name__]\n\n    def discover(self):\n        x = self.to_dask()\n        dt = getattr(x, \"dtype\", None) or getattr(x, \"dtypes\", None)\n        parts = getattr(x, \"npartitions\", None) or x.data.npartitions\n        return dict(\n            dtype=dt,\n            shape=getattr(self.instance.structure().macro, \"shape\", x.shape),\n            npartitions=parts,\n            metadata=self.metadata,\n        )\n\n    def to_dask(self):\n        # cache this?\n        return self.instance.read()\n\n    def read(self):\n        return self.instance.read().compute()\n\n    def _yaml(self):\n        y = super()._yaml()\n        v = list(y[\"sources\"].values())[0]\n        v[\"args\"].pop(\"instance\")\n        return y\n"
  },
  {
    "path": "intake/source/utils.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\nfrom hashlib import md5\n\n\ndef tokenize(*args, **kwargs):\n    \"\"\"Deterministic token\n\n    copied from dask\n    \"\"\"\n    hasher = md5(str(tuple(args)).encode())\n    if kwargs:\n        hasher.update(str(args).encode())\n    return hasher.hexdigest()\n\n\ndef _validate_format_spec(format_spec):\n    if format_spec[-1].isalpha():\n        format_spec = format_spec[:-1]\n    if not format_spec.isdigit():\n        raise ValueError(\"Format specifier must have a set width\")\n    return int(format_spec)\n\n\ndef _get_parts_of_format_string(resolved_string, literal_texts, format_specs):\n    \"\"\"\n    Inner function of reverse_format, returns the resolved value for each\n    field in pattern.\n    \"\"\"\n    _text = resolved_string\n    bits = []\n\n    if literal_texts[-1] != \"\" and _text.endswith(literal_texts[-1]):\n        _text = _text[: -len(literal_texts[-1])]\n        literal_texts = literal_texts[:-1]\n        format_specs = format_specs[:-1]\n\n    for i, literal_text in enumerate(literal_texts):\n        if literal_text != \"\":\n            if literal_text not in _text:\n                raise ValueError(\n                    (\"Resolved string must match pattern. 
\" \"'{}' not found.\".format(literal_text))\n                )\n            bit, _text = _text.split(literal_text, 1)\n            if bit:\n                bits.append(bit)\n        elif i == 0:\n            continue\n        else:\n            try:\n                format_spec = _validate_format_spec(format_specs[i - 1])\n                bits.append(_text[0:format_spec])\n                _text = _text[format_spec:]\n            except Exception:\n                if i == len(format_specs) - 1:\n                    format_spec = _validate_format_spec(format_specs[i])\n                    bits.append(_text[:-format_spec])\n                    bits.append(_text[-format_spec:])\n                    _text = []\n                else:\n                    _validate_format_spec(format_specs[i - 1])\n    if _text:\n        bits.append(_text)\n    if len(bits) > len([fs for fs in format_specs if fs is not None]):\n        bits = bits[1:]\n    return bits\n\n\ndef reverse_format(format_string, resolved_string):\n    \"\"\"\n    Reverse the string method format.\n\n    Given format_string and resolved_string, find arguments that would\n    give ``format_string.format(**arguments) == resolved_string``\n\n    Parameters\n    ----------\n    format_string : str\n        Format template string as used with str.format method\n    resolved_string : str\n        String with same pattern as format_string but with fields\n        filled out.\n\n    Returns\n    -------\n    args : dict\n        Dict of the form {field_name: value} such that\n        ``format_string.(**args) == resolved_string``\n\n    Examples\n    --------\n\n    >>> reverse_format('data_{year}_{month}_{day}.csv', 'data_2014_01_03.csv')\n    {'year': '2014', 'month': '01', 'day': '03'}\n    >>> reverse_format('data_{year:d}_{month:d}_{day:d}.csv', 'data_2014_01_03.csv')\n    {'year': 2014, 'month': 1, 'day': 3}\n    >>> reverse_format('data_{date:%Y_%m_%d}.csv', 'data_2016_10_01.csv')\n    {'date': 
datetime.datetime(2016, 10, 1, 0, 0)}\n    >>> reverse_format('{state:2}{zip:5}', 'PA19104')\n    {'state': 'PA', 'zip': '19104'}\n\n    See also\n    --------\n    str.format : method that this reverses\n    reverse_formats : method for reversing a list of strings using one pattern\n    \"\"\"\n    from datetime import datetime\n    from string import Formatter\n    from fsspec.implementations.local import make_path_posix\n\n    fmt = Formatter()\n    args = {}\n\n    # ensure that format_string is in posix format\n    format_string = make_path_posix(format_string)\n\n    # split the string into bits\n    literal_texts, field_names, format_specs, conversions = zip(*fmt.parse(format_string))\n    if not any(field_names):\n        return {}\n\n    for i, conversion in enumerate(conversions):\n        if conversion:\n            raise ValueError((\"Conversion not allowed. Found on {}.\".format(field_names[i])))\n\n    # ensure that resolved string is in posix format\n    resolved_string = make_path_posix(resolved_string)\n\n    # get a list of the parts that matter\n    bits = _get_parts_of_format_string(resolved_string, literal_texts, format_specs)\n\n    for i, (field_name, format_spec) in enumerate(zip(field_names, format_specs)):\n        if field_name:\n            try:\n                if format_spec.startswith(\"%\"):\n                    args[field_name] = datetime.strptime(bits[i], format_spec)\n                elif format_spec[-1] in list(\"bcdoxX\"):\n                    args[field_name] = int(bits[i])\n                elif format_spec[-1] in list(\"eEfFgGn\"):\n                    args[field_name] = float(bits[i])\n                elif format_spec[-1] == \"%\":\n                    args[field_name] = float(bits[i][:-1]) / 100\n                else:\n                    args[field_name] = fmt.format_field(bits[i], format_spec)\n            except Exception:\n                args[field_name] = bits[i]\n\n    return args\n\n\ndef 
reverse_formats(format_string, resolved_strings):\n    \"\"\"\n    Reverse the string method format for a list of strings.\n\n    Given format_string and resolved_strings, for each resolved string\n    find arguments that would give\n    ``format_string.format(**arguments) == resolved_string``.\n\n    Each item in the output corresponds to a new column with the key setting\n    the name and the values representing a mapping from list of resolved_strings\n    to the related value.\n\n    Parameters\n    ----------\n    format_string : str\n        Format template string as used with str.format method\n    resolved_strings : list\n        List of strings with same pattern as format_string but with fields\n        filled out.\n\n    Returns\n    -------\n    args : dict\n        Dict of the form ``{field: [value_0, ..., value_n], ...}`` where values are in\n        the same order as resolved_strings, so:\n        ``format_string.format(**{f: v[0] for f, v in args.items()}) == resolved_strings[0]``\n\n    Examples\n    --------\n\n    >>> paths = ['data_2014_01_03.csv', 'data_2014_02_03.csv', 'data_2015_12_03.csv']\n    >>> reverse_formats('data_{year}_{month}_{day}.csv', paths)\n    {'year':  ['2014', '2014', '2015'],\n     'month': ['01', '02', '12'],\n     'day':   ['03', '03', '03']}\n    >>> reverse_formats('data_{year:d}_{month:d}_{day:d}.csv', paths)\n    {'year': [2014, 2014, 2015], 'month': [1, 2, 12], 'day': [3, 3, 3]}\n    >>> reverse_formats('data_{date:%Y_%m_%d}.csv', paths)\n    {'date': [datetime.datetime(2014, 1, 3, 0, 0),\n              datetime.datetime(2014, 2, 3, 0, 0),\n              datetime.datetime(2015, 12, 3, 0, 0)]}\n    >>> reverse_formats('{state:2}{zip:5}', ['PA19104', 'PA19143', 'MA02534'])\n    {'state': ['PA', 'PA', 'MA'], 'zip': ['19104', '19143', '02534']}\n\n    See also\n    --------\n    str.format : method that this reverses\n    reverse_format : method for reversing just one string using a pattern\n    \"\"\"\n    from string 
import Formatter\n\n    fmt = Formatter()\n\n    # get the fields from the format_string\n    field_names = [i[1] for i in fmt.parse(format_string) if i[1]]\n\n    # initialize the args dict with an empty list for each field\n    args = {field_name: [] for field_name in field_names}\n    for resolved_string in resolved_strings:\n        for field, value in reverse_format(format_string, resolved_string).items():\n            args[field].append(value)\n\n    return args\n"
  },
  {
    "path": "intake/source/zarr.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nfrom .base import DataSource\nfrom intake.readers.datatypes import Zarr\nfrom intake.readers.readers import DaskZarr, NumpyZarr\n\n\nclass ZarrArraySource(DataSource):\n    \"\"\"Read Zarr format files into an array\n\n    Zarr is an numerical array storage format which works particularly well\n    with remote and parallel access.\n    For specifics of the format, see https://zarr.readthedocs.io/en/stable/\n    \"\"\"\n\n    container = \"ndarray\"\n    name = \"ndzarr\"\n    version = \"0.0.1\"\n    partition_access = True\n\n    def __init__(self, urlpath, storage_options=None, component=None, metadata=None):\n        \"\"\"\n\n        Parameters\n        ----------\n        urlpath : str\n            Location of data file(s), possibly including protocol\n            information\n        storage_options : dict\n            Passed on to storage backend for remote files\n        component : str or None\n            If None, assume the URL points to an array. If given, assume\n            the URL points to a group, and descend the group to find the\n            array at this location in the hierarchy; components are separated\n            by the \"/\" character.\n        \"\"\"\n        self.data = Zarr(\n            url=urlpath,\n            storage_options=storage_options,\n            root=component,\n            metadata=metadata,\n        )\n        self.metadata = metadata\n\n    def to_dask(self):\n        return DaskZarr(self.data).read()\n\n    def read(self):\n        return NumpyZarr(self.data).read()\n"
  },
  {
    "path": "intake/tests/__init__.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n"
  },
  {
    "path": "intake/tests/catalog1.yml",
    "content": "sources:\n  ex1:\n    description: this source doesn't work\n    driver: csv\n    args: {}\n  ex2:\n    description: this source doesn't work\n    metadata:\n      foo: 'bar'\n      bar: [1, 2, 3]\n    driver: csv\n    args: {}\n"
  },
  {
    "path": "intake/tests/catalog2.yml",
    "content": "sources:\n  ex3:\n    description: this source doesn't work\n    driver: csv\n    args: {}\n  ex4:\n    description: this source doesn't work\n    metadata:\n      foo: 'bar'\n      bar: [1, 2, 3]\n    driver: csv\n    args: {}\n"
  },
  {
    "path": "intake/tests/catalog_inherit_params.yml",
    "content": "---\nmetadata:\n  version: 1\n  parameters:\n    bucket:\n      type: str\n      description: description\n      default: test_bucket\nsources:\n  param:\n    driver: parquet\n    description: description\n    args:\n      urlpath: s3://{{bucket}}/file.parquet\n  local_param_overwrites:\n    driver: parquet\n    description: description\n    parameters:\n      bucket:\n        type: str\n        description: description\n        default: local_param\n    args:\n      urlpath: s3://{{bucket}}/file.parquet\n  local_and_global_params:\n    driver: parquet\n    description: description\n    parameters:\n      filename:\n        type: str\n        description: description\n        default: local_filename.parquet\n    args:\n      urlpath: s3://{{bucket}}/{{filename}}\n  subcat:\n    driver: yaml_file_cat\n    args:\n      path: \"{{CATALOG_DIR}}/catalog_nested_sub.yml\"\n      user_parameters:\n        inner:\n          type: str\n          description: description\n          default: test_name\n"
  },
  {
    "path": "intake/tests/catalog_nested.yml",
    "content": "sources:\n  nested:\n    description: References catalog_nested_sub.yml\n    driver: yaml_file_cat\n    args:\n      path: \"{{ CATALOG_DIR }}/__unit_test_catalog_nested_sub.yml\"\n"
  },
  {
    "path": "intake/tests/catalog_nested_sub.yml",
    "content": "sources:\n  ex1:\n    description: this is a sub-resource\n    driver: csv\n    args:\n      urlpath: \"\"\n  ex2:\n    description: this is a sub-resource\n    driver: csv\n    args:\n      urlpath: \"{{bucket}}/{{inner}}\"\n"
  },
  {
    "path": "intake/tests/test_config.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport os\n\nimport pytest\n\nfrom intake import config\nfrom intake.config import Config, defaults\nfrom intake.util_tests import temp_conf\n\n\n@pytest.mark.parametrize(\"conf\", [{}, {\"port\": 5000}, {\"other\": True}])\ndef test_load_conf(conf):\n    # This test will only work if your config is set to default\n    inconf = defaults.copy()\n    expected = inconf.copy()\n    with temp_conf(conf) as fn:\n        config = Config(fn)\n        config.load(fn)\n        expected.update(conf)\n        assert dict(config) == expected\n        config.reset()\n        assert dict(config) == inconf\n\n\n@pytest.mark.skipif(os.name == \"nt\", reason=\"Paths are different on win\")\ndef test_pathdirs():\n    assert config.intake_path_dirs([]) == []\n    assert config.intake_path_dirs([\"paths\"]) == [\"paths\"]\n    assert config.intake_path_dirs(\"\") == [\"\"]\n    assert config.intake_path_dirs(\"path1:path2\") == [\"path1\", \"path2\"]\n    assert config.intake_path_dirs(\"memory://path1:memory://path2\") == [\n        \"memory://path1\",\n        \"memory://path2\",\n    ]\n\n\n@pytest.mark.parametrize(\n    \"conf\",\n    [\n        {\"environment_conf_parse\": \"error\"},\n        {\"environment_conf_parse\": \"warn\"},\n        {\"environment_conf_parse\": \"ignore\"},\n    ],\n)\ndef test_load_env(conf):\n    # test the parsing of environment variables as strings\n    os.environ[\"INTAKE_CACHE\"] = \"./tmp\"  # this causes a SyntaxError\n    os.environ[\"INTAKE_CACHE2\"] = \"tmp\"  # this causes a ValueError\n    with temp_conf(conf) as fn:\n        if conf[\"environment_conf_parse\"] == \"error\":\n            # When 
raise_on_error is True, ensure the exception is raised\n            with pytest.raises(ValueError, SyntaxError):\n                Config(fn)\n        elif conf[\"environment_conf_parse\"] == \"warn\":\n            # When raise_on_error is False, ensure the variable is parsed as a string\n            with pytest.warns(UserWarning, match=\"environment variable\"):\n                config = Config(fn)\n            assert config[\"cache\"] == \"./tmp\"\n            assert config[\"cache2\"] == \"tmp\"\n        else:\n            with pytest.warns(None) as warnings:\n                config = Config(fn)\n            assert not warnings\n            assert config[\"cache\"] == \"./tmp\"\n            assert config[\"cache2\"] == \"tmp\"\n"
  },
  {
    "path": "intake/tests/test_top_level.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport os\nimport posixpath\nimport sys\nimport tempfile\nimport time\n\nimport platformdirs\nimport pytest\n\nimport intake\nimport intake.catalog.local\n\nfrom .test_utils import copy_test_file\n\n\n@pytest.fixture\ndef user_catalog():\n    target_catalog = copy_test_file(\n        \"catalog1.yml\", platformdirs.user_data_dir(appname=\"intake\", appauthor=\"intake\")\n    )\n    yield target_catalog\n    # Remove the file, but not the directory (because there might be other\n    # files already there)\n    os.remove(target_catalog)\n\n\n@pytest.fixture\ndef tmp_path_catalog():\n    tmp_path = posixpath.join(tempfile.gettempdir(), \"intake\")\n    try:\n        os.makedirs(tmp_path)\n    except:\n        pass\n    target_catalog = copy_test_file(\"catalog1.yml\", tmp_path)\n    yield target_catalog\n    # Remove the file, but not the directory (because there might be other\n    # files already there)\n    os.remove(target_catalog)\n\n\ndef test_autoregister_open():\n    assert hasattr(intake, \"open_csv\")\n\n\ndef test_default_catalogs():\n    # No assumptions about contents of these catalogs.\n    # Just make sure they exist and don't raise exceptions\n    list(intake.cat)\n\n\ndef test_user_catalog(user_catalog):\n    cat = intake.load_combo_catalog()\n    assert set(cat) >= set([\"ex1\", \"ex2\"])\n\n\ndef test_open_styles(tmp_path_catalog):\n    cat = intake.catalog.local.YAMLFileCatalog(tmp_path_catalog)\n    cat2 = intake.open_catalog(tmp_path_catalog)\n    assert list(cat) == list(cat2)\n    cat2 = intake.open_catalog([tmp_path_catalog])\n    assert list(cat) == list(cat2)\n    cat2 = 
intake.open_catalog(os.path.join(os.path.dirname(tmp_path_catalog), \"*\"))\n    assert list(cat) == list(cat2)\n    assert type(cat2).name == \"yaml_files_cat\"\n    cat2 = intake.open_catalog(os.path.dirname(tmp_path_catalog))\n    assert list(cat) == list(cat2)\n    assert type(cat2).name == \"yaml_files_cat\"\n    cat2 = intake.open_yaml_file_cat(tmp_path_catalog)\n    assert list(cat) == list(cat2)\n    cat2 = intake.open_yaml_files_cat([tmp_path_catalog])\n    assert list(cat) == list(cat2)\n    cat2 = intake.open_yaml_files_cat(os.path.join(os.path.dirname(tmp_path_catalog), \"*\"))\n    assert list(cat) == list(cat2)\n\n\ndef test_path_catalog(tmp_path_catalog):\n    intake.config.conf[\"catalog_path\"] = [posixpath.join(tempfile.gettempdir(), \"intake\")]\n    cat = intake.load_combo_catalog()\n    time.sleep(2)  # wait 2 seconds for catalog to refresh\n    assert set(cat) >= set([\"ex1\", \"ex2\"])\n    del intake.config.conf[\"catalog_path\"]\n\n\ndef test_bad_open():\n    with pytest.raises(ValueError):\n        # unknown driver\n        intake.open_catalog(\"\", driver=\"unknown\")\n    with pytest.raises(ValueError):\n        # bad URI type (NB falsish values become empty catalogs)\n        intake.open_catalog(True)\n    # default empty catalog\n    assert intake.open_catalog() == intake.open_catalog(None)\n\n\ndef test_bad_open_helptext():\n    with pytest.raises(ValueError) as val_err:\n        # unknown driver\n        intake.open_catalog(\"\", driver=\"unknown\")\n    assert \"plugin directory\" in str(val_err.value).lower()\n\n    with pytest.raises(AttributeError) as attr_err:\n        intake.open_not_a_real_plugin()\n    assert \"plugin directory\" in str(attr_err.value).lower()\n\n\ndef test_output_notebook():\n    pytest.importorskip(\"hvplot\")\n    intake.output_notebook()\n\n\ndef test_old_usage():\n    assert isinstance(intake.Catalog(), intake.Catalog)\n    assert intake.Catalog is intake.catalog.base.Catalog\n\n\ndef 
test_no_imports():\n    mods = [mod for mod in sys.modules if mod.startswith(\"intake\")]\n    [sys.modules.pop(mod) for mod in mods]\n\n    import intake  # noqa\n\n    assert \"intake\" in sys.modules\n\n    for mod in [\n        \"intake.tests\",\n        \"intake.interface\",\n        \"intake.source.csv\",\n        \"intake.cli\",\n        \"intake.auth\",\n    ]:\n        assert mod not in sys.modules\n\n\n@pytest.fixture\ndef tmp_path_catalog_nested():\n    with tempfile.TemporaryDirectory() as tmp_dir:\n        tmp_path = posixpath.join(tmp_dir, \"intake\")\n        target_catalog = copy_test_file(\"catalog_nested.yml\", tmp_path)\n        copy_test_file(\"catalog_nested_sub.yml\", tmp_path)\n        yield target_catalog\n\n\ndef test_nested_catalog_access(tmp_path_catalog_nested):\n    cat = intake.open_catalog(tmp_path_catalog_nested)\n    entry1 = cat.nested.ex1\n    entry2 = cat[\"nested.ex1\"]\n    entry3 = cat[[\"nested\", \"ex1\"]]\n    entry4 = cat[\"nested\", \"ex1\"]\n    assert entry1 == entry2 == entry3 == entry4\n"
  },
  {
    "path": "intake/tests/test_utils.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\nimport os\nimport posixpath\nimport shutil\n\nimport pytest\nimport yaml\n\nfrom intake.utils import make_path_posix, no_duplicate_yaml\n\n\ndef test_windows_file_path():\n    path = \"C:\\\\Users\\\\user\\\\fake.file\"\n    actual = make_path_posix(path)\n    expected = \"C:/Users/user/fake.file\"\n    assert actual == expected\n\n\ndef test_make_path_posix_removes_double_sep():\n    path = \"user//fake.file\"\n    actual = make_path_posix(path)\n    expected = \"user/fake.file\"\n    assert actual == expected\n\n\n@pytest.mark.parametrize(\n    \"path\",\n    [\n        \"~/fake.file\",\n        \"https://example.com\",\n    ],\n)\ndef test_noops(path):\n    \"\"\"For non windows style paths, make_path_posix should be a noop\"\"\"\n    assert make_path_posix(path) == path\n\n\ndef test_roundtrip_file_path():\n    path = os.path.dirname(__file__)\n    actual = make_path_posix(path)\n    assert \"\\\\\" not in actual\n    assert os.path.samefile(actual, path)\n\n\ndef test_yaml_tuples():\n    data = (1, 2)\n    text = yaml.dump(data)\n    with no_duplicate_yaml():\n        assert yaml.safe_load(text) == data\n\n\ndef copy_test_file(filename, target_dir):\n    if not os.path.exists(target_dir):\n        os.makedirs(target_dir)  # can't use exist_ok in Python 2.7\n    target_dir = make_path_posix(target_dir)\n    # Put a catalog file in the user catalog directory\n    test_dir = make_path_posix(os.path.dirname(__file__))\n    test_catalog = posixpath.join(test_dir, filename)\n    target_catalog = posixpath.join(target_dir, \"__unit_test_\" + filename)\n\n    shutil.copyfile(test_catalog, target_catalog)\n    return 
target_catalog\n"
  },
  {
    "path": "intake/util_tests.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport os\nimport shutil\nimport tempfile\nfrom contextlib import contextmanager\nimport yaml\n\n\n@contextmanager\ndef tempdir():\n    d = tempfile.mkdtemp()\n    try:\n        yield d\n    finally:\n        if os.path.exists(d):\n            shutil.rmtree(d)\n\n\n@contextmanager\ndef temp_conf(conf):\n    with tempdir() as d:\n        fn = os.path.join(d, \"conf.yaml\")\n        with open(fn, \"w\") as f:\n            yaml.dump(conf, f)\n        yield fn\n"
  },
  {
    "path": "intake/utils.py",
    "content": "# -----------------------------------------------------------------------------\n# Copyright (c) 2012 - 2018, Anaconda, Inc. and Intake contributors\n# All rights reserved.\n#\n# The full license is in the LICENSE file, distributed with this software.\n# -----------------------------------------------------------------------------\n\nimport collections\nimport collections.abc\nimport datetime\nimport importlib\nimport logging\nimport os\nimport sys\nimport warnings\nfrom collections import OrderedDict\nfrom contextlib import contextmanager\n\nimport yaml\n\nlogger = logging.getLogger(\"intake\")\n\n\ndef import_name(name):\n    modname = name.split(\":\", 1)[0]\n    logger.debug(\"Importing: '%s'\" % modname)\n    mod = importlib.import_module(modname)\n    if \":\" in name:\n        end = name.split(\":\")[1]\n        for bit in end.split(\".\"):\n            mod = getattr(mod, bit)\n    return mod\n\n\ndef make_path_posix(path):\n    \"\"\"Make path generic\"\"\"\n    # TODO: migrate to fsspec' implementation\n    if \"://\" in path:\n        return path\n    return path.replace(\"\\\\\", \"/\").replace(\"//\", \"/\")\n\n\ndef no_duplicates_constructor(loader, node, deep=False):\n    \"\"\"Check for duplicate keys while loading YAML\n\n    https://gist.github.com/pypt/94d747fe5180851196eb\n    \"\"\"\n\n    mapping = {}\n    for key_node, value_node in node.value:\n        key = loader.construct_object(key_node, deep=deep)\n        value = loader.construct_object(value_node, deep=deep)\n        if key in mapping:\n            from intake.catalog.exceptions import DuplicateKeyError\n\n            raise DuplicateKeyError(\n                \"while constructing a mapping\",\n                node.start_mark,\n                \"found duplicate key (%s)\" % key,\n                key_node.start_mark,\n            )\n        mapping[key] = value\n\n    return loader.construct_mapping(node, deep)\n\n\ndef tuple_constructor(loader, node, deep=False):\n    
return tuple(loader.construct_object(node, deep=deep) for node in node.value)\n\n\ndef represent_dictionary_order(self, dict_data):\n    return self.represent_mapping(\"tag:yaml.org,2002:map\", dict_data.items())\n\n\nyaml.add_representer(OrderedDict, represent_dictionary_order)\n\n\n@contextmanager\ndef no_duplicate_yaml():\n    yaml.SafeLoader.add_constructor(\n        yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, no_duplicates_constructor\n    )\n    yaml.SafeLoader.add_constructor(\"tag:yaml.org,2002:python/tuple\", tuple_constructor)\n    try:\n        yield\n    finally:\n        yaml.SafeLoader.add_constructor(\n            yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,\n            yaml.constructor.SafeConstructor.construct_yaml_map,\n        )\n\n\ndef yaml_load(stream):\n    \"\"\"Parse YAML in a context where duplicate keys raise exception\"\"\"\n    with no_duplicate_yaml():\n        return yaml.safe_load(stream)\n\n\ndef classname(ob):\n    \"\"\"Get the object's class's name as package.module.Class\"\"\"\n    import inspect\n\n    if inspect.isclass(ob):\n        return \".\".join([ob.__module__, ob.__name__])\n    else:\n        return \".\".join([ob.__class__.__module__, ob.__class__.__name__])\n\n\nclass DictSerialiseMixin(object):\n    __tok_cache = None\n\n    def __new__(cls, *args, **kwargs):\n        \"\"\"Capture creation args when instantiating\"\"\"\n        o = object.__new__(cls)\n        o._captured_init_args = args\n        o._captured_init_kwargs = kwargs\n\n        return o\n\n    @property\n    def classname(self):\n        return classname(self)\n\n    def __dask_tokenize__(self):\n        if self.__tok_cache is None:\n            from intake.source.utils import tokenize\n\n            self.__tok_cache = tokenize(self)\n        return self.__tok_cache\n\n    def __getstate__(self):\n        args = [\n            arg.__getstate__() if isinstance(arg, DictSerialiseMixin) else arg\n            for arg in self._captured_init_args\n   
     ]\n        # We employ OrderedDict in several places. The motivation\n        # is to speed up dask tokenization. When dask tokenizes a plain dict,\n        # it sorts the keys, and it turns out that this sort operation\n        # dominates the call time, even for very small dicts. Using an\n        # OrderedDict steers dask toward a different and faster tokenization.\n        kwargs = collections.OrderedDict(\n            {\n                k: arg.__getstate__() if isinstance(arg, DictSerialiseMixin) else arg\n                for k, arg in self._captured_init_kwargs.items()\n            }\n        )\n        return collections.OrderedDict(cls=self.classname, args=args, kwargs=kwargs)\n\n    def __setstate__(self, state):\n        # reconstitute instances here\n        self._captured_init_kwargs = state[\"kwargs\"]\n        self._captured_init_args = state[\"args\"]\n        state.pop(\"cls\", None)\n        self.__init__(*state[\"args\"], **state[\"kwargs\"])\n\n    def __hash__(self):\n        from intake.source.utils import tokenize\n\n        return int(tokenize(self), 16)\n\n    def __eq__(self, other):\n        return hash(self) == hash(other)\n\n\ndef remake_instance(data):\n    import importlib\n\n    if isinstance(data, str):\n        data = {\"cls\": data}\n    else:\n        data = data.copy()\n    mod, klass = data.pop(\"cls\").rsplit(\".\", 1)\n    module = importlib.import_module(mod)\n    cl = getattr(module, klass)\n    return cl(*data.get(\"args\", ()), **data.get(\"kwargs\", {}))\n\n\ndef pretty_describe(object, nestedness=0, indent=2):\n    \"\"\"Maintain dict ordering - but make string version prettier\"\"\"\n    if not isinstance(object, dict):\n        return str(object)\n    sep = f'\\n{\" \" * nestedness * indent}'\n    out = sep.join(f\"{k}: {pretty_describe(v, nestedness + 1)}\" for k, v in object.items())\n    if nestedness > 0 and out:\n        return f\"{sep}{out}\"\n    return out\n\n\ndef decode_datetime(obj):\n    import 
numpy\n\n    if not isinstance(obj, numpy.ndarray) and \"__datetime__\" in obj:\n        try:\n            obj = datetime.datetime.strptime(\n                obj[\"as_str\"],\n                \"%Y%m%dT%H:%M:%S.%f%z\",\n            )\n        except ValueError:  # Perhaps lacking tz info\n            obj = datetime.datetime.strptime(\n                obj[\"as_str\"],\n                \"%Y%m%dT%H:%M:%S.%f\",\n            )\n    return obj\n\n\ndef encode_datetime(obj):\n    if isinstance(obj, datetime.datetime):\n        return {\"__datetime__\": True, \"as_str\": obj.strftime(\"%Y%m%dT%H:%M:%S.%f%z\")}\n    return obj\n\n\nclass RegistryView(collections.abc.Mapping):\n    \"\"\"\n    Wrap registry dict in a read-only dict view.\n\n    Subclasses define attributes filled into warning and error messages:\n    - self._registry_name\n    - self._register_func_name\n    - self._unregister_func_name\n    \"\"\"\n\n    def __init__(self, registry):\n        self._registry = registry\n\n    def __repr__(self):\n        return f\"{self.__class__.__name__}({self._registry!r})\"\n\n    def __getitem__(self, key):\n        return self._registry[key]\n\n    def __iter__(self):\n        yield from self._registry\n\n    def __len__(self):\n        return len(self._registry)\n\n    # Support the common mutation methods for now, but warn.\n\n    def update(self, *args, **kwargs):\n        warnings.warn(\n            f\"In a future release of intake, the {self._registry_name} will \"\n            f\"not be directly mutable. Use {self._register_func_name}.\",\n            DeprecationWarning,\n        )\n        self._registry.update(*args, **kwargs)\n        # raise TypeError(\n        #     f\"The registry cannot be edited directly. \"\n        #     f\"Instead, use the {self._register_func_name{\")\n\n    def __setitem__(self, key, value):\n        warnings.warn(\n            f\"In a future release of intake, the {self._registry_name} will \"\n            f\"not be directly mutable. 
Use {self._register_func_name}.\",\n            DeprecationWarning,\n        )\n        self._registry[key] = value\n        # raise TypeError(\n        #     f\"The registry cannot be edited directly. \"\n        #     f\"Instead, use the {self._register_func_name{\")\n\n    def __delitem__(self, key):\n        warnings.warn(\n            f\"In a future release of intake, the {self._registry_name} will \"\n            f\"not be directly mutable. Use {self._unregister_func_name}.\",\n            DeprecationWarning,\n        )\n        del self._registry[key]\n        # raise TypeError(\n        #     f\"The registry cannot be edited directly. \"\n        #     f\"Instead, use the {self._unregister_func_name{\")\n\n\nclass DriverRegistryView(RegistryView):\n    # These attributes are used by the base class\n    # to fill in warning and error messages.\n    _registry_name = \"intake.registry\"\n    _register_func_name = \"intake.register_driver\"\n    _unregister_func_name = \"intake.unregister_driver\"\n\n\nclass ContainerRegistryView(RegistryView):\n    # These attributes are used by the base class\n    # to fill in warning and error messages.\n    _registry_name = \"intake.container_map\"\n    _register_func_name = \"intake.register_container\"\n    _unregister_func_name = \"intake.unregister_container\"\n\n\nclass ModuleImporter:\n    def __init__(self, destination):\n        self.destination = destination\n        self.module = None\n\n    def __getattribute__(self, item):\n        d = object.__getattribute__(self, \"__dict__\")\n        if item in d:\n            return d[item]\n        if self.module is None:\n            print(\"Importing module: \", self.destination)\n            self.module = __import__(self.destination)\n        else:\n            print(\"Referencing module: \", self.destination)\n        sys.modules[self.destination] = self.module\n        return getattr(self.module, item)\n\n\ndef is_notebook() -> bool:\n    \"\"\"Check if code is running 
in a notebook\n\n    Copied from tqdm.autonotebook\n\n    Returns\n    -------\n    bool\n        True if inside a notebook. False if not inside a notebook.\n    \"\"\"\n\n    try:\n        get_ipython = sys.modules[\"IPython\"].get_ipython\n        if \"IPKernelApp\" not in get_ipython().config:  # pragma: no cover\n            raise ImportError(\"console\")\n        if \"VSCODE_PID\" in os.environ:  # pragma: no cover\n            raise ImportError(\"vscode\")\n        return True\n    except Exception:\n        return False\n\n\ndef is_fsspec_url(s: str) -> bool:\n    \"\"\"Simple test to see if given string is likely an fsspec URL\"\"\"\n    return \"://\" in s or \"::\" in s\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[tool.black]\nline-length = 100\n\n[tool.ruff]\nline-length = 130\nignore = [\"E402\"]  # top of file imports\n\n[tool.ruff.per-file-ignores]\n\"*/tests/*\" = [\"E722\"]  # bare `except`\n\"intake/util_tests.py\" = [\"E722\"]  # bare `except`\n\"__init__.py\" = [\"F401\"]  # imported but unused\n\n[tool.isort]\nprofile = \"black\"\n\n[build-system]\nrequires = [\"setuptools>=64\", \"setuptools_scm>=8\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[project]\nname = \"intake\"\ndynamic = [\"version\"]\ndescription = \"Data catalog, search and load\"\nreadme = \"README.md\"\nrequires-python = \">=3.10\"\nlicense = {text = \"MIT\"}\nauthors = [\n  {name = \"Martin Durant\", email = \"martin.durant@alumni.utoronto.ca\"},\n]\nclassifiers = [\n  \"License :: OSI Approved :: MIT License\",\n  \"Operating System :: OS Independent\",\n  \"Programming Language :: Python\",\n  \"Development Status :: 4 - Beta\",\n]\ndependencies = [\n  \"fsspec >=2023.0.0\",\n  \"pyyaml\",\n  \"platformdirs\",\n  \"networkx\"\n]\n\n[tool.setuptools_scm]\nversion_file = \"intake/_version.py\"\n\n[tool.setuptools]\ninclude-package-data = false\nzip-safe = false\n\n[tool.setuptools.packages.find]\nexclude = [\"*tests*\", \"dist\", \"*examples*\", \"*scripts*\", \"*docs*\"]\nnamespaces = false\n\n[flake8]\nexclude = \"__init__.py\"\nignore = \"E,W\"\nselect = \"F,E101,E111,E501\"\nmax-line-length = 100\n\n[project.entry-points.\"intake.drivers\"]\ncsv = \"intake.source.csv:CSVSource\"\njsonfiles = \"intake.source.jsonfiles:JSONFileSource\"\nnumpy = \"intake.source.npy:NPySource\"\ntiled_cat = \"intake.source.tiled:TiledCatalog\"\ntextfiles = \"intake.source.textfiles:TextFilesSource\"\nndzarr = \"intake.source.zarr:ZarrArraySource\"\n"
  },
  {
    "path": "readthedocs.yml",
    "content": "version: 2\n\nformats: all\n\nbuild:\n  os: ubuntu-22.04\n  tools:\n    python: mambaforge-4.10\n\nconda:\n  environment: docs/environment.yml\n\npython:\n  install:\n    - method: pip\n      path: .\n\nsphinx:\n  configuration: docs/source/conf.py\n"
  },
  {
    "path": "scripts/ci/environment-pip.yml",
    "content": "name: test_env\nchannels:\n  - defaults\ndependencies:\n  - python=3.10\n  - pip\n  - pip:\n      - rangehttpserver\n      - aiohttp\n      - flask\n      - appdirs\n      - dask\n      - jinja2\n      - numpy\n      - pyyaml\n      - requests\n      - msgpack-numpy\n      - pytest-cov\n      - coveralls\n      - pytest\n      - fsspec\n      - intake-parquet\n      - zarr\n      - notebook\n      - panel\n      - hvplot\n      - bokeh\n      - dask\n      - h5netcdf\n      - h5py < 3.15  # due to HDF error, see netcdf4 Issue #1438\n      - intake\n      - netcdf4\n      - pip\n      - pydap\n      - pytest\n      - rasterio\n      - s3fs\n      - scikit-image\n      - xarray\n      - zarr\n      - moto\n      - toolz\n      - pre-commit\n      - siphon\n"
  },
  {
    "path": "scripts/ci/environment-py310.yml",
    "content": "name: test_env\nchannels:\n  - conda-forge\ndependencies:\n  - python=3.10\n  - aiohttp\n  - flask\n  - appdirs\n  - dask\n  - jinja2\n  - numpy\n  - pyyaml\n  - requests\n  - msgpack-numpy\n  - pytest\n  - fsspec\n  - intake-parquet\n  - zarr\n  - notebook\n  - panel\n  - hvplot\n  - bokeh\n  - dask\n  - intake\n  - pip\n  - pytest\n  - rasterio\n  - s3fs\n  - h5netcdf\n  - xarray\n  - zarr\n  - moto\n  - httpx\n  - typer\n  - pydantic >=1.8\n  - sqlalchemy\n  - click\n  - rangehttpserver\n  - pre-commit\n  - sqlalchemy\n  - python-duckdb\n  - pytest-postgresql\n  - psycopg\n  - siphon\n  - postgresql\n"
  },
  {
    "path": "scripts/ci/environment-py311.yml",
    "content": "name: test_env\nchannels:\n  - conda-forge\ndependencies:\n  - python=3.11\n  - aiohttp\n  - flask\n  - appdirs\n  - dask\n  - jinja2\n  - numpy\n  - pyyaml\n  - requests\n  - msgpack-numpy\n  - pytest\n  - fsspec\n  - intake-parquet\n  - zarr\n  - notebook\n  - panel\n  - hvplot\n  - bokeh\n  - dask\n  - intake\n  - pip\n  - pytest\n  - rasterio\n  - s3fs\n  - xarray\n  - h5netcdf\n  - zarr\n  - moto\n  - httpx\n  - typer\n  - pydantic >=1.8\n  - sqlalchemy\n  - click\n  - rangehttpserver\n  - pre-commit\n  - pytest-postgresql\n  - psycopg\n  - python-duckdb\n  - siphon\n  - postgresql\n"
  },
  {
    "path": "scripts/ci/environment-py312.yml",
    "content": "name: test_env\nchannels:\n  - conda-forge\ndependencies:\n  - python=3.12\n  - aiohttp\n  - flask\n  - appdirs\n  - dask\n  - jinja2\n  - numpy\n  - pyyaml\n  - requests\n  - msgpack-numpy\n  - pytest\n  - fsspec\n  - intake-parquet\n  - zarr\n  - notebook\n  - panel\n  - hvplot\n  - bokeh\n  - dask\n  - intake\n  - pip\n  - pytest\n  - rasterio\n  - s3fs\n  - xarray\n  - h5netcdf\n  - zarr\n  - moto\n  - httpx\n  - typer\n  - pydantic >=1.8\n  - sqlalchemy\n  - click\n  - rangehttpserver\n  - pre-commit\n  - pytest-postgresql\n  - psycopg\n  - python-duckdb\n  - siphon\n  - postgresql\n"
  },
  {
    "path": "scripts/ci/environment-py313.yml",
    "content": "name: test_env\nchannels:\n  - conda-forge\ndependencies:\n  - python=3.13\n  - aiohttp\n  - flask\n  - appdirs\n  - dask\n  - jinja2\n  - numpy\n  - pyyaml\n  - requests\n  - msgpack-numpy\n  - pytest-cov\n  - coveralls\n  - pytest\n  - fsspec\n  - intake-parquet\n  - zarr\n  - notebook\n  - panel\n  - hvplot\n  - bokeh\n  - dask\n  - h5netcdf\n  - intake\n  - netcdf4\n  - pip\n  - pydap\n  - pytest\n  - rasterio\n  - s3fs\n  - scikit-image\n  - xarray\n  - zarr\n  - moto\n  - rangehttpserver\n  - pre-commit\n  - sqlalchemy\n  - python-duckdb\n  - pytest-postgresql <7\n  - psycopg\n  - siphon\n  - postgresql\n"
  },
  {
    "path": "scripts/ci/environment-py314.yml",
    "content": "name: test_env\nchannels:\n  - conda-forge\ndependencies:\n  - python=3.14\n  - aiohttp\n  - flask\n  - appdirs\n  - dask\n  - jinja2\n  - numpy\n  - pyyaml\n  - requests\n  - msgpack-numpy\n  - pytest\n  - fsspec\n  - intake-parquet\n  - zarr\n  - notebook\n  - panel\n  - hvplot\n  - bokeh\n  - intake\n  - pip\n  - pytest\n  - rasterio\n  - s3fs\n  - xarray\n  - zarr\n  - moto\n  - httpx\n  - typer\n  - pydantic >=1.8\n  - sqlalchemy\n  - click\n  - rangehttpserver\n  - pre-commit\n  - sqlalchemy\n  - python-duckdb\n  - pytest-postgresql\n  - psycopg\n  - siphon\n  - postgresql\n"
  }
]