Repository: ckan/ckanapi Branch: master Commit: bedd91d1d410 Files: 38 Total size: 168.7 KB Directory structure: gitextract_v9a2d6s5/ ├── .github/ │ └── workflows/ │ ├── publish-pypi.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── COPYING ├── COPYING.fr ├── README.md ├── ckanapi/ │ ├── __init__.py │ ├── cli/ │ │ ├── __init__.py │ │ ├── action.py │ │ ├── batch.py │ │ ├── ckan_click.py │ │ ├── delete.py │ │ ├── dump.py │ │ ├── load.py │ │ ├── main.py │ │ ├── search.py │ │ ├── utils.py │ │ └── workers.py │ ├── common.py │ ├── datapackage.py │ ├── errors.py │ ├── localckan.py │ ├── remoteckan.py │ ├── testappckan.py │ ├── tests/ │ │ ├── __init__.py │ │ ├── mock/ │ │ │ └── mock_ckan.py │ │ ├── test_call.py │ │ ├── test_cli_action.py │ │ ├── test_cli_dump.py │ │ ├── test_cli_load.py │ │ ├── test_cli_workers.py │ │ ├── test_datapackage.py │ │ └── test_remote.py │ └── version.py ├── examples/ │ └── update_single_field.py ├── pyproject.toml └── requirements.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/publish-pypi.yml ================================================ name: Publish to PyPI # Publish to PyPI when a tag is pushed on: push: tags: - 'ckanapi-**' jobs: build: if: github.repository == 'ckan/ckanapi' name: Build distribution runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.11" - name: Install pypa/build run: python3 -m pip install build --user - name: Build a binary wheel and a source tarball run: python3 -m build - name: Store the distribution packages uses: actions/upload-artifact@v4 with: name: python-package-distributions path: dist/ publish-to-pypi: name: Publish Python distribution on PyPI needs: - build runs-on: ubuntu-latest environment: name: pypi url: https://pypi.org/p/ckanapi permissions: id-token: write steps: - name: Download all the dists uses: actions/download-artifact@v4 with: name: python-package-distributions path: dist/ - name: Publish distribution to PyPI uses: pypa/gh-action-pypi-publish@release/v1 publishSkipped: if: github.repository != 'ckan/ckanapi' runs-on: ubuntu-latest steps: - run: | echo "## Skipping PyPI publish on downstream repository" >> $GITHUB_STEP_SUMMARY ================================================ FILE: .github/workflows/test.yml ================================================ name: Tests on: [push, pull_request] jobs: test: strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] runs-on: ubuntu-latest container: # INFO: python 2 is no longer supported in # actions/setup-python, use python docker image instead image: python:${{ matrix.python-version }} steps: - uses: actions/checkout@v3 - name: Install requirements (py ${{ matrix.python-version }}) run: | pip install -e ".[testing]" - name: Run all tests (py ${{ matrix.python-version }}) run: python -m unittest discover ================================================ FILE: .gitignore ================================================ *.pyc MANIFEST build/ dist/ ckanapi.egg-info/ ================================================ FILE: CHANGELOG.md ================================================ # ckanapi Changelog ## v4.11 - 2026-03-20 * Fix Reference Assignment in Dump Things [#227](https://github.com/ckan/ckanapi/pull/227) ## v4.10 - 2026-03-13 * Fix Log File Not Working (Requires Bytes Mode) [#224](https://github.com/ckan/ckanapi/pull/224) * Python 3.14 support, Drop Python 2 support, cleanup [#225](https://github.com/ckan/ckanapi/pull/225) * Arguments for Dataset Dump Include Private and States [#223](https://github.com/ckan/ckanapi/pull/223) * Allow to define a timeout to all requests [#226](https://github.com/ckan/ckanapi/pull/226) ================================================ FILE: COPYING ================================================ ckanapi - Terms and Conditions of Use Unless otherwise noted, computer program source code of ckanapi is covered under Crown Copyright, Government of Canada, and is distributed under the MIT License. MIT License Copyright (c) Her Majesty the Queen in Right of Canada, represented by the President of the Treasury Board, 2013-2018 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: COPYING.fr ================================================ ckanapi - Conditions régissant l'utilisation Sauf indication contraire, le code source de la ckanapi est protégé par le droit d'auteur de la Couronne du gouvernement du Canada et distribué sous la licence MIT. Licence MIT (c) Droit d'auteur – Sa Majesté la Reine du chef du Canada, représentée par le président du Conseil du Trésor, 2013-2018 La présente autorise toute personne d'obtenir gratuitement une copie du présent logiciel et des documents connexes (le « logiciel »), de traiter le logiciel sans restriction, y compris, mais sans s'y limiter, les droits d'utiliser, de copier, de modifier, de fusionner, de publier, de distribuer, d'accorder une sous licence et de vendre des copies dudit logiciel, et de permettre aux personnes auxquelles le logiciel est fourni de le faire, selon les conditions suivantes : L'avis de droit d'auteur ci dessus et le présent avis de permission seront inclus dans toutes les copies et les sections importantes du logiciel. LE LOGICIEL EST FOURNI « TEL QUEL », SANS AUCUNE GARANTIE, EXPRESSE OU IMPLICITE, Y COMPRIS, MAIS SANS S'Y LIMITER, LA GARANTIE DE QUALITÉ MARCHANDE, L'ADAPTATION À UN USAGE PARTICULIER ET L'ABSENCE DE CONTREFAÇON. EN AUCUN CAS LES AUTEURS OU LES DÉTENTEURS DU DROIT D'AUTEUR NE SERONT TENUS RESPONSABLES DE TOUTE DEMANDE, DOMMAGE OU BRIS DE CONTRAT, DÉLIT CIVIL OU TOUT AUTRE MANQUEMENT LIÉ AU LOGICIEL, À SON UTILISATION OU À D'AUTRES ÉCHANGES LIÉS AU LOGICIEL. ================================================ FILE: README.md ================================================ ## ckanapi A command line interface and Python module for accessing the [CKAN Action API](http://docs.ckan.org/en/latest/api/index.html#action-api-reference) - [Installation](https://github.com/ckan/ckanapi/blob/master/README.md#installation) - [ckanapi CLI](https://github.com/ckan/ckanapi/blob/master/README.md#ckanapi-cli) - [Actions](https://github.com/ckan/ckanapi/blob/master/README.md#actions) - [Action Arguments](https://github.com/ckan/ckanapi/blob/master/README.md#action-arguments) - [Bulk Dumping and Loading](https://github.com/ckan/ckanapi/blob/master/README.md#bulk-dumping-and-loading) - [Bulk Delete](https://github.com/ckan/ckanapi/blob/master/README.md#bulk-delete) - [Bulk Dataset and Resource Export](https://github.com/ckan/ckanapi/blob/master/README.md#bulk-dataset-and-resource-export---datapackagejson-format) - [Batch Actions](https://github.com/ckan/ckanapi/blob/master/README.md#batch-actions) - [Shell Pipelines](https://github.com/ckan/ckanapi/blob/master/README.md#shell-pipelines) - [ckanapi Python Module](https://github.com/ckan/ckanapi/blob/master/README.md#ckanapi-python-module) - [RemoteCKAN](https://github.com/ckan/ckanapi/blob/master/README.md#remoteckan) - [Exceptions](https://github.com/ckan/ckanapi/blob/master/README.md#exceptions) - [File Uploads](https://github.com/ckan/ckanapi/blob/master/README.md#file-uploads) - [Session Control](https://github.com/ckan/ckanapi/blob/master/README.md#session-control) - [LocalCKAN](https://github.com/ckan/ckanapi/blob/master/README.md#localckan) - [TestAppCKAN](https://github.com/ckan/ckanapi/blob/master/README.md#testappckan) - [Tests](https://github.com/ckan/ckanapi/blob/master/README.md#tests) - [License](https://github.com/ckan/ckanapi/blob/master/README.md#license) ## Installation Installation with pip: ``` pip install ckanapi ``` Installation with conda: ``` conda install -c conda-forge ckanapi ``` ## ckanapi CLI The ckanapi command line interface lets you access local and remote CKAN instances for bulk operations and simple API actions. ### Actions Simple actions with string parameters may be called directly. The response is pretty-printed to STDOUT. #### 🔧 List names of groups on a remote CKAN site ``` $ ckanapi action group_list -r https://demo.ckan.org --insecure [ "data-explorer", "example-group", "geo-examples", ... ] ``` Use -r to specify the remote CKAN instance, and -a to provide an API KEY. Remote actions connect as an anonymous user by default. For this example, we use --insecure as the CKAN demo uses a self-signed certificate. Local CKAN actions may be run by specifying the config file with -c. If no remote server or config file is specified, the CLI will look for a ckan.ini file in the current directory, much like `ckan` commands. Local CKAN actions are performed by the site user (default system administrator) when -u is not specified. To perform local actions with a less privileged user use the -u option with a user name or a name that doesn't exist. This is useful if you don't want things like deleted datasets or private information to be returned. Note that all actions in the [CKAN Action API](http://docs.ckan.org/en/latest/api/index.html#action-api-reference) and actions added by CKAN plugins are supported. ### Action Arguments Simple action arguments may be passed in KEY=STRING form for string values or in KEY:JSON form for JSON values. #### 🔧 View a dataset using a KEY=STRING parameter ``` $ ckanapi action package_show id=my-dataset-name { "name": "my-dataset-name", ... } ``` #### 🔧 Get detailed info about a resource in the datastore ``` $ ckanapi action datastore_info id=my-resource-id-or-alias { "meta": { "aliases": [ "test_alias" ], "count": 1000, ... } ``` #### 🔧 Get the number of datasets for each organization using KEY:JSON parameters ``` $ ckanapi action package_search facet.field:'["organization"]' rows:0 { "facets": { "organization": { "org1": 42, "org2": 21, ... } }, ... } ``` #### 🔧 Create a resource with a file attached Files may be passed for upload using the KEY@FILE form. ``` $ ckanapi action resource_create package_id=my-dataset-with-files \ upload@/path/to/file/to/upload.csv ``` #### 🔧 Edit a dataset with a text editor ``` $ ckanapi action package_show id=my-dataset-id > my-dataset.json $ nano my-dataset.json $ ckanapi action package_update -I my-dataset.json $ rm my-dataset.json ``` #### 🔧 Update a single resource field ``` $ ckanapi action resource_patch id=my-resource-id size:42000000 ``` ### Bulk Dumping and Loading Datasets, groups, organizations, users and related items may be dumped to [JSON lines](http://jsonlines.org) text files and created or updated from JSON lines text files. `dump` and `load` jobs can be run in parallel with multiple worker processes using the `-p` parameter. The jobs in progress, the rate of job completion and any individual errors are shown on STDERR while the jobs run. There are no parallel limits when running against a CKAN on localhost. When running against a remote site, there's a default limit of 3 worker processes. The environment variables `CKANAPI_MY_SITES` and`CKANAPI_PARALLEL_LIMIT` can be used to adjust these limits. `CKANAPI_MY_SITES` (comma-delimited list of CKAN urls) will not have the `PARALLEL_LIMIT` applied. `dump` and `load` jobs may be resumed from the last completed record or split across multiple servers by specifying record start and max values. #### 🔧 Dump datasets from CKAN into a local file with 4 processes ``` $ ckanapi dump datasets --all -O datasets.jsonl.gz -z -p 4 -r http://localhost ``` #### 🔧 Export datasets including private ones using search ``` $ ckanapi search datasets include_private=true -O datasets.jsonl.gz -z \ -c /etc/ckan/production.ini ``` `search` is faster than `dump` because it calls `package_search` to retrieve many records per call, paginating automatically. You may add parameters supported by `package_search` to filter the records returned. #### 🔧 Load/update datasets from a dataset JSON lines file with 3 processes ``` $ ckanapi load datasets -I datasets.jsonl.gz -z -p 3 -c /etc/ckan/production.ini ``` ### Bulk Delete Datasets, groups, organizations, users and related items may be deleted in bulk with the delete command. This command accepts ids or names on the command line or a number of different formats piped on standard input. #### 🔧 All datasets (JSON list of "id" or "name" values) ``` $ ckanapi action package_list -j | ckanapi delete datasets ``` #### 🔧 Selective delete (JSON object with "results" list containing "id" values) ``` $ ckanapi action package_search q=ponies | ckanapi delete datasets ``` #### 🔧 Processed JSON Lines (JSON objects with "id" or "name" value, one per line) ``` $ ckanapi dump groups --all > groups.jsonl $ grep ponies groups.jsonl | ckanapi delete groups ``` #### 🔧 Text list of "id" or "name" values (one per line) ``` $ cat users_to_remove.txt fred bill larry $ ckanapi delete users < users_to_remove.txt ``` ### Bulk Dataset and Resource Export - datapackage.json format Datasets may be exported to a simplified [datapackage.json format](http://dataprotocols.org/data-packages/) (which includes the actual resources, where available). If the resource url is not available, the resource will be included in the datapackage.json file but the actual resource data will not be downloaded. ``` $ ckanapi dump datasets --all --datapackages=./output_directory/ -r http://sourceckan.example.com ``` ### Batch Actions Run a set of actions from a JSON lines file. For local actions this is much faster than running `ckanapi action ...` in a shell loop because the local start-up time only happens once. Batch actions can also be run in parallel with multiple processes and errors logged, just like the dump and load commands. #### 🔧 Update a dataset field across a number of datasets ``` $ cat update-emails.jsonl {"action":"package_patch","data":{"id":"dataset-1","maintainer_email":"new@example.com"}} {"action":"package_patch","data":{"id":"dataset-2","maintainer_email":"new@example.com"}} {"action":"package_patch","data":{"id":"dataset-3","maintainer_email":"new@example.com"}} $ ckanapi batch -I update-emails.jsonl ``` #### 🔧 Replace a set of uploaded files ``` $ cat upload-files.jsonl {"action":"resource_patch","data":{"id":"408e1b1d-d0ca-50ca-9ae6-aedcee37aaa9"},"files":{"upload":"data1.csv"}} {"action":"resource_patch","data":{"id":"c1eab17f-c2d0-536d-a3f6-41a3dfe6a2c3"},"files":{"upload":"data2.csv"}} {"action":"resource_patch","data":{"id":"8ed068c2-4d4c-5f20-90db-39d2d596ce1a"},"files":{"upload":"data3.csv"}} $ ckanapi batch -I upload-files.jsonl --local-files ``` The `"files"` values in the JSON lines file is ignored unless the `--local-files` parameter is passed. Paths in the JSON lines file reference files on the local filesystems relative to the current working directory. ### Shell pipelines Simple shell pipelines are possible with the CLI. #### 🔧 Copy the name of a dataset to its title with 'jq' ``` $ ckanapi action package_show id=my-dataset \ | jq '.+{"title":.name}' \ | ckanapi action package_update -i ``` #### 🔧 Mirror all datasets from one CKAN instance to another ``` $ ckanapi dump datasets --all -q -r http://sourceckan.example.com \ | ckanapi load datasets ``` ## ckanapi Python Module The ckanapi Python module may be used from within a [CKAN extension](http://docs.ckan.org/en/latest/extensions/index.html) or in a Python 2 or Python 3 application separate from CKAN. ### RemoteCKAN Making a request: ```python from ckanapi import RemoteCKAN ua = 'ckanapiexample/1.0 (+http://example.com/my/website)' demo = RemoteCKAN('https://demo.ckan.org', user_agent=ua) groups = demo.action.group_list(id='data-explorer') print(groups) ``` result: ``` [u'data-explorer', u'example-group', u'geo-examples', u'skeenawild'] ``` The example above is using an "action shortcut". The `.action` object detects the method name used ("group_list" above) and converts it to a normal `call_action` call. This is equivalent code without using an action shortcut: ```python groups = demo.call_action('group_list', {'id': 'data-explorer'}) ``` Once again, all actions in the [CKAN Action API](http://docs.ckan.org/en/latest/api/index.html#action-api-reference) and actions added by CKAN plugins are supported by action shortcuts and `call_action` calls. For example, if the [Showcase](https://github.com/ckan/ckanext-showcase#api) extension is installed: ```python from ckanapi import RemoteCKAN ua = 'ckanapiexample/1.0 (+http://example.com/my/website)' demo = RemoteCKAN('https://demo.ckan.org', user_agent=ua) showcases= demo.action.ckanext_showcase_list() print(showcases) ``` Combining query parameters clauses is possible as in the following `package_search` action. This query combines three clauses that are all satisfied by the single [example dataset](https://demo.ckan.org/dataset/sample-dataset-1) in the Demo CKAN site. More detailed complex query syntax examples can be found in the [SOLR documentation](https://solr.apache.org/guide/6_6/common-query-parameters.html). ```python from ckanapi import RemoteCKAN ua = 'ckanapiexample/1.0 (+http://example.com/my/website)' demo = RemoteCKAN('https://demo.ckan.org', user_agent=ua) packages = demo.action.package_search(q='+organization:sample-organization +res_format:GeoJSON +tags:geojson') print(packages) ``` Many CKAN API functions can only be used by authenticated users. Use the `apikey` parameter to supply your CKAN API key to `RemoteCKAN`: demo = RemoteCKAN('https://demo.ckan.org', apikey='MY-SECRET-API-KEY') An example of updating a single field in an existing dataset can be seen in the [Examples directory](examples/update_single_field.py) ### Exceptions * `NotAuthorized` - user unauthorized or accessing a deleted item * `NotFound` - name/id not found * `ValidationError` - field errors listed in `.error_dict` * `SearchQueryError` - error reported from SOLR index * `SearchError` * `CKANAPIError` - incorrect use of ckanapi or unable to parse response * `ServerIncompatibleError` - the remote API is not a CKAN API When using an action shortcut or the `call_action` method failures are raised as exceptions just like when calling `get_action` from a CKAN plugin: ```python from ckanapi import RemoteCKAN, NotAuthorized ua = 'ckanapiexample/1.0 (+http://example.com/my/website)' demo = RemoteCKAN('https://demo.ckan.org', apikey='phony-key', user_agent=ua) try: pkg = demo.action.package_create(name='my-dataset', title='not going to work') except NotAuthorized: print('denied') ``` When it is possible to `import ckan` all the ckanapi exception classes are replaced with the CKAN exceptions with the same names. ### File Uploads File uploads for CKAN 2.2+ are supported by passing file-like objects to action shortcut methods: ```python from ckanapi import RemoteCKAN ua = 'ckanapiexample/1.0 (+http://example.com/my/website)' mysite = RemoteCKAN('http://myckan.example.com', apikey='real-key', user_agent=ua) mysite.action.resource_create( package_id='my-dataset-with-files', url='dummy-value', # ignored but required by CKAN<2.6 upload=open('/path/to/file/to/upload.csv', 'rb')) ``` When using `call_action` you must pass file objects separately: ```python mysite.call_action('resource_create', {'package_id': 'my-dataset-with-files'}, files={'upload': open('/path/to/file/to/upload.csv', 'rb')}) ``` ### Session Control As of ckanapi 4.0 RemoteCKAN will keep your HTTP connection open using a [requests session](http://docs.python-requests.org/en/master/user/advanced/). For long-running scripts make sure to close your connections by using RemoteCKAN as a context manager: ```python from ckanapi import RemoteCKAN ua = 'ckanapiexample/1.0 (+http://example.com/my/website)' with RemoteCKAN('https://demo.ckan.org', user_agent=ua) as demo: groups = demo.action.group_list(id='data-explorer') print(groups) ``` Or by explicitly calling `RemoteCKAN.close()`. ### LocalCKAN A similar class is provided for accessing local CKAN instances from a plugin in the same way as remote CKAN instances. Unlike [CKAN's get_action](http://docs.ckan.org/en/latest/extensions/plugins-toolkit.html?highlight=get_action#ckan.plugins.toolkit.get_action) LocalCKAN prevents data from one action call leaking into the next which can cause issues that are very hard do debug. This class defaults to using the site user with full access. ```python from ckanapi import LocalCKAN, ValidationError registry = LocalCKAN() try: registry.action.package_create(name='my-dataset', title='this will work fine') except ValidationError: print('unless my-dataset already exists') ``` For extra caution pass a blank username to LocalCKAN and only actions allowed by anonymous users will be permitted. ```python from ckanapi import LocalCKAN anon = LocalCKAN(username='') print(anon.action.status_show()) ``` #### Extra Loggging To enable extra info logging for the execution of LocalCKAN ckanapi commands, you can enable the config option in your CKAN INI file. ``` ckanapi.log_local = True ``` The output of the log will look like: ``` INFO [ckan.ckanapi] OS User executed LocalCKAN: ckanapi ``` ### TestAppCKAN A class is provided for making action requests to a [webtest.TestApp](http://webtest.readthedocs.org/en/latest/testapp.html) instance for use in CKAN tests: ```python from ckanapi import TestAppCKAN from webtest import TestApp test_app = TestApp(...) demo = TestAppCKAN(test_app, apikey='my-test-key') groups = demo.action.group_list(id='data-explorer') ``` ## Timeouts All requests performed to CKAN either via the CLI or the Python module can have a timeout defined. By defaults it is not set, but you can define a custom timeout value using environment variables or passing a value explicitly. To use environment variables: * `CKANAPI_REQUEST_TIMEOUT`: this is the connect timeout (the time waited to connect to the remote server) * `CKANAPI_REQUEST_READ_TIMEOUT`: this is the read timeout (the time waited to receive a response) If the read timeout is not defined, the connect timeout will be used. Please refer to the [requests library documentation](https://requests.readthedocs.io/en/latest/user/advanced/#timeouts) for more details. To pass a timeout on a `call_action` call use the `requests_kwargs` param: ``` ```python from ckanapi import RemoteCKAN demo = RemoteCKAN('https://demo.ckan.org') groups = demo.action.group_list(id='data-explorer', requests_kwargs={"timeout": 10}) ``` ## Tests To run the tests: python -m unittest discover ## License 🇨🇦 Government of Canada / Gouvernement du Canada The project files are covered under Crown Copyright, Government of Canada and is distributed under the MIT license. Please see [COPYING](COPYING) / [COPYING.fr](COPYING.fr) for full details. ================================================ FILE: ckanapi/__init__.py ================================================ """ ckanapi ------- This module a thin wrapper around the CKAN's action API. """ from ckanapi.errors import ( CKANAPIError, NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, SearchIndexError, ServerIncompatibleError, ) from ckanapi.localckan import LocalCKAN from ckanapi.remoteckan import RemoteCKAN from ckanapi.testappckan import TestAppCKAN ================================================ FILE: ckanapi/cli/__init__.py ================================================ ================================================ FILE: ckanapi/cli/action.py ================================================ """ implementation of the action cli command """ import sys import json from os.path import expanduser from ckanapi.cli.utils import compact_json, pretty_json from ckanapi.errors import CLIError def action(ckan, arguments, stdin=None): """ call an action with KEY=STRING, KEY:JSON or JSON args, yield the result """ if stdin is None: stdin = getattr(sys.stdin, 'buffer', sys.stdin) file_args = {} requests_kwargs = None if arguments['--insecure']: requests_kwargs = {'verify': False} if arguments['--input-json']: action_args = json.loads(stdin.read().decode('utf-8')) elif arguments['--input']: action_args = {} with open(expanduser(arguments['--input'])) as in_f: action_args = json.loads( in_f.read()) else: action_args = {} for kv in arguments['KEY=STRING']: if hasattr(kv, 'decode'): kv = kv.decode('utf-8') skey, p, svalue = kv.partition('=') jkey, p, jvalue = kv.partition(':') fkey, p, fvalue = kv.partition('@') if len(jkey) > len(skey) < len(fkey): action_args[skey] = svalue elif len(skey) > len(jkey) < len(fkey): try: value = json.loads(jvalue) except ValueError: raise CLIError("KEY:JSON argument %r has invalid JSON " "value %r" % (jkey, jvalue)) action_args[jkey] = value elif len(jkey) > len(fkey) < len(skey): try: f = open(expanduser(fvalue), 'rb') except IOError as e: raise CLIError("Error opening %r: %s" % (expanduser(fvalue), e.args[1])) file_args[fkey] = f else: raise CLIError("argument not in the form KEY=STRING, " "KEY:JSON or KEY@FILE %r" % kv) def call(): return ckan.call_action(arguments['ACTION_NAME'], action_args, files=file_args, requests_kwargs=requests_kwargs) if arguments['--profile']: from cProfile import Profile with Profile() as pr: result = call() pr.dump_stats(arguments['--profile']) else: result = call() if arguments['--output-jsonl']: if isinstance(result, list): for r in result: yield compact_json(r) + b'\n' else: yield compact_json(result) + b'\n' elif arguments['--output-json']: yield compact_json(result) + b'\n' else: yield pretty_json(result) + b'\n' ================================================ FILE: ckanapi/cli/batch.py ================================================ """ implementation of batch cli command """ import sys import gzip import json from datetime import datetime from ckanapi.errors import (NotFound, NotAuthorized, ValidationError, SearchIndexError) from ckanapi.cli import workers from ckanapi.cli.utils import completion_stats, compact_json, quiet_int_pipe def batch_actions(ckan, arguments, worker_pool=None, stdin=None, stdout=None, stderr=None): """ call actions from a jsonl file The parent process creates a pool of worker processes and hands out json lines to each worker as they finish a task. Status of last record completed and records being processed is displayed on stderr. """ if worker_pool is None: worker_pool = workers.worker_pool if stdin is None: stdin = getattr(sys.stdin, 'buffer', sys.stdin) if stdout is None: stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__) if stderr is None: stderr = getattr(sys.stderr, 'buffer', sys.stderr) if arguments['--worker']: return batch_actions_worker(ckan, arguments) log = None if arguments['--log']: log = open(arguments['--log'], 'ab') jsonl_input = stdin if arguments['--input']: jsonl_input = open(arguments['--input'], 'rb') if arguments['--gzip']: jsonl_input = gzip.GzipFile(fileobj=jsonl_input) def line_reader(): """ handle start-record and max-records options """ start_record = int(arguments['--start-record']) max_records = arguments['--max-records'] if max_records is not None: max_records = int(max_records) for num, line in enumerate(jsonl_input, 1): # records start from 1 if num < start_record: continue if max_records is not None and num >= start_record + max_records: break yield num, line cmd = _worker_command_line(arguments) processes = int(arguments['--processes']) if hasattr(ckan, 'parallel_limit'): # add your sites to CKANAPI_MY_SITES instead of removing processes = min(processes, ckan.parallel_limit) stats = completion_stats(processes) pool = worker_pool(cmd, processes, line_reader()) with quiet_int_pipe() as errors: for job_ids, finished, result in pool: if not result: # child exited with traceback return 1 timestamp, action, error, response = json.loads( result.decode('utf-8')) if not arguments['--quiet']: stderr.write(('%s %s %s %s %s %s\n' % ( finished, job_ids, next(stats), action, error, compact_json(response).decode('utf-8') if response else '' )).encode('utf-8')) if log: log.write(compact_json([ timestamp, finished, action, error, response, ]) + b'\n') log.flush() if 'pipe' in errors: return 1 if 'interrupt' in errors: return 2 def batch_actions_worker(ckan, arguments, stdin=None, stdout=None): """ a process that accepts lines of json on stdin which is parsed and passed to action calls. it produces lines of json which are the responses from each action call. """ if stdin is None: stdin = getattr(sys.stdin, 'buffer', sys.stdin) # hack so that pdb can be used in extension/ckan # code called by this worker try: sys.stdin = open('/dev/tty', 'rb') except IOError: pass if stdout is None: stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__) # hack so that "print debugging" can work in extension/ckan # code called by this worker sys.stdout = sys.stderr def reply(action, error, response): """ format messages to be sent back to parent process """ stdout.write(compact_json([ datetime.now().isoformat(), action, error, response]) + b'\n') stdout.flush() for line in iter(stdin.readline, b''): try: obj = json.loads(line.decode('utf-8')) except UnicodeDecodeError as e: obj = None reply('read', 'UnicodeDecodeError', str(e)) continue requests_kwargs = None if arguments['--insecure']: requests_kwargs = {'verify': False} if obj is not None: action = obj['action'] data = obj.get('data', {}) files = {} if arguments['--local-files']: try: for fkey, fvalue in obj.get('files', {}).items(): f = open(fvalue, 'rb') files[fkey] = f except IOError as e: reply('read', 'IOError', { 'parameter':fkey, 'file_name':fvalue, 'error':str(e.args[1]), }) continue try: r = ckan.call_action(action, data, files=files, requests_kwargs=requests_kwargs) except ValidationError as e: reply(action, 'ValidationError', e.error_dict) except SearchIndexError as e: reply(action, 'SearchIndexError', str(e)) except NotAuthorized as e: reply(action, 'NotAuthorized', str(e)) except NotFound: reply(action, 'NotFound', obj) else: reply(action, None, r) def _worker_command_line(arguments): """ Create a worker command line suitable for Popen with only the options the worker process requires """ def a(name): "options with values" return [name, arguments[name]] * (arguments[name] is not None) def b(name): "boolean options" return [name] * bool(arguments[name]) return ( ['ckanapi', 'batch', '--worker'] + a('--config') + a('--ckan-user') + a('--remote') + a('--apikey') + b('--local-files') + b('--insecure') ) ================================================ FILE: ckanapi/cli/ckan_click.py ================================================ import click @click.command( context_settings={'ignore_unknown_options': True}, short_help='Local API calls with ckanapi tool' ) @click.argument('args', nargs=-1, type=click.UNPROCESSED) @click.pass_context def api(context, args): from ckanapi.cli.main import main import sys sys.argv[1:] = args context.exit(main(running_with_ckan_command=True) or 0) ================================================ FILE: ckanapi/cli/delete.py ================================================ """ implementation of delete cli command """ import sys import gzip import json from datetime import datetime from itertools import chain import re from urllib.parse import urlparse from ckanapi.errors import (NotFound, NotAuthorized, ValidationError, SearchIndexError) from ckanapi.cli import workers from ckanapi.cli.utils import completion_stats, compact_json, quiet_int_pipe def delete_things(ckan, thing, arguments, worker_pool=None, stdin=None, stdout=None, stderr=None): """ delete datasets, groups, orgs, users etc, The parent process creates a pool of worker processes and hands out json lines to each worker as they finish a task. Status of last record completed and records being processed is displayed on stderr. """ if worker_pool is None: worker_pool = workers.worker_pool if stdin is None: stdin = getattr(sys.stdin, 'buffer', sys.stdin) if stdout is None: stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__) if stderr is None: stderr = getattr(sys.stderr, 'buffer', sys.stderr) if arguments['--worker']: return delete_things_worker(ckan, thing, arguments) log = None if arguments['--log']: log = open(arguments['--log'], 'ab') jsonl_input = stdin if arguments['--input']: jsonl_input = open(arguments['--input'], 'rb') if arguments['--gzip']: jsonl_input = gzip.GzipFile(fileobj=jsonl_input) def name_reader(): """ handle start-record and max-records options and extract all ids or names from each line (e.g. package_search, package_show or package_list output) record numbers here correspond to names/ids extracted not lines """ start_record = int(arguments['--start-record']) max_records = arguments['--max-records'] if max_records is not None: max_records = int(max_records) for num, name in enumerate(chain.from_iterable( extract_ids_or_names(line) for line in jsonl_input), 1): if num < start_record: continue if max_records is not None and num >= start_record + max_records: break yield num, compact_json(name) cmd = _worker_command_line(thing, arguments) processes = int(arguments['--processes']) if hasattr(ckan, 'parallel_limit'): # add your sites to CKANAPI_MY_SITES instead of removing processes = min(processes, ckan.parallel_limit) stats = completion_stats(processes) if not arguments['ID_OR_NAME']: pool = worker_pool(cmd, processes, name_reader()) else: pool = worker_pool(cmd, processes, enumerate( (compact_json(n) + b'\n' for n in arguments['ID_OR_NAME']), 1)) with quiet_int_pipe() as errors: for job_ids, finished, result in pool: if not result: # child exited with traceback return 1 timestamp, error, response = json.loads( result.decode('utf-8')) if not arguments['--quiet']: stderr.write(('%s %s %s %s %s\n' % ( finished, job_ids, next(stats), error, compact_json(response).decode('utf-8') if response else '' )).encode('utf-8')) if log: log.write(compact_json([ timestamp, finished, error, response, ]) + b'\n') log.flush() if 'pipe' in errors: return 1 if 'interrupt' in errors: return 2 def extract_ids_or_names(line): """ Be generous in what we accept: line may contain 1. a JSON object with an "id" or "name" value (e.g. package_show result) 2. a JSON object with a "results" value containing a list of objects with "id" values (e.g. package_search result) 3. a JSON string id or name value 4. a JSON list of string id or name values (e.g. package_list) 5. a simple string id or name value Returns a list of ids or names found in line """ try: j = json.loads(line) except ValueError: return [line.strip()] # 5 if isinstance(j, list) and all( isinstance(e, str) for e in j): return j # 4 elif isinstance(j, str): return [j] # 3 elif isinstance(j, dict): if 'id' in j and isinstance(j['id'], str): return [j['id']] # 1 if 'name' in j and isinstance(j['name'], str): return [j['name']] # 1 again if 'results' in j and isinstance(j['results'], list): out = [] for r in j['results']: if (not isinstance(r, dict) or 'id' not in r or not isinstance(r['id'], str)): break out.append(r['id']) else: return out # 5 again (e.g. "true" or "null" or something stranger) return [line.strip()] def delete_things_worker(ckan, thing, arguments, stdin=None, stdout=None): """ a process that accepts lines of json on stdin which is parsed and passed to the {thing}_delete actions. it produces lines of json which are the responses from each action call. """ if stdin is None: stdin = getattr(sys.stdin, 'buffer', sys.stdin) # hack so that pdb can be used in extension/ckan # code called by this worker try: sys.stdin = open('/dev/tty', 'rb') except IOError: pass if stdout is None: stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__) # hack so that "print debugging" can work in extension/ckan # code called by this worker sys.stdout = sys.stderr thing_delete = { 'datasets': 'package_delete', 'groups': 'group_delete', 'organizations': 'organization_delete', 'users': 'user_delete', 'related': 'related_delete', }[thing] def reply(error, response): """ format messages to be sent back to parent process """ stdout.write(compact_json([ datetime.now().isoformat(), error, response]) + b'\n') stdout.flush() for line in iter(stdin.readline, b''): try: name = json.loads(line.decode('utf-8')) except UnicodeDecodeError as e: reply('UnicodeDecodeError', str(e)) continue try: requests_kwargs = None if arguments['--insecure']: requests_kwargs = {'verify': False} ckan.call_action(thing_delete, {'id': name}, requests_kwargs=requests_kwargs) except NotAuthorized as e: reply('NotAuthorized', str(e)) except NotFound: reply('NotFound', name) else: reply(None, name) def _worker_command_line(thing, arguments): """ Create a worker command line suitable for Popen with only the options the worker process requires """ def a(name): "options with values" return [name, arguments[name]] * (arguments[name] is not None) return ( ['ckanapi', 'delete', thing, '--worker'] + a('--config') + a('--ckan-user') + a('--remote') + a('--apikey') ) ================================================ FILE: ckanapi/cli/dump.py ================================================ """ implementation of dump cli command """ import sys import gzip import json from datetime import datetime import os from ckanapi.errors import (CKANAPIError, NotFound, NotAuthorized, ValidationError, SearchIndexError) from ckanapi.cli import workers from ckanapi.cli.utils import completion_stats, compact_json, \ quiet_int_pipe from ckanapi.datapackage import create_datapackage, \ populate_datastore_res_fields def dump_things(ckan, thing, arguments, worker_pool=None, stdout=None, stderr=None): """ dump all datasets, groups, orgs or users accessible by the connected user The parent process creates a pool of worker processes and hands out ids to each worker. Status of last record completed and records being processed is displayed on stderr. """ if worker_pool is None: worker_pool = workers.worker_pool if stdout is None: stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__) if stderr is None: stderr = getattr(sys.stderr, 'buffer', sys.stderr) if arguments['--worker']: return dump_things_worker(ckan, thing, arguments) log = None if arguments['--log']: log = open(arguments['--log'], 'ab') jsonl_output = stdout if arguments['--datapackages']: # TODO: do we want to just divert this to devnull? jsonl_output = open(os.devnull, 'wb') if arguments['--output']: jsonl_output = open(arguments['--output'], 'wb') if arguments['--gzip']: jsonl_output = gzip.GzipFile(fileobj=jsonl_output) if arguments['--all']: params = None get_thing_list = { 'datasets': 'package_list', 'groups': 'group_list', 'organizations': 'organization_list', 'users': 'user_list', 'related' :'related_list', }[thing] if get_thing_list == "user_list": params = dict( all_fields=False ) elif get_thing_list == "package_list": params = dict( include_private=arguments['--include-private'] if '--include-private' in arguments else False, include_drafts=arguments['--include-drafts'] if '--include-drafts' in arguments else False, include_deleted=arguments['--include-deleted'] if '--include-deleted' in arguments else False, ) names = ckan.call_action(get_thing_list, params) else: names = arguments['ID_OR_NAME'] if names and isinstance(names[0], dict): names = [rec.get('name',rec.get('id')) for rec in names] if arguments['--datapackages']: arguments['--datastore-fields'] = True cmd = _worker_command_line(thing, arguments) processes = int(arguments['--processes']) if hasattr(ckan, 'parallel_limit'): # add your sites to CKANAPI_MY_SITES instead of removing processes = min(processes, ckan.parallel_limit) stats = completion_stats(processes) pool = worker_pool(cmd, processes, enumerate(compact_json(n) + b'\n' for n in names)) results = {} expecting_number = 0 with quiet_int_pipe() as errors: for job_ids, finished, result in pool: if not result: # child exited with traceback return 1 timestamp, error, record = json.loads(result.decode('utf-8')) results[finished] = record if not arguments['--quiet']: stderr.write('{0} {1} {2} {3} {4}\n'.format( finished, job_ids, next(stats), error, record.get('name', '') if record else '', ).encode('utf-8')) if log: log.write(compact_json([ timestamp, finished, error, record.get('name', '') if record else None, ]) + b'\n') datapackages_path = arguments['--datapackages'] apikey = arguments['--apikey'] if datapackages_path: create_datapackage(record, datapackages_path, stderr, apikey) # keep the output in the same order as names while expecting_number in results: record = results.pop(expecting_number) if record: # sort keys so we can diff output jsonl_output.write(compact_json(record, sort_keys=True) + b'\n') expecting_number += 1 if jsonl_output != stdout: jsonl_output.close() if 'pipe' in errors: return 1 if 'interrupt' in errors: return 2 def dump_things_worker(ckan, thing, arguments, stdin=None, stdout=None): """ a process that accepts names on stdin which are passed to the {thing}_show actions. it produces lines of json which are the responses from each action call. """ if stdin is None: stdin = getattr(sys.stdin, 'buffer', sys.stdin) # hack so that pdb can be used in extension/ckan # code called by this worker try: sys.stdin = open('/dev/tty', 'rb') except IOError: pass if stdout is None: stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__) # hack so that "print debugging" can work in extension/ckan # code called by this worker sys.stdout = sys.stderr thing_show = { 'datasets': 'package_show', 'groups': 'group_show', 'organizations': 'organization_show', 'users': 'user_show', 'related':'related_show' }[thing] def reply(error, record=None): """ format messages to be sent back to parent process """ stdout.write(compact_json([ datetime.now().isoformat(), error, record]) + b'\n') stdout.flush() for line in iter(stdin.readline, b''): try: name = json.loads(line.decode('utf-8')) except UnicodeDecodeError as e: reply('UnicodeDecodeError') continue try: requests_kwargs = None if arguments['--insecure']: requests_kwargs = {'verify': False} include_users = False if '--include-users' in arguments \ and arguments['--include-users']: include_users = True obj = ckan.call_action(thing_show, {'id': name, 'include_datasets': False, 'include_password_hash': True, 'include_users': include_users, }, requests_kwargs=requests_kwargs) except NotFound: reply('NotFound') except NotAuthorized: reply('NotAuthorized') else: if thing == 'datasets' and arguments['--datastore-fields']: for res in obj.get('resources', []): populate_datastore_res_fields(ckan, res) if thing == 'datasets' and arguments['--resource-views']: for res in obj.get('resources', []): populate_res_views(ckan, res) reply(None, obj) def _worker_command_line(thing, arguments): """ Create a worker command line suitable for Popen with only the options the worker process requires """ def a(name): "options with values" return [name, arguments[name]] * (arguments[name] is not None) def b(name): "boolean options" return [name] * bool(arguments[name]) return ( ['ckanapi', 'dump', thing, '--worker'] + a('--config') + a('--ckan-user') + a('--remote') + a('--apikey') + b('--get-request') + b('--datastore-fields') + b('--resource-views') + b('--include-users') + ['value-here-to-make-docopt-happy'] ) def populate_res_views(ckan, res): """ update resource dict in-place with resource_view_list values in every resource with views using ckan LocalCKAN/RemoteCKAN instance """ try: views = ckan.call_action('resource_view_list', { 'id': res['id'], 'limit':0}) except CKANAPIError: return except NotFound: return # with localckan we'll get the real CKAN exception not a CKANAPIError subclass if not views: return # return if the resource views list is empty res['resource_views'] = views ================================================ FILE: ckanapi/cli/load.py ================================================ """ implementation of load cli command """ import sys import gzip import json import requests from datetime import datetime import re from urllib.parse import urlparse from ckanapi.common import REQUEST_TIMEOUT from ckanapi.errors import (NotFound, NotAuthorized, ValidationError, SearchIndexError) from ckanapi.cli import workers from ckanapi.cli.utils import completion_stats, compact_json, quiet_int_pipe def load_things(ckan, thing, arguments, worker_pool=None, stdin=None, stdout=None, stderr=None): """ create and update datasets, groups, orgs and users The parent process creates a pool of worker processes and hands out json lines to each worker as they finish a task. Status of last record completed and records being processed is displayed on stderr. """ if worker_pool is None: worker_pool = workers.worker_pool if stdin is None: stdin = getattr(sys.stdin, 'buffer', sys.stdin) if stdout is None: stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__) if stderr is None: stderr = getattr(sys.stderr, 'buffer', sys.stderr) if arguments['--worker']: return load_things_worker(ckan, thing, arguments) log = None if arguments['--log']: log = open(arguments['--log'], 'ab') jsonl_input = stdin if arguments['--input']: jsonl_input = open(arguments['--input'], 'rb') if arguments['--gzip']: jsonl_input = gzip.GzipFile(fileobj=jsonl_input) def line_reader(): """ handle start-record and max-records options """ start_record = int(arguments['--start-record']) max_records = arguments['--max-records'] if max_records is not None: max_records = int(max_records) for num, line in enumerate(jsonl_input, 1): # records start from 1 if num < start_record: continue if max_records is not None and num >= start_record + max_records: break yield num, line cmd = _worker_command_line(thing, arguments) processes = int(arguments['--processes']) if hasattr(ckan, 'parallel_limit'): # add your sites to CKANAPI_MY_SITES instead of removing processes = min(processes, ckan.parallel_limit) stats = completion_stats(processes) pool = worker_pool(cmd, processes, line_reader()) failures = 0 with quiet_int_pipe() as errors: for job_ids, finished, result in pool: if not result: # child exited with traceback return 1 timestamp, action, error, response = json.loads( result.decode('utf-8')) if error: failures += 1 if not arguments['--quiet']: stderr.write(('%s %s %s %s %s %s\n' % ( finished, job_ids, next(stats), action, error, compact_json(response).decode('utf-8') if response else '' )).encode('utf-8')) if log: log.write(compact_json([ timestamp, finished, action, error, response, ]) + b'\n') log.flush() if 'pipe' in errors: return 1 if 'interrupt' in errors: return 2 if failures: return 3 def load_things_worker(ckan, thing, arguments, stdin=None, stdout=None): """ a process that accepts lines of json on stdin which is parsed and passed to the {thing}_create/update actions. it produces lines of json which are the responses from each action call. """ if stdin is None: stdin = getattr(sys.stdin, 'buffer', sys.stdin) # hack so that pdb can be used in extension/ckan # code called by this worker try: sys.stdin = open('/dev/tty', 'rb') except IOError: pass if stdout is None: stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__) # hack so that "print debugging" can work in extension/ckan # code called by this worker sys.stdout = sys.stderr thing_show, thing_create, thing_update = { 'datasets': ( 'package_show', 'package_create', 'package_update'), 'groups': ( 'group_show', 'group_create', 'group_update'), 'organizations': ( 'organization_show', 'organization_create', 'organization_update'), 'users': ( 'user_show', 'user_create', 'user_update'), 'related':( 'related_show','related_create','related_update'), }[thing] def reply(action, error, response): """ format messages to be sent back to parent process """ stdout.write(compact_json([ datetime.now().isoformat(), action, error, response]) + b'\n') stdout.flush() for line in iter(stdin.readline, b''): try: obj = json.loads(line.decode('utf-8')) except UnicodeDecodeError as e: obj = None reply('read', 'UnicodeDecodeError', str(e)) continue requests_kwargs = None if arguments['--insecure']: requests_kwargs = {'verify': False} if obj is not None: existing = None if not arguments['--create-only']: # use either id or name to locate existing records name = obj.get('id') if name: try: existing = ckan.call_action(thing_show, {'id': name, 'include_datasets': False, 'include_password_hash': True, 'include_users': True, }, requests_kwargs=requests_kwargs) except NotFound: pass except NotAuthorized as e: reply('show', 'NotAuthorized', str(e)) continue name = obj.get('name') if not existing and name: try: existing = ckan.call_action(thing_show, {'id': name}, requests_kwargs=requests_kwargs) except NotFound: pass except NotAuthorized as e: reply('show', 'NotAuthorized', str(e)) continue if existing: _copy_from_existing_for_update(obj, existing, thing) # FIXME: compare and reply when 'unchanged'? if not existing and arguments['--update-only']: reply('show', 'NotFound', [obj.get('id'), obj.get('name')]) continue act = 'update' if existing else 'create' try: if existing: r = ckan.call_action(thing_update, obj, requests_kwargs=requests_kwargs) else: r = ckan.call_action(thing_create, obj) if thing == 'datasets' and 'resources' in obj:# check if it is needed to upload resources when creating/updating packages _upload_resources(ckan,obj,arguments) elif thing in ['groups','organizations'] and 'image_display_url' in obj: #load images for groups and organizations if arguments['--upload-logo']: users = obj['users'] obj = _upload_logo(ckan,obj) obj.pop('image_upload') obj['users'] = users ckan.call_action(thing_update, obj, requests_kwargs=requests_kwargs) except ValidationError as e: reply(act, 'ValidationError', e.error_dict) except SearchIndexError as e: reply(act, 'SearchIndexError', str(e)) except NotAuthorized as e: reply(act, 'NotAuthorized', str(e)) except NotFound: reply(act, 'NotFound', obj) else: reply(act, None, r.get('name',r.get('id'))) def _worker_command_line(thing, arguments): """ Create a worker command line suitable for Popen with only the options the worker process requires """ def a(name): "options with values" return [name, arguments[name]] * (arguments[name] is not None) def b(name): "boolean options" return [name] * bool(arguments[name]) return ( ['ckanapi', 'load', thing, '--worker'] + a('--config') + a('--ckan-user') + a('--remote') + a('--apikey') + b('--create-only') + b('--update-only') + b('--upload-resources') + b('--upload-logo') ) def _copy_from_existing_for_update(obj, existing, thing): """ modifies obj dict in place, copying values from existing. the id is alwasys copied from existing to make sure update updates the correct object. users lists for groups and orgs are maintained if not present in obj """ if 'id' in existing: obj['id'] = existing['id'] if thing in ('organizations', 'groups'): if 'users' not in obj and 'users' in existing: obj['users'] = existing['users'] def _upload_resources(ckan,obj,arguments): resources = obj['resources'] if not arguments['--upload-resources']: return requests_kwargs = None if arguments['--insecure']: requests_kwargs = {'verify': False} for resource in resources: if resource.get('url_type') != 'upload': continue f = requests.get(resource['url'], stream=True, timeout=REQUEST_TIMEOUT) name = resource['url'].rsplit('/',1)[-1] ckan.call_action('resource_patch', {'id':resource['id']}, files={'upload':(name, f.raw)}, requests_kwargs=requests_kwargs) def _upload_logo(ckan,obj_orig): obj = obj_orig.copy() for key in obj_orig.keys(): if isinstance(obj[key],(dict,list)): obj.pop(key) #dict/list objects can't be encoded if urlparse(obj['image_url']).netloc: # logo is an external link obj['clear_upload'] = True obj['image_upload'] = obj['image_url'] else: f = requests.get(obj['image_display_url'], stream=True, timeout=REQUEST_TIMEOUT) name,ext = obj['image_url'].rsplit('.',1) #reformulate image_url for new site new_name = re.sub('[0-9.-]','',name) new_url = new_name+'.'+ext obj['image_upload'] = (new_url, f.raw) ckan.action.group_update(**obj) return obj ================================================ FILE: ckanapi/cli/main.py ================================================ """ckanapi command line inter face Usage: ckanapi action ACTION_NAME [(KEY=STRING | KEY:JSON | KEY@FILE ) ... | -i | -I JSON_INPUT] [-j | -J] [-P PROFILE ] [[-c CONFIG] [-u USER] | -r SITE_URL [-a APIKEY] [-g] [--insecure]] ckanapi batch [-I JSONL_INPUT] [-s START] [-m MAX] [--local-files] [-p PROCESSES] [-l LOG_FILE] [-qwz] [[-c CONFIG] [-u USER] | -r SITE_URL [-a APIKEY] [--insecure]] ckanapi delete (datasets | groups | organizations | users | related) (ID_OR_NAME ... | [-I JSONL_INPUT] [-s START] [-m MAX]) [-p PROCESSES] [-l LOG_FILE] [-qwz] [[-c CONFIG] [-u USER] | -r SITE_URL [-a APIKEY] [--insecure]] ckanapi dump (datasets | groups | organizations | users | related) (ID_OR_NAME ... | --all) ([-O JSONL_OUTPUT] | [-D DIRECTORY]) [-p PROCESSES] [-dqwzRU --include-private --include-drafts --include-deleted] [[-c CONFIG] [-u USER] | -r SITE_URL [-a APIKEY] [-g] [--insecure]] ckanapi load datasets [--upload-resources] [-I JSONL_INPUT] [-s START] [-m MAX] [-p PROCESSES] [-l LOG_FILE] [-n | -o] [-qwz] [[-c CONFIG] [-u USER] | -r SITE_URL [-a APIKEY] [--insecure]] ckanapi load (groups | organizations) [--upload-logo] [-I JSONL_INPUT] [-s START] [-m MAX] [-p PROCESSES] [-l LOG_FILE] [-n | -o] [-qwzU] [[-c CONFIG] [-u USER] | -r SITE_URL [-a APIKEY] [--insecure]] ckanapi load (users | related) [-I JSONL_INPUT] [-s START] [-m MAX] [-p PROCESSES] [-l LOG_FILE] [-n | -o] [-qwz] [[-c CONFIG] [-u USER] | -r SITE_URL [-a APIKEY] [--insecure]] ckanapi search datasets [(KEY=STRING | KEY:JSON ) ... | -i | -I JSON_INPUT] [-O JSONL_OUTPUT] [-z] [[-c CONFIG] [-u USER] | -r SITE_URL [-a APIKEY] [-g] [--insecure]] ckanapi (-h | --help) ckanapi --version Options: -h --help show this screen --version show version -a --apikey=APIKEY API key to use for remote actions --all all the things -c --config=CONFIG CKAN configuration file for local actions, defaults to $CKAN_INI or development.ini -d --datastore-fields export datastore field information along with resource metadata as datastore_fields lists --include-private include private datasets in the dump --include-drafts include draft datasets in the dump --include-deleted include deleted datasets in the dump -D --datapackages=DIR download resources and output as datapackages in DIR instead of metadata-only json lines -g --get-request use GET instead of POST for API calls -i --input-json read json from stdin to send to action -I --input=INPUT input json/ json lines from file instead of stdin -j --output-json output plain json instead of pretty-printed json -J --output-jsonl output list responses as json lines instead of pretty-printed json --local-files allow batch instructions to reference local files for file uploads -l --log=LOG_FILE append messages generated to LOG_FILE -m --max-records=MAX exit after processing MAX records -n --create-only create new records, don't update existing records --insecure ignore verifying the SSL certificate for sites using https -o --update-only update existing records, don't create new records -O --output=JSONL_OUTPUT output to json lines file instead of stdout -p --processes=PROCESSES set the number of worker processes [default: 1] -P --profile=PROFILE run action with cProfile and output to PROFILE only local actions (no -r) will show internals -q --quiet don't display progress messages -r --remote=URL URL of CKAN server for remote actions -R --resource-views export resource views information along with resource metadata as resource_views lists -s --start-record=START start from record number START, where the first record is number 1 [default: 1] -u --ckan-user=USER perform actions as user with this name, uses the site sysadmin user when not specified -U --include-users include users of a group/organization --upload-logo upload logo image of a group/organization if the image is stored in the original server, otherwise its image url will be used --upload-resources upload resources of a dataset that were uploaded to server. Resources originally linked by external urls will keep the urls,will not be uploaded -w --worker launch worker process - used internally by load, dump, delete and batch commands -z --gzip read/write gzipped data """ import sys import os from docopt import docopt import subprocess from ckanapi.version import __version__ from ckanapi.remoteckan import RemoteCKAN from ckanapi.localckan import LocalCKAN from ckanapi.errors import CLIError from ckanapi.cli.load import load_things from ckanapi.cli.dump import dump_things from ckanapi.cli.delete import delete_things from ckanapi.cli.action import action from ckanapi.cli.search import search_datasets from ckanapi.cli.batch import batch_actions from logging import getLogger # explicit logger namespace for easy logging handlers log = getLogger('ckan.ckanapi') def parse_arguments(): # docopt is awesome return docopt(__doc__, version=__version__) def main(running_with_ckan_command=False): """ ckanapi command line entry point """ arguments = parse_arguments() if not running_with_ckan_command and not arguments['--remote']: return _switch_to_ckan_click(arguments) if arguments['--remote']: ckan = RemoteCKAN(arguments['--remote'], apikey=arguments['--apikey'], user_agent="ckanapi-cli/{version} (+{url})".format( version=__version__, url='https://github.com/open-data/ckanapi'), get_only=arguments['--get-request'], ) else: ckan = LocalCKAN(username=arguments['--ckan-user']) # log execution of LocalCKAN commands from ckan.plugins.toolkit import config, asbool if asbool(config.get('ckanapi.log_local')) and len(sys.argv) > 1: cmd = ['who', 'am', 'i'] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = proc.communicate() if not out or err: # fallback to whoami if `who am i` is empty or errored cmd = ['whoami'] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = proc.communicate() if not out or err: # cannot find user out = '' else: # decode and remove line breaks from whoami's out = out.decode().replace('\n', '').replace('\r', '') # split the `who am i` out = out.split()[0] log.info('OS User %s executed LocalCKAN: ckanapi %s', out, ' '.join(sys.argv[1:])) stdout = getattr(sys.stdout, 'buffer', sys.stdout) if arguments['action']: try: for r in action(ckan, arguments): stdout.write(r) return except CLIError as e: sys.stderr.write(e.args[0] + '\n') return 1 things = ['datasets', 'groups', 'organizations', 'users', 'related'] thing = [x for x in things if arguments[x]] if (arguments['load'] or arguments['dump'] or arguments['delete'] ) and arguments['--processes'] != '1' and os.name == 'nt': sys.stderr.write( "multiple worker processes are not supported on windows\n") arguments['--processes'] = '1' if arguments['load']: return load_things(ckan, thing[0], arguments) if arguments['dump']: return dump_things(ckan, thing[0], arguments) if arguments['delete']: return delete_things(ckan, thing[0], arguments) if arguments['search']: return search_datasets(ckan, arguments) if arguments['batch']: return batch_actions(ckan, arguments) assert 0, arguments # we shouldn't be here def _switch_to_ckan_click(arguments): """ Local commands must be run through ckan CLI to set up environment """ if arguments['--config']: # config needs to come before "api" for ckan click CLI sys.exit(os.execvp("ckan", ["ckan", "-c", arguments['--config'], "api"] + sys.argv[1:])) sys.exit(os.execvp("ckan", ["ckan", "api"] + sys.argv[1:])) ================================================ FILE: ckanapi/cli/search.py ================================================ """ implementation of the search datasets cli command """ import sys import gzip import json from os.path import expanduser from ckanapi.cli.utils import compact_json, pretty_json from ckanapi.errors import CLIError ROWS_PER_QUERY = 1000 # match hard limit in some versions of ckan def search_datasets(ckan, arguments, stdin=None, stdout=None, stderr=None): """ call package_search with KEY=STRING, KEY:JSON or JSON args, paginate over the results yield the result """ if stdin is None: stdin = getattr(sys.stdin, 'buffer', sys.stdin) if stdout is None: stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__) if stderr is None: stderr = getattr(sys.stderr, 'buffer', sys.stderr) requests_kwargs = None if arguments['--insecure']: requests_kwargs = {'verify': False} if arguments['--input-json']: action_args = json.loads(stdin.read().decode('utf-8')) elif arguments['--input']: action_args = {} with open(expanduser(arguments['--input'])) as in_f: action_args = json.loads( in_f.read()) else: action_args = {} for kv in arguments['KEY=STRING']: if hasattr(kv, 'decode'): kv = kv.decode('utf-8') skey, p, svalue = kv.partition('=') jkey, p, jvalue = kv.partition(':') if len(jkey) > len(skey): action_args[skey] = svalue elif len(skey) > len(jkey): try: value = json.loads(jvalue) except ValueError: raise CLIError("KEY:JSON argument %r has invalid JSON " "value %r" % (jkey, jvalue)) action_args[jkey] = value else: raise CLIError("argument not in the form KEY=STRING, " "or KEY:JSON %r" % kv) jsonl_output = stdout if arguments['--output']: jsonl_output = open(arguments['--output'], 'wb') if arguments['--gzip']: jsonl_output = gzip.GzipFile(fileobj=jsonl_output) start = int(action_args.get('start', 0)) while True: args = action_args if 'rows' not in action_args: args = dict(action_args, start=start, rows=ROWS_PER_QUERY) result = ckan.call_action( 'package_search', args, requests_kwargs=requests_kwargs ) rows = result['results'] for r in rows: jsonl_output.write(compact_json(r, sort_keys=True) + b'\n') if not rows or 'rows' in action_args: break start += len(rows) if jsonl_output != stdout: jsonl_output.close() ================================================ FILE: ckanapi/cli/utils.py ================================================ """ useful bits of code not tied to ckanapi in any way """ import time import simplejson as json from contextlib import contextmanager def completion_stats(window=1): """ Generate completions/second reports on each iteration. window - window size for completion reports """ stamps = [] while True: stamps.append(time.time()) if len(stamps) < window + 1: yield '---' else: yield '%4.2fs' % ((stamps[-1] - stamps[0]) / window) stamps = stamps[-window:] @contextmanager def quiet_int_pipe(): """ let pipe errors and KeyboardIterrupt exceptions cause silent exit """ errors = [] try: yield errors except KeyboardInterrupt: errors.append('interrupt') except IOError as e: if e.errno != 32: raise errors.append('pipe') def compact_json(r, sort_keys=False): """ JSON as small as we can make it, with UTF-8 """ return json.dumps(r, ensure_ascii=False, separators=(',', ':'), sort_keys=sort_keys).encode('utf-8') def pretty_json(r): """ legible sorted JSON, with UTF-8 """ return json.dumps(r, ensure_ascii=False, separators=(',', ': '), indent=2, sort_keys=True).encode('utf-8') ================================================ FILE: ckanapi/cli/workers.py ================================================ import select import subprocess def worker_pool(popen_arg, num_workers, job_iterable, stop_when_jobs_done=True, stop_on_keyboard_interrupt=True, popen=None): """ Coroutine to manage a pool of workers that accept jobs as single lines of input on stdin and produces results as single lines of output. popen_arg - parameter to pass to subprocess.Popen when creating workers num_workers - maximum number of workers to create job_iterable - iterable producing (job id, job string) tuples where job string should include a single trailing newline stop_when_jobs_done - True: generator exits when all jobs are done stop_on_keyboard_interrupt - True: generator exits on KeyboardIterrupt accepted to send(): job iterable or None, when a new job iterable is sent it will replace the previous one used for assigning jobs to workers This generator blocks until there is a result from one of the workers. yields (currently processing job id list, finished job id, job result) tuples as jobs are completed, or (None, None, None) when no jobs remain to be completed and stop_when_jobs_done is False. currently processing job id list will include None if some workers are idle. job result will include trailing newline. when no jobs remain to be completed and stop_when_jobs_done is False a new job iterable must be sent to this generator with send(). """ if popen is None: popen = subprocess.Popen workers = [] job_ids = [] worker_fds = {} job_iter = iter(job_iterable) def start_job(worker=None): """ assign a job to exiting or newly created worker subprocess. returns (job_id, worker) or (None, None) when no more jobs """ job_id, job_str = next(job_iter, (None, None)) if job_str is None: return None, None job_str = job_str.rstrip(b'\n') + b'\n' if not worker: worker = popen( popen_arg, stdin=subprocess.PIPE, stdout=subprocess.PIPE, ) worker.stdin.write(job_str) worker.stdin.flush() return (job_id, worker) def assign_jobs(): """ start as many jobs as possible given maximum/idle workers and available jobs """ while None in job_ids: wnum = job_ids.index(None) job_ids[wnum], w = start_job(workers[wnum]) if w is None: return while len(workers) < num_workers: job_id, w = start_job() if w is None: return worker_fds[w.stdout] = len(workers) workers.append(w) job_ids.append(job_id) try: assign_jobs() while True: if all(i is None for i in job_ids): if stop_when_jobs_done: return new_jobs = yield (None, None, None) # require new jobs to be submitted job_iter = iter(new_jobs) assign_jobs() continue try: readable, _, _ = select.select(worker_fds, [], []) except select.error as e: if e.args[0] == 10038: # XXX: no many-worker support on windows yet readable = list(worker_fds)[:1] else: raise except KeyboardInterrupt: if stop_on_keyboard_interrupt: return raise fd = readable[0] wnum = worker_fds[fd] w = workers[wnum] result = w.stdout.readline() finished = job_ids[wnum] job_ids[wnum], _ = start_job(w) new_jobs = yield (job_ids, finished, result) if new_jobs: job_iter = iter(new_jobs) assign_jobs() finally: for w in workers: w.stdin.close() ================================================ FILE: ckanapi/common.py ================================================ """ Code shared by LocalCKAN, RemoteCKAN and TestCKAN """ import json import os from ckanapi.errors import (CKANAPIError, NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, SearchIndexError, ServerIncompatibleError) if request_connection_timeout := os.getenv("CKANAPI_REQUEST_TIMEOUT"): request_connection_timeout = int(request_connection_timeout) request_read_timeout= int(os.getenv("CKANAPI_REQUEST_READ_TIMEOUT", default=request_connection_timeout)) REQUEST_TIMEOUT = (request_connection_timeout, request_read_timeout) else: REQUEST_TIMEOUT = None class ActionShortcut(object): """ ActionShortcut(foo).bar(baz=2) <=> foo.call_action('bar', {'baz':2}) An instance of this class is used as the .action attribute of LocalCKAN and RemoteCKAN instances to provide a short way to call actions, e.g:: pkg = demo.action.package_show(id='adur_district_spending') instead of:: pkg = demo.call_action('package_show', {'id':'adur_district_spending'}) File-like values (objects with a 'read' attribute) are sent as file-uploads:: pkg = demo.action.resource_update(package_id='foo', upload=open(..)) becomes:: pkg = demo.call_action('resource_update', {'package_id': 'foo'}, files={'upload': open(..)}) """ def __init__(self, ckan): self._ckan = ckan def __getattr__(self, name): def action(**kwargs): files = {} for k, v in kwargs.items(): if is_file_like(v): files[k] = v if files: nonfiles = dict((k, v) for k, v in kwargs.items() if k not in files) return self._ckan.call_action(name, data_dict=nonfiles, files=files) return self._ckan.call_action(name, data_dict=kwargs) return action def is_file_like(v): """ Return True if this object is file-like or is a tuple in a format that the requests library would accept for uploading. """ # see http://docs.python-requests.org/en/latest/user/quickstart/#more-complicated-post-requests return hasattr(v, 'read') or ( isinstance(v, tuple) and len(v) >= 2 and hasattr(v[1], 'read')) def prepare_action(action, data_dict=None, apikey=None, files=None, base_url='api/action/'): """ Return action_url, data_json, http_headers """ if not data_dict: data_dict = {} headers = {} if files: # when uploading files all parameters must be strings and # no nesting is allowed because request is sent as multipart items = data_dict.items() data_dict = {} for (k, v) in items: if v is None: continue # assuming missing will work the same as None if isinstance(v, (int, float)): v = str(v) data_dict[k.encode('utf-8')] = v.encode('utf-8') else: data_dict = json.dumps(data_dict).encode('ascii') headers['Content-Type'] = 'application/json' if apikey: apikey = str(apikey) headers['X-CKAN-API-Key'] = apikey headers['Authorization'] = apikey url = base_url + action return url, data_dict, headers def reverse_apicontroller_action(url, status, response): """ Make an API call look like a direct action call by reversing the exception -> HTTP response translation that ApiController.action does """ try: parsed = json.loads(response) if parsed.get('success'): return parsed['result'] if hasattr(parsed, 'get'): err = parsed.get('error', {}) else: err = {} except (AttributeError, ValueError): err = {} if not isinstance(err, dict): # possibly a Socrata API. raise ServerIncompatibleError(repr([url, status, response])) etype = err.get('__type') emessage = err.get('message', '') if hasattr(emessage, 'split'): emessage = emessage.split(': ', 1)[-1] if etype == 'Search Query Error': # I refuse to eval(emessage), even if it would be more correct raise SearchQueryError(emessage) elif etype == 'Search Error': # I refuse to eval(emessage), even if it would be more correct raise SearchError(emessage) elif etype == 'Search Index Error': raise SearchIndexError(emessage) elif etype == 'Validation Error': raise ValidationError(err) elif etype == 'Not Found Error': raise NotFound(emessage) elif etype == 'Authorization Error': raise NotAuthorized(err) # don't recognize the error raise CKANAPIError(repr([url, status, response])) ================================================ FILE: ckanapi/datapackage.py ================================================ import os import requests import json import slugify from ckanapi.common import REQUEST_TIMEOUT from ckanapi.cli.utils import pretty_json from ckanapi.errors import CKANAPIError, NotFound DL_CHUNK_SIZE = 100 * 1024 DATAPACKAGE_TYPES = { # map datastore types to datapackage types 'text': 'string', 'numeric': 'number', 'timestamp': 'datetime', } def create_resource(resource, filename, datapackage_dir, stderr, apikey): '''Downloads the resource['url'] to disk. ''' path = os.path.join('data', filename) headers = {} headers['X-CKAN-API-Key'] = apikey headers['Authorization'] = apikey try: r = requests.get(resource['url'], headers=headers, stream=True, timeout=REQUEST_TIMEOUT) with open(os.path.join(datapackage_dir, path), 'wb') as f: for chunk in r.iter_content(chunk_size=DL_CHUNK_SIZE): if chunk: # filter out keep-alive new chunks f.write(chunk) return dict(resource, path=path) except requests.ConnectionError: stderr.write('URL {url} refused connection. The resource will not be downloaded\n'.format(url=resource['url'])) except requests.exceptions.RequestException as e: stderr.write(str(e.args[0]) if len(e.args) > 0 else '') stderr.write('\n') except Exception as e: stderr.write(str(e.args[0]) if len(e.args) > 0 else '') return resource def create_datapackage(record, base_path, stderr, apikey): # TODO: how are we going to handle which resources to # leave alone? They're very inconsistent in some instances # And I can't imagine anyone wants to download a copy # of, for example, the API base endpoint resource_formats_to_ignore = ['API', 'api'] dataset_name = record.get('name', '') datapackage_dir = os.path.join(base_path, dataset_name) os.makedirs(os.path.join(datapackage_dir, 'data')) # filter out some resources ckan_resources = [] for resource in record.get('resources', []): if resource['format'] in resource_formats_to_ignore: continue ckan_resources.append(resource) dataset = dict(record, resources=ckan_resources) # get the datapackage (metadata) datapackage = dataset_to_datapackage(dataset) for cres, dres in zip(ckan_resources, datapackage.get('resources', [])): filename = resource_filename(dres) # download the resource cres = \ create_resource(resource, filename, datapackage_dir, stderr, apikey) dres['path'] = 'data/' + filename populate_schema_from_datastore(cres, dres) json_path = os.path.join(datapackage_dir, 'datapackage.json') with open(json_path, 'wb') as out: out.write(pretty_json(datapackage)) return datapackage_dir, datapackage, json_path def resource_filename(dres): # prefer resource names from datapackage metadata, because those have been # made unique name = dres['name'] ext = slugify.slugify(dres['format']) if name.endswith(ext): name = name[:-len(ext)] return name + '.' + ext def populate_schema_from_datastore(cres, dres): """ populate the data schema in a datapackage resource, from the Datastore. This info must already be added to the cres using 'populate_datastore_res_fields' :param cres: CKAN resource dict :param dres: datapackage.json style resource dict, for the same resource as the cres """ # convert datastore data dictionary to datapackage schema if 'schema' not in dres and 'datastore_fields' in cres: fields = [] for f in cres['datastore_fields']: if f['id'] == '_id': continue df = {'name': f['id']} dtyp = DATAPACKAGE_TYPES.get(f['type']) if dtyp: df['type'] = dtyp dtit = f.get('info', {}).get('label', '') if dtit: df['title'] = dtit ddesc = f.get('info', {}).get('notes', '') if ddesc: df['description'] = ddesc fields.append(df) dres['schema'] = {'fields': fields} def populate_datastore_res_fields(ckan, res): """ update resource dict in-place with datastore_fields values in every resource with datastore active using ckan LocalCKAN/RemoteCKAN instance """ if not res.get('datastore_active', False): return try: ds = ckan.call_action('datastore_search', { 'resource_id': res['id'], 'limit':0}) except CKANAPIError: return except NotFound: return # with localckan we'll get the real CKAN exception not a CKANAPIError subclass res['datastore_fields'] = ds['fields'] # functions below are from https://github.com/frictionlessdata/ckan-datapackage-tools # commit c87e07d0d0 # we can't import and use until dependency issue is resolved: # https://github.com/frictionlessdata/ckan-datapackage-tools/issues/11 def _convert_to_datapackage_resource(resource_dict): '''Convert a CKAN resource dict into a Data Package resource dict. from https://github.com/frictionlessdata/ckan-datapackage-tools ''' resource = {} if resource_dict.get('url'): resource['path'] = resource_dict['url'] # TODO: DataStore only resources? if resource_dict.get('description'): resource['description'] = resource_dict['description'] if resource_dict.get('format'): resource['format'] = resource_dict['format'] if resource_dict.get('hash'): resource['hash'] = resource_dict['hash'] if resource_dict.get('name'): resource['name'] = slugify.slugify(resource_dict['name']).lower() resource['title'] = resource_dict['name'] else: resource['name'] = resource_dict['id'] schema = resource_dict.get('schema') if isinstance(schema, str): try: resource['schema'] = json.loads(schema) except ValueError: # Assume it's a path or URL resource['schema'] = schema elif isinstance(schema, dict): resource['schema'] = schema return resource def dataset_to_datapackage(dataset_dict): '''Convert the given CKAN dataset dict into a Data Package dict. :returns: the datapackage dict :rtype: dict ''' PARSERS = [ _rename_dict_key('title', 'title'), _rename_dict_key('version', 'version'), _parse_ckan_url, _parse_notes, _parse_license, _parse_author_and_source, _parse_maintainer, _parse_tags, _parse_extras, ] dp = { 'name': dataset_dict['name'] } for parser in PARSERS: dp.update(parser(dataset_dict)) resources = dataset_dict.get('resources') if resources: dp['resources'] = [_convert_to_datapackage_resource(r) for r in resources] # Ensure unique resource names names = {} for resource in dp.get('resources', []): if resource['name'] in names.keys(): old_resource_name = resource['name'] resource['name'] = resource['name'] + str(names[old_resource_name]) names[old_resource_name] += 1 else: names[resource['name']] = 0 return dp def _rename_dict_key(original_key, destination_key): def _parser(the_dict): result = {} if the_dict.get(original_key): result[destination_key] = the_dict[original_key] return result return _parser def _parse_ckan_url(dataset_dict): result = {} if dataset_dict.get('ckan_url'): result['homepage'] = dataset_dict['ckan_url'] return result def _parse_notes(dataset_dict): result = {} if dataset_dict.get('notes'): result['description'] = dataset_dict['notes'] return result def _parse_license(dataset_dict): result = {} license = {} if dataset_dict.get('license_id'): license['type'] = dataset_dict['license_id'] if dataset_dict.get('license_title'): license['title'] = dataset_dict['license_title'] if dataset_dict.get('license_url'): license['url'] = dataset_dict['license_url'] if license: result['license'] = license return result def _parse_author_and_source(dataset_dict): result = {} source = {} if dataset_dict.get('author'): source['name'] = dataset_dict['author'] if dataset_dict.get('author_email'): source['email'] = dataset_dict['author_email'] if dataset_dict.get('url'): source['web'] = dataset_dict['url'] if source: result['sources'] = [source] return result def _parse_maintainer(dataset_dict): result = {} author = {} if dataset_dict.get('maintainer'): author['name'] = dataset_dict['maintainer'] if dataset_dict.get('maintainer_email'): author['email'] = dataset_dict['maintainer_email'] if author: result['author'] = author return result def _parse_tags(dataset_dict): result = {} keywords = [tag['name'] for tag in dataset_dict.get('tags', [])] if keywords: result['keywords'] = keywords return result def _parse_extras(dataset_dict): result = {} extras = [[extra['key'], extra['value']] for extra in dataset_dict.get('extras', [])] for extra in extras: try: extra[1] = json.loads(extra[1]) except (ValueError, TypeError): pass if extras: result['extras'] = dict(extras) return result ================================================ FILE: ckanapi/errors.py ================================================ class ServerIncompatibleError(Exception): """ The error raised from RemoteCKAN.call_action when the API doesn't behave like a CKAN API. """ class CKANAPIError(Exception): """ The error raised from RemoteCKAN.call_action when no other error is recognized. If importing CKAN source fails then new versions of NotAuthorized, ValidationError, NotFound, SearchQueryError, SearchError and SearchIndexError are created as subclasses of this class so that they provide a helpful str() for tracebacks. """ def __init__(self, extra_msg=None): self.extra_msg = extra_msg def __str__(self): return str(self.extra_msg) class CLIError(Exception): pass try: from ckan.logic import (NotAuthorized, NotFound, ValidationError) from ckan.lib.search import (SearchQueryError, SearchError, SearchIndexError) except ImportError: # Implement the minimum to be compatible with existing errors # without requiring CKAN class NotAuthorized(CKANAPIError): pass class ValidationError(CKANAPIError): def __init__(self, error_dict): self.error_dict = error_dict def __str__(self): return repr(self.error_dict) class NotFound(CKANAPIError): def __init__(self, extra_msg=None): self.extra_msg = extra_msg def __str__(self): return self.extra_msg class SearchQueryError(CKANAPIError): pass class SearchError(CKANAPIError): pass class SearchIndexError(CKANAPIError): pass ================================================ FILE: ckanapi/localckan.py ================================================ from tempfile import TemporaryFile from ckanapi.errors import CKANAPIError from ckanapi.common import ActionShortcut COPY_CHUNK = 1024*1024 class LocalCKAN(object): """ An interface to calling actions with get_action() for CKAN plugins. :param username: perform action as this user, defaults to the site user and stored as self.username :param context: a default context dict to use when calling actions, stored as self.context with username added as its 'user' value """ def __init__(self, username=None, context=None): from ckan.logic import get_action self._get_action = get_action if username is None: username = self.get_site_username() self.username = username self.context = dict(context or [], user=self.username) self.action = ActionShortcut(self) def get_site_username(self): user = self._get_action('get_site_user')({'ignore_auth': True}, ()) return user['name'] def call_action(self, action, data_dict=None, context=None, apikey=None, files=None, requests_kwargs=None): """ :param action: the action name, e.g. 'package_create' :param data_dict: the dict to pass to the action, defaults to {} :param context: an override for the context to use for this action, remember to include a 'user' when necessary :param apikey: not supported :param files: None or {field-name: file-to-be-sent, ...} :param requests_kwargs: ignored for LocalCKAN (requests not used) """ # copy dicts because actions may modify the dicts they are passed # (CKAN...you so crazy) data_dict = dict(data_dict or []) context = dict(self.context if context is None else context) if apikey: # FIXME: allow use of apikey to set a user in context? raise CKANAPIError("LocalCKAN.call_action does not support " "use of apikey parameter, use context['user'] instead") to_close = [] try: for fieldname in files or []: f = files[fieldname] if isinstance(f, tuple): # requests accepts (filename, file...) tuples filename, f = f[:2] else: filename = f.name try: f.seek(0) except (AttributeError, IOError): f = _write_temp_file(f) to_close.append(f) from werkzeug.datastructures import FileStorage file_storage = FileStorage() file_storage.stream = f file_storage.filename = filename data_dict[fieldname] = file_storage return self._get_action(action)(context, data_dict) finally: for f in to_close: f.close() def _write_temp_file(f): """ Pull all data from stream f into a temporary file Caller must close file returned. """ out = TemporaryFile() while True: # FIXME: check for maximum size? chunk = f.read(COPY_CHUNK) if not chunk: break out.write(chunk) return out ================================================ FILE: ckanapi/remoteckan.py ================================================ from urllib.request import Request, urlopen, HTTPError from urllib.parse import urlparse from ckanapi.errors import CKANAPIError from ckanapi.common import (ActionShortcut, prepare_action, reverse_apicontroller_action, REQUEST_TIMEOUT) from ckanapi.version import __version__ import os # add your sites to remove parallel limits on ckanapi cli MY_SITES = ['localhost', '127.0.0.1', '[::1]'] CKANAPI_MY_SITES = os.getenv('CKANAPI_MY_SITES') if CKANAPI_MY_SITES: additional_sites = CKANAPI_MY_SITES.split() MY_SITES.extend(additional_sites) # add your site above instead of changing this PARALLEL_LIMIT = os.getenv('CKANAPI_PARALLEL_LIMIT', default = 3) import requests class RemoteCKAN(object): """ An interface to the the CKAN API actions on a remote CKAN instance. :param address: the web address of the CKAN instance, e.g. 'http://demo.ckan.org', stored as self.address :param apikey: the API key to pass as an 'X-CKAN-API-Key' header when actions are called, stored as self.apikey :param user_agent: the User-agent to report when making requests :param get_only: only use GET requests (default: False) :param session: session to use (default: None) """ base_url = 'api/action/' def __init__(self, address, apikey=None, user_agent=None, get_only=False, session=None): self.address = address self.apikey = apikey self.get_only = get_only self.session = session if not user_agent: user_agent = "ckanapi/{version} (+{url})".format( version=__version__, url='https://github.com/ckan/ckanapi') self.user_agent = user_agent self.action = ActionShortcut(self) net_loc = urlparse(address) if ']' in net_loc: net_loc = net_loc[:net_loc.index(']') + 1] elif ':' in net_loc: net_loc = net_loc[:net_loc.index(':')] if net_loc not in MY_SITES: # add your sites to MY_SITES above instead of removing this self.parallel_limit = PARALLEL_LIMIT def call_action(self, action, data_dict=None, context=None, apikey=None, files=None, requests_kwargs=None): """ :param action: the action name, e.g. 'package_create' :param data_dict: the dict to pass to the action as JSON, defaults to {} :param context: always set to None for RemoteCKAN :param apikey: API key for authentication :param files: None or {field-name: file-to-be-sent, ...} :param requests_kwargs: kwargs for requests get/post calls This function parses the response from the server as JSON and returns the decoded value. When an error is returned this function will convert it back to an exception that matches the one the action function itself raised. """ if context: raise CKANAPIError("RemoteCKAN.call_action does not support " "use of context parameter, use apikey instead") if files and self.get_only: raise CKANAPIError("RemoteCKAN: files may not be sent when " "get_only is True") url, data, headers = prepare_action( action, data_dict, apikey or self.apikey, files, base_url=self.base_url) headers['User-Agent'] = self.user_agent url = self.address.rstrip('/') + '/' + url requests_kwargs = requests_kwargs or {} requests_kwargs.setdefault("timeout", REQUEST_TIMEOUT) if not self.session: self.session = requests.Session() if self.get_only: status, response = self._request_fn_get(url, data_dict, headers, requests_kwargs) else: status, response = self._request_fn(url, data, headers, files, requests_kwargs) return reverse_apicontroller_action(url, status, response) def _request_fn(self, url, data, headers, files, requests_kwargs): r = self.session.post(url, data=data, headers=headers, files=files, allow_redirects=False, **requests_kwargs) # allow_redirects=False because: if a post is redirected (e.g. 301 due # to a http to https redirect), then the second request is made to the # new URL, but *without* the data. This gives a confusing "No request # body data" error. It is better to just return the 301 to the user, so # we disallow redirects. return r.status_code, r.text def _request_fn_get(self, url, data_dict, headers, requests_kwargs): r = self.session.get(url, params=data_dict, headers=headers, **requests_kwargs) return r.status_code, r.text def close(self): """Close session""" if self.session: self.session.close() self.session = None def __enter__(self): return self def __exit__(self, *args): self.close() ================================================ FILE: ckanapi/testappckan.py ================================================ import os.path from ckanapi.errors import CKANAPIError from ckanapi.common import (ActionShortcut, prepare_action, reverse_apicontroller_action) class TestAppCKAN(object): """ An interface to the the CKAN API actions on a paste TestApp :param test_app: the paste.fixture.TestApp instance, stored as self.test_app :param apikey: the API key to pass as an 'X-CKAN-API-Key' header when actions are called, stored as self.apikey """ def __init__(self, test_app, apikey=None): self.test_app = test_app self.apikey = apikey self.action = ActionShortcut(self) def call_action(self, action, data_dict=None, context=None, apikey=None, files=None): """ :param action: the action name, e.g. 'package_create' :param data_dict: the dict to pass to the action as JSON, defaults to {} :param context: not supported :param files: None or {field-name: file-to-be-sent, ...} This function parses the response from the server as JSON and returns the decoded value. When an error is returned this function will convert it back to an exception that matches the one the action function itself raised. """ if context: raise CKANAPIError("TestAppCKAN.call_action does not support " "use of context parameter, use apikey instead") url, data, headers = prepare_action(action, data_dict, apikey or self.apikey, files) kwargs = {} if files: # Convert the list of (fieldname, file_object) tuples into the # (fieldname, filename, file_contents) tuples that webtests needs. upload_files = [] for fieldname, file_ in files.items(): if hasattr(file_, 'name'): filename = os.path.split(file_.name)[1] else: filename = fieldname upload_files.append( (fieldname, filename, file_.read()) ) kwargs['upload_files'] = upload_files r = self.test_app.post('/' + url, params=data, headers=headers, expect_errors=True, **kwargs) return reverse_apicontroller_action(url, r.status, r.body) ================================================ FILE: ckanapi/tests/__init__.py ================================================ ================================================ FILE: ckanapi/tests/mock/mock_ckan.py ================================================ import json import csv from io import StringIO from werkzeug.formparser import parse_form_data from wsgiref.simple_server import make_server def mock_ckan(environ, start_response): status = '200 OK' headers = [ ('Content-type', 'application/json;charset=utf-8'), ] if environ['PATH_INFO'] == '/api/action/site_read': start_response(status, headers) return [json.dumps(True).encode('utf-8')] if environ['PATH_INFO'] == '/api/action/organization_list': start_response(status, headers) return [json.dumps({ "help": "none", "success": True, "result": ["aa", "bb", "cc"] }).encode('utf-8')] if environ['PATH_INFO'] == '/api/action/test_echo_user_agent': start_response(status, headers) return [json.dumps({ "help": "none", "success": True, "result": environ['HTTP_USER_AGENT'] }).encode('utf-8')] if environ['PATH_INFO'] == '/api/action/test_echo_content_type': start_response(status, headers) return [json.dumps({ "help": "none", "success": True, "result": environ['CONTENT_TYPE'] }).encode('utf-8')] if environ['PATH_INFO'] == '/api/action/test_upload': _, form, files = parse_form_data(environ) upload_data = files['upload'].stream.read().decode('utf-8').splitlines() csv_file = StringIO() writer = csv.writer(csv_file) for line_data in upload_data: row_data = line_data.split(',') writer.writerow(row_data) csv_file.seek(0) records = list(csv.reader(csv_file)) start_response(status, headers) return [json.dumps({ "help": "none", "success": True, "result": { 'option': form['option'], 'last_row': records[-1], }, }).encode('utf-8')] if environ['PATH_INFO'].startswith('/api/action/'): start_response(status, headers) return [json.dumps({ "help": "none", "success": False, "error": {'__type': 'Not Found Error'}, }).encode('utf-8')] start_response('404 Not Found', headers) return [] httpd = make_server('localhost', 8901, mock_ckan) httpd.serve_forever() ================================================ FILE: ckanapi/tests/test_call.py ================================================ import ckanapi import unittest class TestCallAction(unittest.TestCase): def test_local_fail(self): try: import ckan except ImportError: raise unittest.SkipTest('ckan not importable') self.assertRaises( ckanapi.CKANAPIError, ckanapi.LocalCKAN('fake').call_action, 'fake', {}, {}, 'apikey not allowed') def test_remote_fail(self): self.assertRaises( ckanapi.CKANAPIError, ckanapi.RemoteCKAN('fake').call_action, 'fake', {}, 'context not allowed') def test_test_fail(self): self.assertRaises( ckanapi.CKANAPIError, ckanapi.TestAppCKAN('fake').call_action, 'fake', {}, 'context not allowed') ================================================ FILE: ckanapi/tests/test_cli_action.py ================================================ from ckanapi.cli.action import action from ckanapi.errors import CLIError import unittest from io import BytesIO class MockCKAN(object): def __init__(self, expected_name, expected_args, response, expected_files=None): self._expected_name = expected_name self._expected_args = expected_args self._expected_files = expected_files or {} self._response = response def call_action(self, name, args, context=None, apikey=None, files=None, requests_kwargs=None): if name != self._expected_name: return ["wrong name", name, self._expected_name] if args != self._expected_args: return ["wrong args", args, self._expected_args] files = dict((f, v.name) for f,v in files.items()) if files != self._expected_files: return ["wrong files", files, self._expected_files] return self._response class TestCLIAction(unittest.TestCase): def test_pretty(self): ckan = MockCKAN('shake_it', {'who': 'me'}, {"oh": ["right", "on"]}) rval = action(ckan, { 'ACTION_NAME': 'shake_it', 'KEY=STRING': ['who=me'], '--output-json': False, '--output-jsonl': False, '--input-json': False, '--input': None, '--insecure': False, '--profile': None, }) self.assertEqual(b''.join(rval), b""" { "oh": [ "right", "on" ] } """.lstrip()) def test_compact(self): ckan = MockCKAN('shake_it', {'who': 'me'}, ["right", "on"]) rval = action(ckan, { 'ACTION_NAME': 'shake_it', 'KEY=STRING': ['who=me'], '--output-json': True, '--output-jsonl': False, '--input-json': False, '--input': None, '--insecure': False, '--profile': None, }) self.assertEqual(b''.join(rval), b'["right","on"]\n') def test_compact_fallback(self): ckan = MockCKAN('shake_it', {'who': 'me'}, {"oh": ["right", "on"]}) rval = action(ckan, { 'ACTION_NAME': 'shake_it', 'KEY=STRING': ['who=me'], '--output-json': False, '--output-jsonl': True, '--input-json': False, '--input': None, '--insecure': False, '--profile': None, }) self.assertEqual(b''.join(rval), b'{"oh":["right","on"]}\n') def test_jsonl(self): ckan = MockCKAN('shake_it', {'who': 'me'}, [99,98,97]) rval = action(ckan, { 'ACTION_NAME': 'shake_it', 'KEY=STRING': ['who=me'], '--output-json': False, '--output-jsonl': True, '--input-json': False, '--input': None, '--insecure': False, '--profile': None, }) self.assertEqual(b''.join(rval), b'99\n98\n97\n') def test_stdin_json(self): ckan = MockCKAN('shake_it', {'who': ['just', 'me']}, "yeah") rval = action(ckan, { 'ACTION_NAME': 'shake_it', 'KEY=STRING': ['who=me'], '--output-json': False, '--output-jsonl': False, '--input-json': True, '--input': None, '--insecure': False, '--profile': None, }, stdin=BytesIO(b'{"who":["just","me"]}'), ) self.assertEqual(b''.join(rval), b'"yeah"\n') def test_key_json(self): ckan = MockCKAN('shake_it', {'who': ['just', 'me']}, "yeah") rval = action(ckan, { 'ACTION_NAME': 'shake_it', 'KEY=STRING': ['who:["just", "me"]'], '--output-json': False, '--output-jsonl': False, '--input-json': False, '--input': None, '--insecure': False, '--profile': None, }) self.assertEqual(b''.join(rval), b'"yeah"\n') def test_bad_arg(self): ckan = MockCKAN('shake_it', {'who': 'me'}, "yeah") rval = action(ckan, { 'ACTION_NAME': 'shake_it', 'KEY=STRING': ['who'], '--output-json': False, '--output-jsonl': False, '--input-json': False, '--input': None, '--insecure': False, '--profile': None, }) self.assertRaises(CLIError, list, rval) def test_bad_key_json(self): ckan = MockCKAN('shake_it', {'who': 'me'}, "yeah") rval = action(ckan, { 'ACTION_NAME': 'shake_it', 'KEY=STRING': ['who:me'], '--output-json': False, '--output-jsonl': False, '--input-json': False, '--input': None, '--insecure': False, '--profile': None, }) self.assertRaises(CLIError, list, rval) def test_key_string_or_json(self): ckan = MockCKAN('shake_it', {'who': 'me=you'}, "yeah") rval = action(ckan, { 'ACTION_NAME': 'shake_it', 'KEY=STRING': ['who:"me=you"'], '--output-json': False, '--output-jsonl': False, '--input-json': False, '--input': None, '--insecure': False, '--profile': None, }) self.assertEqual(b''.join(rval), b'"yeah"\n') def test_key_json_or_string(self): ckan = MockCKAN('shake_it', {'who': 'me:you'}, "yeah") rval = action(ckan, { 'ACTION_NAME': 'shake_it', 'KEY=STRING': ['who=me:you'], '--output-json': False, '--output-jsonl': False, '--input-json': False, '--input': None, '--insecure': False, '--profile': None, }) self.assertEqual(b''.join(rval), b'"yeah"\n') ================================================ FILE: ckanapi/tests/test_cli_dump.py ================================================ from ckanapi.cli.dump import dump_things, dump_things_worker from ckanapi.errors import NotFound import json import tempfile import shutil from os.path import exists import unittest from io import BytesIO class MockCKAN(object): def call_action(self, name, data_dict, requests_kwargs=None): try: return { 'package_list': { None: ['12', '34', 'dp']}, 'package_show': { '12': { 'id': '12', 'name': 'twelve', 'title': "Twelve"}, '34': { 'id': '34', 'name': 'thirtyfour', 'title': "Thirty-four"}, 'dp': { 'id': 'dp', 'name': 'dp', 'title': 'Test for datapackage', 'resources': [ {'name': 'resource1', 'id': 'd902fafc-5717-4dd0-87f2-7a6fc96989b7', 'format': 'csv', 'datastore_active': True, 'url': 'https://google.com'}]}}, 'group_show': { 'ab': {'title': "ABBA"}}, 'organization_show': { 'cd': {'title': "Super Trouper"}}, 'datastore_search': { 'd902fafc-5717-4dd0-87f2-7a6fc96989b7': {'fields': [{ 'id': 'col1', 'type': 'text', 'info': { 'label': 'Column One', 'notes': 'Description One', }}]}}, 'resource_view_list': { 'd902fafc-5717-4dd0-87f2-7a6fc96989b7': [{ 'description': 'Test view', 'filterable': True, 'id': 'd902fafc-5717-4dd0-87f2-7a6fc96989d9', 'package_id': 'dp', 'resource_id': 'd902fafc-5717-4dd0-87f2-7a6fc96989b7', 'responsive': True, 'show_fields': ['_id']}]}, }[name][data_dict.get('id') or data_dict.get('resource_id')] except KeyError: raise NotFound() class TestCLIDump(unittest.TestCase): def setUp(self): self.ckan = MockCKAN() self.stdout = BytesIO() self.stderr = BytesIO() def test_worker_one(self): rval = dump_things_worker(self.ckan, 'datasets', {'--datastore-fields': False, '--resource-views': False, '--insecure': False}, stdin=BytesIO(b'"34"\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(error, None) self.assertEqual(data["title"], "Thirty-four") def test_worker_two(self): rval = dump_things_worker(self.ckan, 'datasets', {'--datastore-fields': False, '--resource-views': False, '--insecure': False}, stdin=BytesIO(b'"12"\n"34"\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response.count(b'\n'), 2, response) self.assertEqual(response[-1:], b'\n') r1, r2 = response.split(b'\n', 1) timstamp, error, data = json.loads(r1.decode('UTF-8')) self.assertEqual(error, None) self.assertEqual(data["title"], "Twelve") timstamp, error, data = json.loads(r2.decode('UTF-8')) self.assertEqual(error, None) self.assertEqual(data["title"], "Thirty-four") def test_worker_error(self): dump_things_worker(self.ckan, 'datasets', {'--insecure': False}, stdin=BytesIO(b'"99"\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(error, "NotFound") self.assertEqual(data, None) def test_worker_group(self): dump_things_worker(self.ckan, 'groups', {'--insecure': False}, stdin=BytesIO(b'"ab"\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(error, None) self.assertEqual(data, {"title":"ABBA"}) def test_worker_organization(self): dump_things_worker(self.ckan, 'organizations', {'--insecure': False}, stdin=BytesIO(b'"cd"\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(error, None) self.assertEqual(data, {"title":"Super Trouper"}) def test_parent_dump_all(self): dump_things(self.ckan, 'datasets', { '--quiet': False, '--ckan-user': None, '--config': None, '--remote': None, '--apikey': None, '--worker': False, '--log': None, '--output': None, '--datapackages': None, '--gzip': False, '--all': True, '--processes': '1', '--get-request': False, '--datastore-fields': False, '--resource-views': False, '--insecure': False, '--include-users': False, }, worker_pool=self._mock_worker_pool, stdout=self.stdout, stderr=self.stderr) self.assertEqual(self.worker_cmd, [ 'ckanapi', 'dump', 'datasets', '--worker', 'value-here-to-make-docopt-happy']) self.assertEqual(self.worker_processes, 1) self.assertEqual(self.worker_jobs, [(0, b'"12"\n'), (1, b'"34"\n'), (2, b'"dp"\n')]) def test_parent_parallel_limit(self): self.ckan.parallel_limit = 2 dump_things(self.ckan, 'datasets', { '--quiet': False, '--ckan-user': None, '--config': None, '--remote': None, '--apikey': None, '--worker': False, '--log': None, '--output': None, '--datapackages': None, '--gzip': False, '--all': False, 'ID_OR_NAME': ['12'], '--processes': '5', '--get-request': False, '--datastore-fields': False, '--resource-views': False, '--insecure': False, '--include-users': False, }, worker_pool=self._mock_worker_pool, stdout=self.stdout, stderr=self.stderr) self.assertEqual(self.worker_cmd, [ 'ckanapi', 'dump', 'datasets', '--worker', 'value-here-to-make-docopt-happy']) self.assertEqual(self.worker_processes, 2) def test_parent_id_argument(self): dump_things(self.ckan, 'groups', { '--quiet': False, '--ckan-user': None, '--config': None, '--remote': None, '--apikey': None, '--worker': False, '--log': None, '--output': None, '--datapackages': None, '--gzip': False, '--all': False, 'ID_OR_NAME': ['ab'], '--processes': '1', '--get-request': False, '--datastore-fields': False, '--resource-views': False, '--insecure': False, '--include-users': False, }, worker_pool=self._mock_worker_pool, stdout=self.stdout, stderr=self.stderr) self.assertEqual(self.worker_cmd, [ 'ckanapi', 'dump', 'groups', '--worker', 'value-here-to-make-docopt-happy']) self.assertEqual(self.worker_processes, 1) self.assertEqual(self.worker_jobs, [(0, b'"ab"\n')]) def test_parent_maintain_order(self): dump_things(self.ckan, 'organizations', { '--quiet': False, '--ckan-user': None, '--config': None, '--remote': None, '--apikey': None, '--worker': False, '--log': None, '--output': None, '--datapackages': None, '--gzip': False, '--all': False, 'ID_OR_NAME': ['P', 'Q', 'R', 'S'], '--processes': '1', '--get-request': False, '--datastore-fields': False, '--resource-views': False, '--insecure': False, '--include-users': False, }, worker_pool=self._mock_worker_pool_reversed, stdout=self.stdout, stderr=self.stderr) self.assertEqual(self.worker_cmd, [ 'ckanapi', 'dump', 'organizations', '--worker', 'value-here-to-make-docopt-happy']) self.assertEqual(self.worker_processes, 1) self.assertEqual(self.stdout.getvalue(), b'{"id":"P"}\n' b'{"id":"Q"}\n' b'{"id":"R"}\n' b'{"id":"S"}\n') def test_parent_datapackages(self): target = tempfile.mkdtemp() try: dump_things(self.ckan, 'datasets', { '--quiet': False, '--ckan-user': None, '--config': None, '--remote': None, '--apikey': None, '--worker': False, '--log': None, '--output': None, '--datapackages': target, '--gzip': False, '--all': True, '--processes': '1', '--get-request': False, '--datastore-fields': False, '--resource-views': False, '--insecure': False, '--include-users': False, }, worker_pool=self._worker_pool_with_data, stdout=self.stdout, stderr=self.stderr) assert exists(target + '/twelve/datapackage.json') assert exists(target + '/thirtyfour/datapackage.json') assert exists(target + '/dp/datapackage.json') assert exists(target + '/dp/data/resource1.csv') with open(target + '/dp/datapackage.json') as dpf: dp = json.load(dpf) self.assertEqual(dp, { 'name': 'dp', 'title': 'Test for datapackage', 'resources': [{ 'name': 'resource1', 'format': 'csv', 'path': 'data/resource1.csv', 'title': 'resource1', 'schema': { 'fields': [{ 'name': 'col1', 'title': 'Column One', 'description': 'Description One', 'type': 'string', }], } }] }) finally: shutil.rmtree(target) def test_resource_views(self): target = tempfile.mkdtemp() try: dump_things(self.ckan, 'datasets', { 'ID_OR_NAME': ['dp'], '--quiet': False, '--ckan-user': None, '--config': None, '--remote': None, '--apikey': None, '--worker': False, '--log': None, '--output': target + '/dpf.jsonl', '--datapackages': None, '--gzip': False, '--all': False, '--processes': '1', '--get-request': False, '--datastore-fields': False, '--resource-views': True, '--insecure': False, '--include-users': False, }, worker_pool=self._worker_pool_with_resource_views, stdout=self.stdout, stderr=self.stderr) assert exists(target + '/dpf.jsonl') with open(target + '/dpf.jsonl') as dpf: dp = json.load(dpf) self.assertEqual(dp, { 'id': 'dp', 'name': 'dp', 'title': 'Test for datapackage', 'resources': [{ 'name': 'resource1', 'format': 'csv', 'id': 'd902fafc-5717-4dd0-87f2-7a6fc96989b7', 'url': 'https://google.com', 'datastore_active': True, 'resource_views': [{ 'description': 'Test view', 'filterable': True, 'id': 'd902fafc-5717-4dd0-87f2-7a6fc96989d9', 'package_id': 'dp', 'resource_id': 'd902fafc-5717-4dd0-87f2-7a6fc96989b7', 'responsive': True, 'show_fields': ['_id'] }] }] }) finally: shutil.rmtree(target) def test_include_params_default(self): ckan = unittest.mock.MagicMock() ckan.parallel_limit = 1 dump_things(ckan, 'datasets', { '--all': True, '--quiet': False, '--ckan-user': None, '--config': None, '--remote': None, '--apikey': None, '--worker': False, '--log': None, '--output': None, '--datapackages': None, '--gzip': False, '--processes': '1', '--get-request': False, '--datastore-fields': False, '--resource-views': False, '--insecure': False, '--include-users': False, }) action = ckan.method_calls[0].args[0] data_dict = ckan.method_calls[0].args[1] self.assertEqual(action, "package_list") self.assertEqual(data_dict["include_private"], False) self.assertEqual(data_dict["include_drafts"], False) self.assertEqual(data_dict["include_deleted"], False) def test_include_params_true(self): ckan = unittest.mock.MagicMock() ckan.parallel_limit = 1 dump_things(ckan, 'datasets', { '--all': True, '--quiet': False, '--ckan-user': None, '--config': None, '--remote': None, '--apikey': None, '--worker': False, '--log': None, '--output': None, '--datapackages': None, '--gzip': False, '--processes': '1', '--get-request': False, '--datastore-fields': False, '--resource-views': False, '--insecure': False, '--include-users': False, '--include-private': True, '--include-drafts': True, '--include-deleted': True, }) action = ckan.method_calls[0].args[0] data_dict = ckan.method_calls[0].args[1] self.assertEqual(action, "package_list") self.assertEqual(data_dict["include_private"], True) self.assertEqual(data_dict["include_drafts"], True) self.assertEqual(data_dict["include_deleted"], True) def _mock_worker_pool(self, cmd, processes, job_iter): self.worker_cmd = cmd self.worker_processes = processes self.worker_jobs = list(job_iter) for i, j in self.worker_jobs: jname = json.loads(j.decode('UTF-8')) yield [[], i, json.dumps(['some-date', None, {'id': jname}] ).encode('UTF-8') + b'\n'] def _mock_worker_pool_reversed(self, cmd, processes, job_iter): return reversed(list( self._mock_worker_pool(cmd, processes, job_iter))) def _worker_pool_with_data(self, cmd, processes, job_iter): worker_stdin = BytesIO(b''.join(v for i, v in job_iter)) worker_stdout = BytesIO() dump_things_worker(self.ckan, 'datasets', { '--datastore-fields': True, '--resource-views': False, '--insecure': False, '--include-users': False,}, stdin=worker_stdin, stdout=worker_stdout) for i, v in enumerate(worker_stdout.getvalue().strip().split(b'\n')): yield [[], i, v] def _worker_pool_with_resource_views(self, cmd, proccesses, job_iter): worker_stdin = BytesIO(b''.join(v for i, v in job_iter)) worker_stdout = BytesIO() dump_things_worker(self.ckan, 'datasets', { '--datastore-fields': False, '--resource-views': True, '--insecure': False, '--include-users': False,}, stdin=worker_stdin, stdout=worker_stdout) for i, v in enumerate(worker_stdout.getvalue().strip().split(b'\n')): yield [[], i, v] ================================================ FILE: ckanapi/tests/test_cli_load.py ================================================ from ckanapi.cli.load import load_things, load_things_worker from ckanapi.errors import NotFound, ValidationError, NotAuthorized import json import unittest from io import BytesIO class MockCKAN(object): def call_action(self, name, data_dict, requests_kwargs=None): if name == 'package_show' and data_dict['id'] == 'seekrit': raise NotAuthorized('naughty user') if name == 'package_create' and data_dict.get('name') == '34': raise ValidationError({'name': 'That URL is already in use.'}) if name == 'organization_update': if data_dict['id'] == 'used' and data_dict.get('users') != [ 'people']: raise ValidationError({'users': 'should be unchanged'}) if data_dict['id'] == 'unused' and data_dict.get('users') != []: raise ValidationError({'users': 'should be cleared'}) try: return { 'package_show': { '12': {'title': "Twelve"}, '30ish': {'id': '34', 'title': "Thirty-four"}, '34': {'id': '34', 'title': "Thirty-four"}, }, 'group_show': { 'ab': {'title': "ABBA"}, }, 'organization_show': { 'cd': {'id': 'cd', 'title': "Super Trouper"}, 'used': {'users': ['people']}, 'unused': {'users': ['people']}, }, 'package_create': { None: {'name': 'something-new'}, }, 'package_update': { '34': {'name': 'something-updated'}, }, 'group_update': { 'ab': {'name': 'group-updated'}, }, 'organization_update': { 'cd': {'name': 'org-updated'}, 'used': {'name': 'users-unchanged'}, 'unused': {'name': 'users-cleared'}, }, 'organization_create': { None: {'name': 'org-created'}, }, }[name][data_dict.get('id')] except KeyError: raise NotFound() class TestCLILoad(unittest.TestCase): def setUp(self): self.ckan = MockCKAN() self.stdout = BytesIO() self.stderr = BytesIO() def test_create_with_no_resources(self): load_things_worker(self.ckan, 'datasets', { '--create-only': False, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO(b'{"name": "45","title":"Forty-five"}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'create') self.assertEqual(error, None) self.assertEqual(data, 'something-new') def test_create_with_corrupted_resources(self): load_things_worker(self.ckan, 'datasets', { '--create-only': False, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO(b'{"name": "45","title":"Forty-five","resources":[{"id":"123"}]}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'create') self.assertEqual(error, None) self.assertEqual(data, 'something-new') def test_create_with_complete_resources(self): load_things_worker(self.ckan, 'datasets', { '--create-only': False, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO( b'{"name": "45","title":"Forty-five",' b'"resources":[{"id":"123","url_type":"","url":"http://example.com"}]}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'create') self.assertEqual(error, None) self.assertEqual(data, 'something-new') def test_create_only(self): load_things_worker(self.ckan, 'datasets', { '--create-only': True, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO(b'{"name": "45","title":"Forty-five"}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'create') self.assertEqual(error, None) self.assertEqual(data, 'something-new') def test_create_empty_dict(self): load_things_worker(self.ckan, 'datasets', { '--create-only': False, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO(b'{}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'create') self.assertEqual(error, None) self.assertEqual(data, 'something-new') def test_create_bad_option(self): load_things_worker(self.ckan, 'datasets', { '--create-only': False, '--update-only': True, '--insecure': False, }, stdin=BytesIO(b'{"name": "45","title":"Forty-five"}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'show') self.assertEqual(error, 'NotFound') self.assertEqual(data, [None, '45']) def test_update_with_no_resources(self): load_things_worker(self.ckan, 'datasets', { '--create-only': False, '--update-only': False, '--insecure': False, }, stdin=BytesIO(b'{"name": "30ish","title":"3.4 times ten"}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'update') self.assertEqual(error, None) self.assertEqual(data, 'something-updated') def test_update_with_corrupted_resources(self): load_things_worker(self.ckan, 'datasets', { '--create-only': False, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO(b'{"name": "30ish","title":"3.4 times ten","resources":[{"id":"123"}]}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'update') self.assertEqual(error, None) self.assertEqual(data, 'something-updated') def test_update_with_complete_resources(self): load_things_worker(self.ckan, 'datasets', { '--create-only': False, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO( b'{"name": "30ish","title":"3.4 times ten",' b'"resources":[{"id":"123","url_type":"","url":"http://example.com"}]}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'update') self.assertEqual(error, None) self.assertEqual(data, 'something-updated') def test_update_only(self): load_things_worker(self.ckan, 'datasets', { '--create-only': False, '--update-only': True, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO(b'{"name": "34","title":"3.4 times ten"}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'update') self.assertEqual(error, None) self.assertEqual(data, 'something-updated') def test_update_bad_option(self): load_things_worker(self.ckan, 'datasets', { '--create-only': True, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO(b'{"name": "34","title":"3.4 times ten"}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'create') self.assertEqual(error, 'ValidationError') self.assertEqual(data, {'name': 'That URL is already in use.'}) def test_update_unauthorized(self): load_things_worker(self.ckan, 'datasets', { '--create-only': False, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO(b'{"name": "seekrit", "title": "Things"}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'show') self.assertEqual(error, 'NotAuthorized') self.assertEqual(data, 'naughty user') def test_update_group(self): load_things_worker(self.ckan, 'groups', { '--create-only': False, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO(b'{"id": "ab","title":"a balloon"}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'update') self.assertEqual(error, None) self.assertEqual(data, 'group-updated') def test_update_organization_two(self): load_things_worker(self.ckan, 'organizations', { '--create-only': False, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO( b'{"name": "cd", "title": "Go"}\n' b'{"name": "ef", "title": "Play"}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response.count(b'\n'), 2, response) self.assertEqual(response[-1:], b'\n') r1, r2 = response.split(b'\n', 1) timstamp, action, error, data = json.loads(r1.decode('UTF-8')) self.assertEqual(action, 'update') self.assertEqual(error, None) self.assertEqual(data, 'org-updated') timstamp, action, error, data = json.loads(r2.decode('UTF-8')) self.assertEqual(action, 'create') self.assertEqual(error, None) self.assertEqual(data, 'org-created') def test_update_organization_with_users_unchanged(self): load_things_worker(self.ckan, 'organizations', { '--create-only': False, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO(b'{"id": "used", "title": "here"}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'update') self.assertEqual(error, None) self.assertEqual(data, 'users-unchanged') def test_update_organization_with_users_cleared(self): load_things_worker(self.ckan, 'organizations', { '--create-only': False, '--update-only': False, '--upload-resources': False, '--insecure': False, }, stdin=BytesIO(b'{"id": "unused", "users": []}\n'), stdout=self.stdout) response = self.stdout.getvalue() self.assertEqual(response[-1:], b'\n') timstamp, action, error, data = json.loads(response.decode('UTF-8')) self.assertEqual(action, 'update') self.assertEqual(error, None) self.assertEqual(data, 'users-cleared') def test_parent_load_two(self): load_things(self.ckan, 'datasets', { '--quiet': False, '--ckan-user': None, '--config': None, '--remote': None, '--apikey': None, '--worker': False, '--log': None, '--gzip': False, '--processes': '1', '--input': None, '--create-only': False, '--update-only': False, '--start-record': '1', '--max-records': None, '--upload-resources': False, '--upload-logo': False, '--insecure': False, }, worker_pool=self._mock_worker_pool, stdin=BytesIO( b'{"name": "cd", "title": "Go"}\n' b'{"name": "ef", "title": "Play"}\n' ), stdout=self.stdout, stderr=self.stderr) self.assertEqual(self.worker_cmd, [ 'ckanapi', 'load', 'datasets', '--worker']) self.assertEqual(self.worker_processes, 1) self.assertEqual(self.worker_jobs, [ (1, b'{"name": "cd", "title": "Go"}\n'), (2, b'{"name": "ef", "title": "Play"}\n'), ]) def test_parent_load_start_max(self): load_things(self.ckan, 'groups', { '--quiet': False, '--ckan-user': None, '--config': None, '--remote': None, '--apikey': None, '--worker': False, '--log': None, '--gzip': False, '--processes': '1', '--input': None, '--create-only': False, '--update-only': False, '--start-record': '2', '--max-records': '2', '--upload-resources': False, '--upload-logo': False, '--insecure': False, }, worker_pool=self._mock_worker_pool, stdin=BytesIO( b'{"name": "cd", "title": "Go"}\n' b'{"name": "ef", "title": "Play"}\n' b'{"name": "gh", "title": "Hotel"}\n' b'{"name": "ij", "title": "Ambient"}\n' ), stdout=self.stdout, stderr=self.stderr) self.assertEqual(self.worker_cmd, [ 'ckanapi', 'load', 'groups', '--worker']) self.assertEqual(self.worker_processes, 1) self.assertEqual(self.worker_jobs, [ (2, b'{"name": "ef", "title": "Play"}\n'), (3, b'{"name": "gh", "title": "Hotel"}\n'), ]) def test_parent_parallel_limit(self): self.ckan.parallel_limit = 2 load_things(self.ckan, 'datasets', { '--quiet': False, '--ckan-user': None, '--config': None, '--remote': None, '--apikey': None, '--worker': False, '--log': None, '--gzip': False, '--processes': '5', '--input': None, '--create-only': False, '--update-only': False, '--start-record': '1', '--max-records': None, '--upload-resources': False, '--upload-logo': False, '--insecure': False, }, worker_pool=self._mock_worker_pool, stdin=BytesIO( b'{"name": "cd", "title": "Go"}\n' b'{"name": "ef", "title": "Play"}\n' ), stdout=self.stdout, stderr=self.stderr) self.assertEqual(self.worker_cmd, [ 'ckanapi', 'load', 'datasets', '--worker']) self.assertEqual(self.worker_processes, 2) def _mock_worker_pool(self, cmd, processes, job_iter): self.worker_cmd = cmd self.worker_processes = processes self.worker_jobs = list(job_iter) for i, j in self.worker_jobs: jname = json.loads(j.decode('UTF-8')) yield [[], i, json.dumps(['some-date', None, None, {'id':jname}] ).encode('UTF-8') + b'\n'] ================================================ FILE: ckanapi/tests/test_cli_workers.py ================================================ from ckanapi.cli.workers import worker_pool import os import unittest class _MockPopen(object): def __init__(self, popen_args, stdin, stdout): read1fd, write1fd = os.pipe() read2fd, write2fd = os.pipe() self.stdin = os.fdopen(write1fd, 'wb') self.stdin_inside = os.fdopen(read1fd, 'rb') self.stdout = os.fdopen(read2fd, 'rb') self.stdout_inside = os.fdopen(write2fd, 'wb') # use popen_args as an after-create callback popen_args(self) def stdout_write(self, data): self.stdout_inside.write(data) self.stdout_inside.flush() def stdin_readline(self): return self.stdin_inside.readline() def close_pipes(self): for f in (self.stdin, self.stdin_inside, self.stdout, self.stdout_inside): f.close() class TestCLIWorkers(unittest.TestCase): def test_one(self): children = [] def child_created(child): # need to respond or pool will block test child.stdout_write(b'AA\n') children.append(child) pool = worker_pool( child_created, 1, enumerate((b"job1\n", b"job2\n")), popen=_MockPopen, ) response = next(pool) self.assertEqual(len(children), 1) c = children[0] self.assertEqual(c.stdin_readline(), b'job1\n') self.assertEqual(response, ([1], 0, b'AA\n')) self.assertEqual(c.stdin_readline(), b'job2\n') c.stdout_write(b'BB\n') self.assertEqual(next(pool), ([None], 1, b'BB\n')) self.assertRaises(StopIteration, next, pool) for c in children: c.close_pipes() def test_two(self): children = [] def child_created(child): # first child responds if not children: child.stdout_write(b'AA\n') children.append(child) pool = worker_pool( child_created, 2, enumerate((b"job1\n", b"job2\n", b"job3\n", b"job4\n")), popen=_MockPopen, ) response = next(pool) self.assertEqual(len(children), 2) c0, c1 = children self.assertEqual(c0.stdin_readline(), b'job1\n') self.assertEqual(c1.stdin_readline(), b'job2\n') self.assertEqual(response, ([2, 1], 0, b'AA\n')) self.assertEqual(c0.stdin_readline(), b'job3\n') c1.stdout_write(b'BB\n') self.assertEqual(next(pool), ([2, 3], 1, b'BB\n')) self.assertEqual(c1.stdin_readline(), b'job4\n') c0.stdout_write(b'CC\n') self.assertEqual(next(pool), ([None, 3], 2, b'CC\n')) c1.stdout_write(b'DD\n') self.assertEqual(next(pool), ([None, None], 3, b'DD\n')) self.assertRaises(StopIteration, next, pool) for c in children: c.close_pipes() def test_uneven(self): children = [] def child_created(child): # second child responds if children: child.stdout_write(b'AA\n') children.append(child) pool = worker_pool( child_created, 2, enumerate((b"job1\n", b"job2\n", b"job3\n", b"job4\n")), popen=_MockPopen, ) response = next(pool) self.assertEqual(len(children), 2) c0, c1 = children self.assertEqual(c0.stdin_readline(), b'job1\n') self.assertEqual(c1.stdin_readline(), b'job2\n') self.assertEqual(response, ([0, 2], 1, b'AA\n')) self.assertEqual(c1.stdin_readline(), b'job3\n') c1.stdout_write(b'BB\n') self.assertEqual(next(pool), ([0, 3], 2, b'BB\n')) self.assertEqual(c1.stdin_readline(), b'job4\n') c1.stdout_write(b'CC\n') self.assertEqual(next(pool), ([0, None], 3, b'CC\n')) c0.stdout_write(b'DD\n') self.assertEqual(next(pool), ([None, None], 0, b'DD\n')) self.assertRaises(StopIteration, next, pool) for c in children: c.close_pipes() def test_overkill(self): children = [] def child_created(child): if not children: child.stdout_write(b'AA\n') children.append(child) pool = worker_pool( child_created, 10, enumerate((b"job1\n",)), popen=_MockPopen, ) response = next(pool) self.assertEqual(len(children), 1) c = children[0] self.assertEqual(c.stdin_readline(), b'job1\n') self.assertEqual(response, ([None], 0, b'AA\n')) self.assertRaises(StopIteration, next, pool) for c in children: c.close_pipes() def test_batch(self): children = [] def child_created(child): if not children: child.stdout_write(b'AA\n') children.append(child) pool = worker_pool( child_created, 2, enumerate((b"job1\n",)), stop_when_jobs_done=False, popen=_MockPopen, ) response = next(pool) self.assertEqual(len(children), 1) c0 = children[0] self.assertEqual(c0.stdin_readline(), b'job1\n') self.assertEqual(response, ([None], 0, b'AA\n')) self.assertEqual(next(pool), (None, None, None)) # need to write in advance to avoid blocking test c0.stdout_write(b'BB\n') response = pool.send(enumerate((b"job2\n", b"job3\n"), 1)) self.assertEqual(response, ([None, 2], 1, b'BB\n')) for c in children: c.close_pipes() ================================================ FILE: ckanapi/tests/test_datapackage.py ================================================ from ckanapi.datapackage import ( dataset_to_datapackage, create_resource, create_datapackage, resource_filename, populate_schema_from_datastore) import unittest from io import BytesIO import os from pyfakefs import fake_filesystem_unittest class TestDatasetToDataPackage(unittest.TestCase): def test_simple_dataset(self): dataset_dict = { u'extras': [{u'key': u'subject', u'value': u'science'}], u'name': u'test_dataset_00', u'notes': u'Just another test dataset.', u'resources': [{ u'format': u'PNG', u'name': u'Image 1', u'url': u'http://example.com/image.png', }], u'tags': [{ u'display_name': u'science', u'id': u'59f9359c-002b-4166-a519-755f89a631da', u'name': u'science', }], u'title': u'Test Dataset', u'type': u'dataset', } datapackage = dataset_to_datapackage(dataset_dict) # code copied from test_package_show_with_full_dataset() assert datapackage == { u'description': u'Just another test dataset.', u'extras': {u'subject': u'science'}, u'keywords': [u'science'], u'name': u'test_dataset_00', u'resources': [{u'format': u'PNG', u'name': u'image-1', u'path': u'http://example.com/image.png', u'title': u'Image 1'}], u'title': u'Test Dataset'} def test_full_dataset(self): # This sample dataset_dict was generated in CKAN along the lines of # ckan/tests/logic/action/test_get.py # TestPackageShow.test_package_show_with_full_dataset() dataset_dict = { u'author': None, u'author_email': None, u'creator_user_id': u'3267d399-5517-47ef-ac02-13bb29372428', u'extras': [{u'key': u'subject', u'value': u'science'}], u'groups': [{u'description': u'A test description for this test group.', u'display_name': u'Test Group 00', u'id': u'cca3543f-0ba0-4194-b2f3-326498eb88b7', u'image_display_url': u'', u'name': u'test_group_00', u'title': u'Test Group 00'}], u'id': u'a7165429-dde3-4a5f-ba7d-c690209200cf', u'isopen': False, u'license_id': None, u'license_title': None, u'maintainer': None, u'maintainer_email': None, u'metadata_created': u'2019-05-24T16:30:43.889152', u'metadata_modified': u'2019-05-24T16:30:43.889161', u'name': u'test_dataset_00', u'notes': u'Just another test dataset.', u'num_resources': 1, u'num_tags': 1, u'organization': { u'approval_status': u'approved', u'created': u'2019-05-24T16:30:43.608032', u'description': u'Just another test organization.', u'id': u'aa878f8c-1f6e-4e87-b08e-67272d9c3d16', u'image_url': u'http://placekitten.com/g/200/100', u'is_organization': True, u'name': u'test_org_00', u'revision_id': u'bb31cfee-aee9-4031-9333-ed922bf3f049', u'state': u'active', u'title': u'Test Organization', u'type': u'organization'}, u'owner_org': u'aa878f8c-1f6e-4e87-b08e-67272d9c3d16', u'private': False, u'relationships_as_object': [], u'relationships_as_subject': [], u'resources': [{ u'cache_last_updated': None, u'cache_url': None, u'created': u'2019-05-24T16:30:43.894623', u'description': u'', u'format': u'PNG', u'hash': u'', u'id': u'a8e2f627-0450-4728-a0a4-ed3a091c303c', u'last_modified': None, u'mimetype': None, u'mimetype_inner': None, u'name': u'Image 1', u'package_id': u'a7165429-dde3-4a5f-ba7d-c690209200cf', u'position': 0, u'resource_type': None, u'revision_id': u'990df889-690c-412e-a7ad-f848c9927218', u'size': None, u'state': u'active', u'url': u'http://example.com/image.png', u'url_type': None}], u'revision_id': u'990df889-690c-412e-a7ad-f848c9927218', u'state': u'active', u'tags': [{ u'display_name': u'science', u'id': u'59f9359c-002b-4166-a519-755f89a631da', u'name': u'science', u'state': u'active', u'vocabulary_id': None}], u'title': u'Test Dataset', u'type': u'dataset', u'url': None, u'version': None } datapackage = dataset_to_datapackage(dataset_dict) assert datapackage == { u'description': u'Just another test dataset.', u'extras': {u'subject': u'science'}, u'keywords': [u'science'], u'name': u'test_dataset_00', u'resources': [{u'format': u'PNG', u'name': u'image-1', u'path': u'http://example.com/image.png', u'title': u'Image 1'}], u'title': u'Test Dataset'} def test_resource_names_are_unique(self): # Somehow these resources got the same name dataset_dict = { u'name': u'test_dataset_00', u'notes': u'Just another test dataset.', u'resources': [ { u'format': u'PNG', u'name': u'Image', u'url': u'http://example.com/imageA.png', }, { u'format': u'PNG', u'name': u'Image', u'url': u'http://example.com/imageB.png', }, { u'format': u'PNG', u'name': u'Image', u'url': u'http://example.com/imageC.png', }, ], u'tags': [{ u'display_name': u'science', u'id': u'59f9359c-002b-4166-a519-755f89a631da', u'name': u'science', }], u'title': u'Test Dataset', u'type': u'dataset', } datapackage = dataset_to_datapackage(dataset_dict) assert [res['name'] for res in datapackage['resources']] == \ [u'image', u'image0', u'image1'] class TestCreateResource(fake_filesystem_unittest.TestCase): def setUp(self): self.setUpPyfakefs() def test_simple(self): resource = { u'format': u'PNG', u'name': u'Image', u'url': u'http://example.com/image.png', } filename = 'image_saved.png' os.makedirs('/test/data') stderr = BytesIO() # TODO mock the HTTP request to example.com returned_resource = create_resource( resource, filename='image_saved.png', datapackage_dir='/test', stderr=stderr, apikey='') stderr.seek(0) assert not stderr.read() assert returned_resource == { u'url': u'http://example.com/image.png', u'name': u'Image', u'format': u'PNG', u'path': u'data/image_saved.png', } class TestCreateDataPackage(fake_filesystem_unittest.TestCase): def setUp(self): self.setUpPyfakefs() def test_simple(self): dataset = { u'extras': [{u'key': u'subject', u'value': u'science'}], u'name': u'test_dataset_00', u'notes': u'Just another test dataset.', u'resources': [{ u'format': u'PNG', u'name': u'Image 1', u'url': u'http://example.com/image.png', }], u'tags': [{ u'display_name': u'science', u'id': u'59f9359c-002b-4166-a519-755f89a631da', u'name': u'science', }], u'title': u'Test Dataset', u'type': u'dataset', } stderr = BytesIO() os.makedirs('/test/data') # TODO mock the HTTP request to example.com datapackage_dir, datapackage, json_path = \ create_datapackage(record=dataset, base_path='/test/', stderr=stderr, apikey='') stderr.seek(0) assert not stderr.read() assert datapackage_dir == u'/test/test_dataset_00' assert datapackage == { u'name': u'test_dataset_00', u'description': u'Just another test dataset.', u'title': u'Test Dataset', u'extras': {u'subject': u'science'}, u'keywords': [u'science'], u'resources': [{ u'path': u'data/image-1.png', # i.e. it was downloaded u'title': u'Image 1', u'name': u'image-1', u'format': u'PNG'}], } assert json_path == u'/test/test_dataset_00/datapackage.json' class TestResourceFilename(unittest.TestCase): def test_simple(self): datapackage_resource = { u'title': u'Image 1', u'name': u'image-1', u'format': u'PNG' } filename = resource_filename(dres=datapackage_resource) assert filename == u'image-1.png' class TestPopulateSchemaFromDatastore(unittest.TestCase): def test_simple(self): ckan_resource = { u'format': u'CSV', u'name': u'Buildings 1', u'url': u'http://example.com/buildings.csv', # example datastore fields from: # curl 'https://data.boston.gov/api/3/action/datastore_search?resource_id=28ca9f8d-f6ad-4855-bf14-90d2d0bc85ca&limit=0' |jq '.result.fields' u'datastore_fields': [ { u'id': u'country', u'type': u'int', u'info': { u'label': u'The country', u'notes': u'iso code', } }, { u'id': u'NUM_FLOORS', u'type': u'text', u'info': { u'type_override': {} }, } ] } datapackage_resource = { u'title': u'Buildings 1', u'name': u'buildings-1', u'format': u'CSV' } populate_schema_from_datastore(cres=ckan_resource, dres=datapackage_resource) assert datapackage_resource == { u'title': u'Buildings 1', u'name': u'buildings-1', u'format': u'CSV', u'schema': {'fields': [{'description': u'iso code', 'name': u'country', 'title': u'The country'}, {'name': u'NUM_FLOORS', 'type': 'string'}] } } ================================================ FILE: ckanapi/tests/test_remote.py ================================================ import subprocess import time import os import atexit import socket import requests import json from ckanapi import RemoteCKAN, NotFound from ckanapi.common import REQUEST_TIMEOUT import unittest from unittest import mock from subprocess import DEVNULL from urllib.request import urlopen, URLError from io import StringIO TEST_CKAN = 'http://localhost:8901' NUMBER_THING_CSV = """ Number,Thing 5,sasquach """.lstrip() class TestRemoteAction(unittest.TestCase): @classmethod def setUpClass(cls): script = os.path.join(os.path.dirname(__file__), 'mock/mock_ckan.py') _mock_ckan = subprocess.Popen(['python', script], stdout=DEVNULL, stderr=DEVNULL) def kill_child(): try: _mock_ckan.kill() _mock_ckan.wait() except OSError: pass # alread cleaned up from tearDownClass atexit.register(kill_child) cls._mock_ckan = _mock_ckan while True: # wait for the server to start try: r = urlopen(TEST_CKAN + '/api/action/site_read') if r.getcode() == 200: break except URLError as e: pass time.sleep(0.1) def test_good_oldstyle(self): ckan = RemoteCKAN(TEST_CKAN) self.assertEqual( ckan.action.organization_list(), ['aa', 'bb', 'cc']) ckan.close() def test_good(self): with RemoteCKAN(TEST_CKAN) as ckan: self.assertEqual( ckan.action.organization_list(), ['aa', 'bb', 'cc']) def test_missing(self): with RemoteCKAN(TEST_CKAN) as ckan: self.assertRaises( NotFound, ckan.action.organization_show, id='qqq') def test_default_ua(self): with RemoteCKAN(TEST_CKAN) as ckan: self.assertTrue( ckan.action.test_echo_user_agent().startswith('ckanapi')) def test_custom_ua(self): ua = 'testckanapibot/1.0 (+https://github.com/ckan/ckanapi)' with RemoteCKAN(TEST_CKAN, user_agent=ua) as ckan: self.assertEqual(ckan.action.test_echo_user_agent(), ua) def test_default_content_type(self): with RemoteCKAN(TEST_CKAN) as ckan: self.assertEqual(ckan.action.test_echo_content_type(), "application/json") def test_resource_upload(self): with RemoteCKAN(TEST_CKAN) as ckan: res = ckan.call_action('test_upload', {'option': "42"}, files={'upload': StringIO(NUMBER_THING_CSV)}) self.assertEqual(res.get('last_row'), ['5', 'sasquach']) def test_resource_upload_extra_param(self): with RemoteCKAN(TEST_CKAN) as ckan: res = ckan.call_action('test_upload', {'option': "42"}, files={'upload': StringIO(NUMBER_THING_CSV)}) self.assertEqual(res.get('option'), "42") def test_resource_upload_unicode_param(self): uname = b't\xc3\xab\xc3\x9ft resource'.decode('utf-8') with RemoteCKAN(TEST_CKAN) as ckan: res = ckan.call_action('test_upload', {'option': uname}, files={'upload': StringIO(NUMBER_THING_CSV)}) self.assertEqual(res.get('option'), uname) def test_resource_upload_content_type(self): with RemoteCKAN(TEST_CKAN) as ckan: res = ckan.call_action('test_echo_content_type', {'option': "42"}, files={'upload': StringIO(NUMBER_THING_CSV)}) self.assertEqual(res.split(';')[0], "multipart/form-data") def test_default_timeout(self): mock_response = mock.MagicMock() mock_response.status_code = 200 mock_response.text = json.dumps({"success": True, "result": []}) with mock.patch('requests.Session.post', return_value=mock_response) as mock_post: with RemoteCKAN(TEST_CKAN) as ckan: ckan.action.organization_list() _, kwargs = mock_post.call_args self.assertIs(REQUEST_TIMEOUT, None) self.assertEqual(kwargs.get('timeout'), REQUEST_TIMEOUT) def test_custom_timeout(self): mock_response = mock.MagicMock() mock_response.status_code = 200 mock_response.text = json.dumps({"success": True, "result": []}) # We patch at the module level because the env var is read at import time and # can't be patched with mock.patch("ckanapi.remoteckan.REQUEST_TIMEOUT", (2, 30)): with mock.patch('requests.Session.post', return_value=mock_response) as mock_post: with RemoteCKAN(TEST_CKAN) as ckan: ckan.action.organization_list() _, kwargs = mock_post.call_args self.assertEqual(kwargs.get('timeout'), (2, 30)) @classmethod def tearDownClass(cls): cls._mock_ckan.kill() cls._mock_ckan.wait() ================================================ FILE: ckanapi/version.py ================================================ from importlib.metadata import version __version__ = version("ckanapi") ================================================ FILE: examples/update_single_field.py ================================================ #!/usr/bin/python3 from ckanapi import RemoteCKAN server_url='https://ckan.my-domain.com' token = 'very_secret_token' selected_id = '0f800659-16d2-449a-923f-a6d04f8edbb9' with RemoteCKAN(server_url, apikey=token) as ckan: ckan.action.package_patch(id=selected_id, title='New title') ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" [project] name = "ckanapi" version = "4.11" description = "A command line interface and Python module for accessing the CKAN Action API" license = {text = "MIT"} authors = [ {name = "Ian Ward", email = "ian@excess.org"}, ] classifiers = [ "Intended Audience :: Developers", "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", ] keywords = [ "ckan", "ckanext", "API", ] requires-python = ">=3.9" dependencies = [ "setuptools", "docopt", "requests", "python-slugify>=1.0", "simplejson", ] [project.readme] file = "README.md" content-type = "text/markdown" [project.urls] Homepage = "https://github.com/ckan/ckanapi" [project.optional-dependencies] testing = [ "pyfakefs==5.10.2", "werkzeug", ] [project.scripts] ckanapi = "ckanapi.cli.main:main" [project.entry-points."ckan.click_command"] api = "ckanapi.cli.ckan_click:api" [tool.setuptools.packages.find] include = ["ckanapi*"] ================================================ FILE: requirements.txt ================================================ setuptools docopt requests simplejson