Repository: marcosschroh/python-schema-registry-client Branch: master Commit: 8e3d7bc75ae9 Files: 89 Total size: 267.4 KB Directory structure: gitextract_wyjir8km/ ├── .github/ │ ├── FUNDING.yml │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ └── feature_request.md │ ├── dependabot.yml │ └── workflows/ │ ├── bump_version.yaml │ ├── docs-preview.yaml │ ├── docs-publish.yaml │ ├── publish.yaml │ └── python-package.yml ├── .gitignore ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── README.md ├── docker-compose.yaml ├── docs/ │ ├── client.md │ ├── exceptions.md │ ├── faust.md │ ├── index.md │ ├── schemaregistry_server.md │ ├── schemas.md │ └── serializer.md ├── mkdocs.yml ├── pyproject.toml ├── schema_registry/ │ ├── __init__.py │ ├── client/ │ │ ├── __init__.py │ │ ├── client.py │ │ ├── errors.py │ │ ├── paths.py │ │ ├── schema.py │ │ ├── status.py │ │ ├── urls.py │ │ └── utils.py │ ├── py.typed │ └── serializers/ │ ├── __init__.py │ ├── errors.py │ ├── faust.py │ └── message_serializer.py ├── scripts/ │ ├── README.md │ ├── clean │ ├── format │ ├── publish │ ├── test │ └── wait_for_services └── tests/ ├── __init__.py ├── avro_schemas/ │ ├── adv_schema.avsc │ ├── basic_schema.avsc │ ├── invalid_schema.avsc │ ├── logical_types_schema.avsc │ ├── nested_schema.avsc │ ├── order_schema.avsc │ ├── primitive_float.avsc │ ├── primitive_string.avsc │ ├── user_v1.avsc │ └── user_v2.avsc ├── certificates/ │ ├── cert.pem │ └── key.pem ├── client/ │ ├── __init__.py │ ├── async_client/ │ │ ├── __init__.py │ │ ├── test_http_client.py │ │ ├── test_schema.py │ │ ├── test_schema_compatibility.py │ │ ├── test_schema_delete.py │ │ ├── test_schema_getters.py │ │ ├── test_schema_registration.py │ │ └── test_schema_version.py │ ├── sync_client/ │ │ ├── __init__.py │ │ ├── test_http_client.py │ │ ├── test_schema.py │ │ ├── test_schema_compatibility.py │ │ ├── test_schema_delete.py │ │ ├── test_schema_getters.py │ │ ├── test_schema_registration.py │ │ └── test_schema_version.py │ └── 
test_urls.py ├── conftest.py ├── data_gen.py ├── json_schemas/ │ ├── adv_schema.json │ ├── basic_schema.json │ ├── invalid_schema.json │ ├── nested_schema.json │ ├── order_schema.json │ ├── user_v1.json │ └── user_v2.json └── serializer/ ├── __init__.py ├── test_async_message_serializer.py ├── test_faust_serializer.py ├── test_faust_serializer_clean_payload.py └── test_message_serializer.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/FUNDING.yml ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: '' assignees: '' --- **Describe the bug** A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior: **Expected behavior** A clear and concise description of what you expected to happen. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: '' assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. 
================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: pip directory: "/" schedule: interval: weekly - package-ecosystem: "github-actions" directory: "/" schedule: interval: weekly ================================================ FILE: .github/workflows/bump_version.yaml ================================================ name: Bump version on: push: branches: - master jobs: bump-version: if: "!startsWith(github.event.head_commit.message, 'bump:')" runs-on: ubuntu-latest name: "Bump version and create changelog with commitizen" steps: - name: Check out uses: actions/checkout@v4 with: fetch-depth: 0 token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} - name: Create bump and changelog uses: commitizen-tools/commitizen-action@master with: github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} changelog_increment_filename: body.md - name: Release uses: softprops/action-gh-release@v2 with: body_path: "body.md" tag_name: ${{ env.REVISION }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/docs-preview.yaml ================================================ name: Deploy PR previews on: pull_request: types: - opened - reopened - synchronize - closed concurrency: preview-${{ github.ref }} jobs: deploy-preview: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Setup python uses: actions/setup-python@v5 with: python-version: '3.12' - name: Install Dependencies run: | python -m pip install -U pip poetry poetry install - name: Build docs run: | poetry run mkdocs build - name: Deploy preview uses: rossjrw/pr-preview-action@v1 with: source-dir: ./site/ ================================================ FILE: .github/workflows/docs-publish.yaml ================================================ name: Publish documentation on: push: branches: ["master"] jobs: build: runs-on: ubuntu-latest steps: - uses: 
actions/checkout@v4 - name: Setup python uses: actions/setup-python@v5 with: python-version: '3.12' - name: Install Dependencies run: | python -m pip install -U pip poetry poetry install - name: Build docs run: | poetry run mkdocs build - name: Push doc to Github Page uses: peaceiris/actions-gh-pages@v4 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} PUBLISH_BRANCH: gh-pages PUBLISH_DIR: ./site ================================================ FILE: .github/workflows/publish.yaml ================================================ name: Publish Package on: push: tags: - '*' workflow_dispatch: inputs: release: description: 'Release package' required: true default: false type: boolean jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Setup python uses: actions/setup-python@v5 with: python-version: '3.12' - name: Install Dependencies run: | python -m pip install -U pip poetry poetry install --only ci-publish - name: Publish env: TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }} TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} run: | ./scripts/publish ================================================ FILE: .github/workflows/python-package.yml ================================================ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: Python package on: push: branches: ["master", "release/v3"] pull_request: branches: ["master", "release/v3"] jobs: build: runs-on: ubuntu-latest strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 - name: Setup python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} architecture: x64 - name: Run docker-compose uses: hoverkraft-tech/compose-action@v2.1.0 with: compose-file: "docker-compose.yaml" - name: Build and test env: CODECOV_TOKEN: ${{ 
secrets.CODECOV_TOKEN }} PYTHON_VERSION: ${{ matrix.python-version }} run: | python -m pip install -U pip poetry poetry install --all-extras ./scripts/test ================================================ FILE: .gitignore ================================================ ### Python template # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *.pyc *$py.class # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ # PyBuilder target/ # pyenv .python-version # Environments .venv venv/ ENV/ # mkdocs documentation /site # mypy .mypy_cache/ # Runtime data pids *.pid *.seed *.pid.lock # Coverage directory used by tools like istanbul coverage ### VisualStudioCode template .vscode/ !.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json !.vscode/extensions.json # Workspace files are user-specific *.sublime-workspace # Project files should be checked into the repository, unless a significant # proportion of contributors will probably not be using Sublime Text # *.sublime-project # SFTP configuration file sftp-config.json # Package control specific files Package Control.last-run Package Control.ca-list Package Control.ca-bundle Package Control.system-ca-bundle Package Control.cache/ Package Control.ca-certs/ Package Control.merged-ca-bundle Package Control.user-ca-bundle oscrypto-ca-bundle.crt bh_unicode_properties.cache # Sublime-github package stores a github token in this file # https://packagecontrol.io/packages/sublime-github GitHub.sublime-settings ### Vim template # Swap [._]*.s[a-v][a-z] 
[._]*.sw[a-p] [._]s[a-v][a-z] [._]sw[a-p] # Session Session.vim # Auto-generated tag files tags .pytest_cache/ .ipython/ # PyCharm .idea ================================================ FILE: CHANGELOG.md ================================================ # Changelog All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ### Fix - dependabot added. Documentation improved with mkdocstrings (#148) ## v2.6.1 (2025-04-04) ### Fix - use anyio instead of aiofiles for async file I/O (#434) ## v2.6.0 (2024-06-19) ### Feat - add verbose option for test_compatibility (#336) ## v2.5.6 (2024-02-12) ### Fix - bump fastavro to 1.9.3 and correct typing (#250) ## v2.5.5 (2024-02-09) ### Fix - **dependencies**: bump httpx version constraints to 0.26 (#244) ## v2.5.4 (2024-01-12) ### Fix - allow for minor version upgrades of httpx (#231) ## v2.5.3 (2024-01-04) ### Fix - order of arguments in SchemaVersion constructor (#227) ## v2.5.2 (2023-12-11) ### Fix - pin python version in publish workflow (#218) ## v2.5.1 (2023-12-02) ### Fix - remove print statement from serializer (#214) ## v2.5.0 (2023-06-26) ### Feat - Add support for generic httpx.Auth (#174) ## v2.4.4 (2023-06-26) ### Fix - dependabot added. 
Documentation improved with mkdocstrings (#148) ## [2.4.3] - 2023-04-19 - include properly `py.typed` marker file ### Fixed ## [2.4.2] - 2023-04-18 - `py.typed` marker file added [142](https://github.com/marcosschroh/python-schema-registry-client/pull/142) ### Fixed ## [2.4.1] - 2022-09-08 - schema getters when fetching JSON schemas without cached results [138](https://github.com/marcosschroh/python-schema-registry-client/pull/138) ### Fixed ## [2.4.0] - 2022-04-22 - `Auth` parameter added to simplify [auth credentials](https://marcosschroh.github.io/python-schema-registry-client/client/#auth) (`username` and `password`) [127](https://github.com/marcosschroh/python-schema-registry-client/pull/127) ### Fixed ## [2.2.2] - 2022-02-09 - Support latest httpx version [117](https://github.com/marcosschroh/python-schema-registry-client/pull/117) ### Fixed ## [2.2.1] - 2021-12-29 - Support for python 3.7+ and latest httpx added [116](https://github.com/marcosschroh/python-schema-registry-client/pull/116) ### Added ## [2.2.0] - 2021-11-24 - `AsyncJsonMessageSerializer`, `AsyncAvroMessageSerializer` and `AsyncSchemaRegistryClient` added [102](https://github.com/marcosschroh/python-schema-registry-client/pull/102) ### Fixed ## [2.1.1] - 2021-11-15 - `httpx` requirement updated [114](https://github.com/marcosschroh/python-schema-registry-client/pull/114) ## [2.1.0] - 2021-11-12 - new function `get_schema_subject_versions` [109](https://github.com/marcosschroh/python-schema-registry-client/pull/109) ### Added - fix serializers API when faust is not installed [104](https://github.com/marcosschroh/python-schema-registry-client/pull/104) ## [2.0.0] - 2021-10-14 ### Added - fix serializers API when faust is not installed [104](https://github.com/marcosschroh/python-schema-registry-client/pull/104) ## [1.9.0] - 2021-10-07 ### Added - Support for json schemas added [100](https://github.com/marcosschroh/python-schema-registry-client/pull/100) ## [1.8.2] - 2021-05-07 ### Fixed - Unpin 
fastavro dependency ## [1.8.1] - 2021-02-26 ### Fixed - Type check [#95](https://github.com/marcosschroh/python-schema-registry-client/pull/95) - Logging levels [#94](https://github.com/marcosschroh/python-schema-registry-client/pull/94) ## [1.8.0] - 2020-01-29 ### Added - support return_record_name [#89](https://github.com/marcosschroh/python-schema-registry-client/pull/89) - Update pinned fastavro version to match dataclasses-avroschema [#91](https://github.com/marcosschroh/python-schema-registry-client/pull/91) ## [1.7.2] - 2020-12-22 ### Fixed - Checks if Schema is already registered before trying to register. This allows Schema Registry to be readonly in production environment, with only CI/CD being allowed to make changes. ## [1.7.1] - 2020-12-07 ### Fixed - [faust] extra now depends on [faust-streaming fork](https://github.com/faust-streaming/faust) ## [1.7.0] - 2020-10-17 ### Added - Integration with [dataclasses-avroschema](https://github.com/marcosschroh/dataclasses-avroschema) added to serializers ### Fixed - Requirements updated: `fastavro==1.0.0.post1` and `mypy==0.782` ## [1.6.1] - 2020-10-16 ### Fixed - Requirements updated: `fastavro==1.0.0.post1` and `mypy==0.782` ## [1.6.0] - 2020-09-18 ### Added - Integration with [dataclasses-avroschema](https://github.com/marcosschroh/dataclasses-avroschema) added ## [1.5.0] - 2020-09-12 ### Added - `AsyncSchemaRegistryClient` added ## [1.4.7] - 2020-09-12 ### Fixed - Submit raw schema instead of the `fastavro-parsed` version [#77](https://github.com/marcosschroh/python-schema-registry-client/issues/77) ## [1.4.6] - 2020-09-07 ### Fixed - `is_key` removed from signature methods - documentation updated ## [1.4.5] - 2020-08-19 ### Fixed - Pin dependency versions ## [1.4.4] - 2020-08-14 ### Fixed - Corrects `Accept headers` to conform to specification [#73](https://github.com/marcosschroh/python-schema-registry-client/pull/73) ## [1.4.3] - 2020-08-13 ### Fixed - `requests` dependency removed 
[#70](https://github.com/marcosschroh/python-schema-registry-client/pull/70) ## [1.4.2] - 2020-08-10 ### Fixed - Fix `client.register cache lookup` [#62](https://github.com/marcosschroh/python-schema-registry-client/pull/62) - Support for new release of `httpx`. For `httpx < 0.14.0` versions usage of `python-schema-registry-client==1.4.1` - Don't rely on httpx's private config values [#66](https://github.com/marcosschroh/python-schema-registry-client/pull/66) ## [1.4.1] - 2020-07-14 ### Changed - `Avro` serialization for complex `faust` records that contains nested `records`, `Mapping` or `Sequences fixed` [#59](https://github.com/marcosschroh/python-schema-registry-client/issues/59) ## [1.4.0] - 2020-05-07 ### Added - timeout and pool_limits added to client ## [1.3.2] - 2020-05-06 ### Fixed - Allow SchemaRegistryClient to be picklable fixed #24 ## [1.3.1] - 2020-05-03 ### Changed - `requests` library has been replaced with `httpx` ## [1.3.0] - 2020-04-25 ### Added - new properties added to AvroSchema: raw_schema, flat_schema and expanded_schema - documentation updated ### Fixed ## [1.2.10] - 2020-04-20 - bad import check fixed causing faust crash ### Fixed ## [1.2.9] - 2020-04-20 - fixed `Importing MessageSerializer` without Faust is Broken [#47](https://github.com/marcosschroh/python-schema-registry-client/issues/47) ### Fixed ## [1.2.8] - 2020-04-18 - fix Base URL was overwritten [#46](https://github.com/marcosschroh/python-schema-registry-client/issues/46) ### Changed ## [1.2.7] - 2020-03-29 - `faust serializer` updated in order to be compatible with latest Faust version ### Fixed ## [1.2.6] - 2020-03-13 - Incorrect message on get_subjects fixed ### Changed ## [1.2.5] - 2020-02-19 - is_key was removed from serializer, meaning that the subject itself will have to express whether the schema is key or not. 
Related to [#40](https://github.com/marcosschroh/python-schema-registry-client/issues/40) - Requirements updated to latest versions: fastavro and requests ### Changed ## [1.2.4] - 2019-11-16 - Faust requirement updated to <= 1.9.0 ### Fixed ## [1.2.3] - 2019-11-02 - fix force `fastavro` to parse always the schemas in order to avoid errors when a process B gets the schema from the server that was previously processed by process A. ### Changed ## [1.2.2] - 2019-10-26 - requirements updated ### Changed ## [1.2.1] - 2019-07-23 - Typing added (mypy) ### Added ## [1.2.0] - 2019-07-22 - Missing endpoints added: - GET /subjects - GET /subjects/{subject}/versions - DELETE /subjects/{subject}/versions/{version} ### Added ## [1.1.0] - 2019-07-19 - Urls manager added to have more control over which endpoint the client is using ### Added ## [1.0.0] - 2019-07-17 - Production ready - Move to FastAvro - Dependency `avro-python3` removed - Support for `logicalTypes` added - `AvroSchema` class added to manage schema parse and hashing - Tests added - Faker lib added to create fixtures - Documentation updated ### Fixed ## [0.3.1] - 2019-07-17 - Error mapping proxy fixed when trying to register a schema that contains `logicalTypes` ### Added ## [0.3.0] - 2019-07-11 - Faust Serializer has been added - Optional Faust requirement added ### Changed ## [0.2.5] - 2019-06-10 - Documentation about `MessageSerializer` and `Faust` Integration updated ### Changed ## [0.2.4] - 2019-06-05 - Missing Compatibility levels added. - `ClientError` added to the documentation - Tests refactored ### Fixed ## [0.2.3] - 2019-05-31 - HTTP code and `server_traceback` added to the `ClientError` when Schema Server returns a not success when a schema compatibility check is requested. ### Fixed ## [0.2.2] - 2019-05-29 - Missing tests added. - Bug in register fixed. ### Changed ## [0.2.1] - 2019-05-27 - Documentation updated. - Missing Python syntax highlighted added. 
### Added ## [0.2.0] - 2019-05-23 - Now all the tests are run against a `Schema Registry Server` and not a mock server. This allows us to be aware of when the server changes. The requirements to run the tests are Docker and `Docker Compose` ### Changed ## [0.1.1] - 2019-05-22 - Http `Client` now is a subclass of `requests.Session` ### Added ## [0.1.0] - 2019-05-22 - It is now possible to initialize SchemaRegistryClient with custom headers. These headers will be included in every request. ### Changed ## [0.0.3] - 2019-05-22 - small tweaks ### Changed ## [0.0.2] - 2019-05-19 - First release - Http `Client` added. - `MessageSerializer` added. - tests added - Documentation added ================================================ FILE: Dockerfile ================================================ ARG PYTHON_VERSION FROM python:${PYTHON_VERSION} ARG POETRY_VERSION=1.4.2 RUN apt-get update && apt-get install -y netcat git && apt-get autoremove -y RUN pip install "poetry==${POETRY_VERSION}" # Create unprivileged user RUN adduser --disabled-password --gecos '' myuser COPY wait_for_services.sh . COPY /scripts scripts/ COPY setup.cfg . COPY README.md . COPY pyproject.toml . COPY poetry.lock . COPY .git . 
COPY /schema_registry /schema_registry COPY /tests /tests # create a file in order to have coverage RUN touch .coverage RUN poetry install --no-interaction --no-ansi --all-extras ENTRYPOINT ["./wait_for_services.sh"] ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) 2023, Marcos Schroh Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ # Python Rest Client Schema Registry [![Python package](https://github.com/marcosschroh/python-schema-registry-client/actions/workflows/python-package.yml/badge.svg)](https://github.com/marcosschroh/python-schema-registry-client/actions/workflows/python-package.yml) [![GitHub license](https://img.shields.io/github/license/marcosschroh/python-schema-registry-client.svg)](https://github.com/marcosschroh/python-schema-registry-client/blob/master/LICENSE) [![codecov](https://codecov.io/gh/marcosschroh/python-schema-registry-client/branch/master/graph/badge.svg)](https://codecov.io/gh/marcosschroh/python-schema-registry-client) [![Python Version](https://img.shields.io/badge/python-3.8+-blue.svg)](https://img.shields.io/badge/python-3.8+-blue.svg) Python Rest Client to interact against [schema-registry](https://docs.confluent.io/current/schema-registry/index.html) confluent server to manage [Avro](https://docs.oracle.com/database/nosql-12.1.3.1/GettingStartedGuide/avroschemas.html) and [JSON](https://json-schema.org/) schemas resources. ## Requirements python 3.8+ ## Installation ```bash pip install python-schema-registry-client ``` If you want the `Faust` functionality: ```bash pip install python-schema-registry-client[faust] ``` Note that this will automatically add a dependency on the [faust-streaming](https://github.com/faust-streaming/faust) fork of faust. If you want to use the old faust version, simply install it manually and then install `python-schema-registry-client` without the `faust` extra enabled, the functionality will be the same. 
## Client API, Serializer, Faust Integration and Schema Server description **Documentation**: [https://marcosschroh.github.io/python-schema-registry-client.io](https://marcosschroh.github.io/python-schema-registry-client) ## Avro Schema Usage ```python from schema_registry.client import SchemaRegistryClient, schema client = SchemaRegistryClient(url="http://127.0.0.1:8081") deployment_schema = { "type": "record", "namespace": "com.kubertenes", "name": "AvroDeployment", "fields": [ {"name": "image", "type": "string"}, {"name": "replicas", "type": "int"}, {"name": "port", "type": "int"}, ], } avro_schema = schema.AvroSchema(deployment_schema) schema_id = client.register("test-deployment", avro_schema) ``` or async ```python from schema_registry.client import AsyncSchemaRegistryClient, schema async_client = AsyncSchemaRegistryClient(url="http://127.0.0.1:8081") deployment_schema = { "type": "record", "namespace": "com.kubertenes", "name": "AvroDeployment", "fields": [ {"name": "image", "type": "string"}, {"name": "replicas", "type": "int"}, {"name": "port", "type": "int"}, ], } avro_schema = schema.AvroSchema(deployment_schema) schema_id = await async_client.register("test-deployment", avro_schema) ``` ## JSON Schema Usage ```python from schema_registry.client import SchemaRegistryClient, schema client = SchemaRegistryClient(url="http://127.0.0.1:8081") deployment_schema = { "definitions" : { "JsonDeployment" : { "type" : "object", "required" : ["image", "replicas", "port"], "properties" : { "image" : {"type" : "string"}, "replicas" : {"type" : "integer"}, "port" : {"type" : "integer"} } } }, "$ref" : "#/definitions/JsonDeployment" } json_schema = schema.JsonSchema(deployment_schema) schema_id = client.register("test-deployment", json_schema) ``` or async ```python from schema_registry.client import AsyncSchemaRegistryClient, schema async_client = AsyncSchemaRegistryClient(url="http://127.0.0.1:8081") deployment_schema = { "definitions" : { "JsonDeployment" : { "type" 
: "object", "required" : ["image", "replicas", "port"], "properties" : { "image" : {"type" : "string"}, "replicas" : {"type" : "integer"}, "port" : {"type" : "integer"} } } }, "$ref" : "#/definitions/JsonDeployment" } json_schema = schema.JsonSchema(deployment_schema) schema_id = await async_client.register("test-deployment", json_schema) ``` ## Usage with dataclasses-avroschema for avro schemas You can generate the `avro schema` directly from a python class using [dataclasses-avroschema](https://github.com/marcosschroh/dataclasses-avroschema) and use it in the API for `register schemas`, `check versions` and `test compatibility`: ```python import dataclasses import typing from dataclasses_avroschema import AvroModel, types from schema_registry.client import SchemaRegistryClient client = SchemaRegistryClient(url="http://127.0.0.1:8081") @dataclasses.dataclass class UserAdvance(AvroModel): name: str age: int pets: typing.List[str] = dataclasses.field(default_factory=lambda: ["dog", "cat"]) accounts: typing.Dict[str, int] = dataclasses.field(default_factory=lambda: {"key": 1}) has_car: bool = False favorite_colors: types.Enum = types.Enum(["BLUE", "YELLOW", "GREEN"], default="BLUE") country: str = "Argentina" address: str = None # register the schema schema_id = client.register(subject, UserAdvance.avro_schema()) print(schema_id) # >>> 12 result = client.check_version(subject, UserAdvance.avro_schema()) print(result) # >>> SchemaVersion(subject='dataclasses-avroschema-subject-2', schema_id=12, schema=1, version={"type":"record" ...') compatibility = client.test_compatibility(subject, UserAdvance.avro_schema()) print(compatibility) # >>> True ``` ## Usage with pydantic for json schemas You can generate the json schema directly from a python class using pydantic and use it in the API for register schemas, check versions and test compatibility: ```python import typing from enum import Enum from pydantic import BaseModel from schema_registry.client import SchemaRegistryClient client 
= SchemaRegistryClient(url="http://127.0.0.1:8081") class ColorEnum(str, Enum): BLUE = "BLUE" YELLOW = "YELLOW" GREEN = "GREEN" class UserAdvance(BaseModel): name: str age: int pets: typing.List[str] = ["dog", "cat"] accounts: typing.Dict[str, int] = {"key": 1} has_car: bool = False favorite_colors: ColorEnum = ColorEnum.BLUE country: str = "Argentina" address: str = None # register the schema schema_id = client.register(subject, UserAdvance.model_json_schema(), schema_type="JSON") print(schema_id) # >>> 12 result = client.check_version(subject, UserAdvance.model_json_schema(), schema_type="JSON") print(result) # >>> SchemaVersion(subject='pydantic-jsonschema-subject', schema_id=12, schema=1, version=) compatibility = client.test_compatibility(subject, UserAdvance.model_json_schema(), schema_type="JSON") print(compatibility) # >>> True ``` ## Serializers You can use `AvroMessageSerializer` to encode/decode messages in `avro` ```python from schema_registry.client import SchemaRegistryClient, schema from schema_registry.serializers import AvroMessageSerializer client = SchemaRegistryClient("http://127.0.0.1:8081") avro_message_serializer = AvroMessageSerializer(client) avro_user_schema = schema.AvroSchema({ "type": "record", "namespace": "com.example", "name": "AvroUsers", "fields": [ {"name": "first_name", "type": "string"}, {"name": "last_name", "type": "string"}, {"name": "age", "type": "int"}, ], }) # We want to encode the user_record with avro_user_schema user_record = { "first_name": "my_first_name", "last_name": "my_last_name", "age": 20, } # Encode the record message_encoded = avro_message_serializer.encode_record_with_schema( "user", avro_user_schema, user_record) print(message_encoded) # >>> b'\x00\x00\x00\x00\x01\x1amy_first_name\x18my_last_name(' ``` or with `json schemas` ```python from schema_registry.client import SchemaRegistryClient, schema from schema_registry.serializers import JsonMessageSerializer client = 
SchemaRegistryClient("http://127.0.0.1:8081") json_message_serializer = JsonMessageSerializer(client) json_schema = schema.JsonSchema({ "definitions" : { "record:python.test.basic.basic" : { "description" : "basic schema for tests", "type" : "object", "required" : [ "number", "name" ], "properties" : { "number" : { "oneOf" : [ { "type" : "integer" }, { "type" : "null" } ] }, "name" : { "oneOf" : [ { "type" : "string" } ] } } } }, "$ref" : "#/definitions/record:python.test.basic.basic" }) # Encode the record basic_record = { "number": 10, "name": "a_name", } message_encoded = json_message_serializer.encode_record_with_schema( "basic", json_schema, basic_record) print(message_encoded) # >>> b'\x00\x00\x00\x00\x02{"number": 10, "name": "a_name"}' ``` ## When to use this library Usually, we have a situation like this: ![Confluent Architecture](docs/img/confluent_architecture.png) So, our producers/consumers have to serialize/deserialize messages every time that they send/receive from Kafka topics. In this picture, we can imagine a `Faust` application receiving messages (encoded with an Avro schema) and we want to deserialize them, so we can ask the `schema server` to do that for us. In this scenario, the `MessageSerializer` is perfect. Also, there could be a use case where we would like to have an application only to administer `Avro Schemas` (register, update compatibilities, delete old schemas, etc.), so the `SchemaRegistryClient` is perfect. ## Development [Poetry](https://python-poetry.org/docs/) is needed to install the dependencies and develop locally 1. Install dependencies: `poetry install --all-extras` 2. Code linting: `./scripts/format` 3. Run tests: `./scripts/test` For commit messages we use [commitizen](https://commitizen-tools.github.io/commitizen/) in order to standardize a way of committing rules *Note*: The tests are run against the `Schema Server` using `docker compose`, so you will need `Docker` and `Docker Compose` installed. 
In a terminal run `docker-compose up`. Then in a different terminal run the tests: ```bash ./scripts/test ``` All additional args will be passed to pytest, for example: ```bash ./scripts/test ./tests/client/ ``` ### Tests using the python shell To perform tests using the python shell you can run the project using `docker-compose`. 1. Execute `docker-compose up`. Then, the `schema registry server` will run on `http://127.0.0.1:8081`, then you can interact against it using the `SchemaRegistryClient`: 1. Use the python interpreter (get a python shell typing `python` in your command line) 1. Play with the `schema server` ```python from schema_registry.client import SchemaRegistryClient, schema client = SchemaRegistryClient(url="http://127.0.0.1:8081") # do some operations with the client... deployment_schema = { "type": "record", "namespace": "com.kubertenes", "name": "AvroDeployment", "fields": [ {"name": "image", "type": "string"}, {"name": "replicas", "type": "int"}, {"name": "port", "type": "int"}, ], } avro_schema = schema.AvroSchema(deployment_schema) client.register("test-deployment", avro_schema) # >>>> Out[5]: 1 ``` Then, you can check the schema using your browser going to the url `http://127.0.0.1:8081/schemas/ids/1` ================================================ FILE: docker-compose.yaml ================================================ version: '3.5' services: zookeeper: image: "confluentinc/cp-zookeeper" hostname: zookeeper ports: - 32181:32181 environment: - ZOOKEEPER_CLIENT_PORT=32181 kafka: # pinned due to https://github.com/confluentinc/kafka-images/issues/127 image: confluentinc/cp-kafka:7.0.0 hostname: kafka container_name: kafka ports: - 9092:9092 - 29092:29092 depends_on: - zookeeper environment: - KAFKA_ZOOKEEPER_CONNECT=zookeeper:32181 - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 - KAFKAD_LISTENERS=PLAINTEXT_HOST://localhost:29092,PLAINTEXT://kafka:9092 - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT_HOST://localhost:29092,PLAINTEXT://kafka:9092 - 
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT - KAFKA_BROKER_ID=1 schema-registry-server: image: confluentinc/cp-schema-registry hostname: schema-registry-server container_name: schema-registry-server depends_on: - kafka - zookeeper ports: - 8081:8081 environment: - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=kafka:9092 - SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:32181 - SCHEMA_REGISTRY_HOST_NAME=schema-registry-server - SCHEMA_REGISTRY_DEBUG=true - SCHEMA_REGISTRY_ACCESS_CONTROL_ALLOW_ORIGIN=* ================================================ FILE: docs/client.md ================================================ # Schema Registry Client The `Schema Registry Client` consumes the API exposed by the `schema-registry` to operate resources that are `avro` and `json` schemas. You probably won't use this, but it is good to know that it exists. The `MessageSerializer` is the one that interacts with the `SchemaRegistryClient` ::: schema_registry.client.SchemaRegistryClient options: show_root_heading: true docstring_section_style: table show_signature_annotations: false ::: schema_registry.client.AsyncSchemaRegistryClient options: show_root_heading: true docstring_section_style: table show_signature_annotations: false ## Auth Credentials can be supplied in `two` different ways: using the `url` or the `schema_registry.client.Auth`. ```python title="Credentials using Auth" from schema_registry.client import SchemaRegistryClient, Auth SchemaRegistryClient( url="https://user_url:secret_url@127.0.0.1:65534", auth=Auth(username="secret-user", password="secret"), ) ``` ```python title="Credentials using the url" from schema_registry.client import SchemaRegistryClient username="secret-username" password="secret" SchemaRegistryClient({"url": f"https://{username}:{password}@127.0.0.1:65534"}) ``` !!!
note These auth methods are the same for `AsyncSchemaRegistryClient` ================================================ FILE: docs/exceptions.md ================================================ # ClientError An instance of ClientError is raised when an error occurs. ```python class ClientError(Exception): """ Error thrown by Schema Registry clients """ def __init__(self, message, http_code=None, server_traceback=None): self.message = message self.server_traceback = server_traceback self.http_code = http_code super(ClientError, self).__init__(self.__str__()) def __repr__(self): return f"ClientError(error={self.message})" def __str__(self): return self.message ``` For example: ```python try: # Try to get the compatibility level of a subject that does not exist # The Schema Registry Server returns 404 compatibility = client.get_compatibility("subject-does-not-exists") except Exception as error: # Print 404 print(error.http_code) ``` ================================================ FILE: docs/faust.md ================================================ # How to use it with Faust This section describes how to integrate this library with [Faust](https://faust.readthedocs.io/en/latest/) ## Schemas, Custom Codecs and Serializers Because we want to be sure that the messages that we encode are valid, we can use [Avro](https://docs.oracle.com/database/nosql-12.1.3.1/GettingStartedGuide/avroschemas.html) or [JSON](https://json-schema.org/) schemas. Also, [Introduction to Schemas in Apache Kafka with the Confluent Schema Registry](https://medium.com/@stephane.maarek/introduction-to-schemas-in-apache-kafka-with-the-confluent-schema-registry-3bf55e401321) is a good post to start with `schemas`. Avro and JSON can be used to define the data schema for a record's value. This schema describes the fields allowed in the value, along with their data types.
In order to use `avro schemas` or `json schemas` with `Faust`, we need to define a custom codec and a custom serializer able to talk with the `schema-registry`, and to do that, we will use the `MessageSerializer`. For serializing `avro schemas` we should use the `FaustSerializer`. For serializing `json schemas` we should use the `FaustJsonSerializer`. For our demonstration, let's imagine that we have the following `avro schema`: ```json { "type": "record", "namespace": "com.example", "name": "AvroUsers", "fields": [ {"name": "first_name", "type": "string"}, {"name": "last_name", "type": "string"} ] } ``` Let's register the custom `codec` ```python title="Trivial Usage" # codecs.codec.py from schema_registry.client import SchemaRegistryClient, schema from schema_registry.serializers.faust import FaustSerializer # create an instance of the `SchemaRegistryClient` client = SchemaRegistryClient(url=settings.SCHEMA_REGISTRY_URL) # schema that we want to use. For this example we # are using a dict, but this schema could be located in a file called avro_user_schema.avsc avro_user_schema = schema.AvroSchema({ "type": "record", "namespace": "com.example", "name": "AvroUsers", "fields": [ {"name": "first_name", "type": "string"}, {"name": "last_name", "type": "string"} ] }) avro_user_serializer = FaustSerializer(client, "users", avro_user_schema) # function used to register the codec def avro_user_codec(): return avro_user_serializer ``` and add in `setup.py` the following code in order to tell faust where to find the custom codecs. ```python title="setup.py" setup( ... 
entry_points={ 'console_scripts': [ 'example = example.app:main', ], 'faust.codecs': [ 'avro_users = example.codecs.avro:avro_user_codec', ], }, ) ``` or if you are using `poetry` and a [`pyproject.toml`](https://python-poetry.org/docs/pyproject/) you can add in `pyproject.toml` the following code to tell faust where to find the custom codecs: ```toml title="pyproject.toml" [tool.poetry.scripts] example = "example.app:main" [tool.poetry.plugins."faust.codecs"] avro_users = "example.codecs.avro:avro_user_codec" ``` Now the final step is to integrate the faust model with the AvroSerializer. ```python title="user.models.py" import faust class UserModel(faust.Record, serializer='avro_users'): first_name: str last_name: str ``` Now our application is able to send and receive messages using avro schemas!!!! :-) ```python title="application" import logging from your_project.app import app from .codecs.codec import avro_user_serializer from .models import UserModel users_topic = app.topic('avro_users', partitions=1, value_type=UserModel) logger = logging.getLogger(__name__) @app.agent(users_topic) async def users(users): async for user in users: logger.info("Event received in topic avro_users") logger.info(f"First Name: {user.first_name}, last name {user.last_name}") @app.timer(5.0, on_leader=True) async def publish_users(): logger.info('PUBLISHING ON LEADER FOR USERS APP!') user = {"first_name": "foo", "last_name": "bar"} await users.send(value=user, value_serializer=avro_user_serializer) ``` The full example is [here](https://github.com/marcosschroh/faust-docker-compose-example/blob/master/faust-project/example/codecs/avro.py) ### Usage with dataclasses-avroschema for avro schemas You can also use this functionality with [dataclasses-avroschema](https://github.com/marcosschroh/dataclasses-avroschema) and you won't have to provide the avro schema.
The only thing that you need to do is add the `AvroModel` class and use its methods: ```python title="user.models.py" import faust from dataclasses_avroschema.faust import AvroRecord class UserModel(AvroRecord, serializer='avro_users'): first_name: str last_name: str # codecs.codec.py from schema_registry.client import SchemaRegistryClient, schema from schema_registry.serializers.faust import FaustSerializer from users.models import UserModel # create an instance of the `SchemaRegistryClient` client = SchemaRegistryClient(url=settings.SCHEMA_REGISTRY_URL) avro_user_serializer = FaustSerializer(client, "users", UserModel.avro_schema()) # using the method avro_schema to get the avro schema representation # function used to register the codec def avro_user_codec(): return avro_user_serializer ``` ### Usage with pydantic for json schemas You can also use this functionality with [pydantic](https://github.com/samuelcolvin/pydantic) and you won't have to provide the json schema. The only thing that you need to do is add the `BaseModel` class and use its methods: ```python title="users.models.py" import faust from pydantic import BaseModel class UserModel(faust.Record, BaseModel, serializer='json_users'): first_name: str last_name: str # codecs.codec.py from schema_registry.client import SchemaRegistryClient, schema from schema_registry.serializers.faust import FaustJsonSerializer from users.models import UserModel # create an instance of the `SchemaRegistryClient` client = SchemaRegistryClient(url=settings.SCHEMA_REGISTRY_URL) json_user_serializer = FaustJsonSerializer(client, "users", UserModel.model_json_schema()) # using the method model_json_schema to get the json schema representation # function used to register the codec def json_user_codec(): return json_user_serializer ``` ================================================ FILE: docs/index.md ================================================ # Python Rest Client Schema Registry [![Python
package](https://github.com/marcosschroh/python-schema-registry-client/actions/workflows/python-package.yml/badge.svg)](https://github.com/marcosschroh/python-schema-registry-client/actions/workflows/python-package.yml) [![GitHub license](https://img.shields.io/github/license/marcosschroh/python-schema-registry-client.svg)](https://github.com/marcosschroh/python-schema-registry-client/blob/master/LICENSE) [![codecov](https://codecov.io/gh/marcosschroh/python-schema-registry-client/branch/master/graph/badge.svg)](https://codecov.io/gh/marcosschroh/python-schema-registry-client) [![Python Version](https://img.shields.io/badge/python-3.8+-blue.svg)](https://img.shields.io/badge/python-3.8+-blue.svg) Python Rest Client to interact against [schema-registry](https://docs.confluent.io/current/schema-registry/index.html) confluent server to manage [Avro](https://docs.oracle.com/database/nosql-12.1.3.1/GettingStartedGuide/avroschemas.html) and [JSON](https://json-schema.org/) schemas resources. ## Requirements python 3.8+ ## Installation ```bash pip install python-schema-registry-client ``` If you want the `Faust` functionality: ```bash pip install python-schema-registry-client[faust] ``` ## Usage ```python title="Trivial Usage with avro" from schema_registry.client import SchemaRegistryClient, schema client = SchemaRegistryClient(url="http://127.0.0.1:8081") deployment_schema = { "type": "record", "namespace": "com.kubertenes", "name": "AvroDeployment", "fields": [ {"name": "image", "type": "string"}, {"name": "replicas", "type": "int"}, {"name": "port", "type": "int"}, ], } avro_schema = schema.AvroSchema(deployment_schema) schema_id = client.register("test-deployment", avro_schema) ``` ```python title="Trivial Usage Async with avro" from schema_registry.client import AsyncSchemaRegistryClient, schema async_client = AsyncSchemaRegistryClient(url="http://127.0.0.1:8081") deployment_schema = { "type": "record", "namespace": "com.kubertenes", "name": "AvroDeployment",
"fields": [ {"name": "image", "type": "string"}, {"name": "replicas", "type": "int"}, {"name": "port", "type": "int"}, ], } avro_schema = schema.AvroSchema(deployment_schema) schema_id = await async_client.register("test-deployment", avro_schema) ``` ```python title="Trival Usage with json schemas" from schema_registry.client import SchemaRegistryClient, schema client = SchemaRegistryClient(url="http://127.0.0.1:8081") deployment_schema = { "definitions" : { "JsonDeployment" : { "type" : "object", "required" : ["image", "replicas", "port"], "properties" : { "image" : {"type" : "string"}, "replicas" : {"type" : "integer"}, "port" : {"type" : "integer"} } } }, "$ref" : "#/definitions/JsonDeployment" } json_schema = schema.JsonSchema(deployment_schema) schema_id = client.register("test-deployment", json_schema) ``` ```python title="Trival Usage Asynv with json schemas" from schema_registry.client import AsyncSchemaRegistryClient, schema async_client = AsyncSchemaRegistryClient(url="http://127.0.0.1:8081") deployment_schema = { "definitions" : { "JsonDeployment" : { "type" : "object", "required" : ["image", "replicas", "port"], "properties" : { "image" : {"type" : "string"}, "replicas" : {"type" : "integer"}, "port" : {"type" : "integer"} } } }, "$ref" : "#/definitions/JsonDeployment" } json_schema = schema.JsonSchema(deployment_schema) schema_id = await async_client.register("test-deployment", json_schema) ``` ## Usage with dataclasses-avroschema You can generate the `avro schema` and `json schemas` directely from a python class using [dataclasses-avroschema](https://github.com/marcosschroh/dataclasses-avroschema) and use it in the API for `register schemas`, `check versions` and `test compatibility`: ```python title="Trival Usage with dataclasses-avroschema" import dataclasses from enum import Enum import typing from dataclasses_avroschema import AvroModel from schema_registry.client import SchemaRegistryClient client = 
SchemaRegistryClient(url="http://127.0.0.1:8081") class ColorEnum(str, Enum): BLUE = "BLUE" YELLOW = "YELLOW" GREEN = "GREEN" @dataclasses.dataclass class UserAdvance(AvroModel): name: str age: int pets: typing.List[str] = dataclasses.field(default_factory=lambda: ["dog", "cat"]) accounts: typing.Dict[str, int] = dataclasses.field(default_factory=lambda: {"key": 1}) has_car: bool = False favorite_colors: ColorEnum = ColorEnum.BLUE country: str = "Argentina" address: str = None subject = "subject" # register the schema schema_id = client.register(subject, UserAdvance.avro_schema()) print(schema_id) # >>> 12 result = client.check_version(subject, UserAdvance.avro_schema()) print(result) # >>> SchemaVersion(subject='dataclasses-avroschema-subject-2', schema_id=12, schema=1, version={"type":"record" ...') compatible = client.test_compatibility(subject, UserAdvance.avro_schema()) print(compatible) # >>> True ``` !!! note You can generate json schemas with `dataclasses-avroschema` adding the *[pydantic batteries](https://marcosschroh.github.io/dataclasses-avroschema/pydantic/)* ## Usage with pydantic for json schemas You can generate the json schema directly from a python class using pydantic and use it in the API for register schemas, check versions and test compatibility: ```python title="Trivial Usage with pydantic" import typing from enum import Enum from pydantic import BaseModel from schema_registry.client import SchemaRegistryClient client = SchemaRegistryClient(url="http://127.0.0.1:8081") class ColorEnum(str, Enum): BLUE = "BLUE" YELLOW = "YELLOW" GREEN = "GREEN" class UserAdvance(BaseModel): name: str age: int pets: typing.List[str] = ["dog", "cat"] accounts: typing.Dict[str, int] = {"key": 1} has_car: bool = False favorite_colors: ColorEnum = ColorEnum.BLUE country: str = "Argentina" address: str = None subject = "subject" # register the schema schema_id = client.register(subject, UserAdvance.model_json_schema(), schema_type="JSON") print(schema_id) # >>> 12
result = client.check_version(subject, UserAdvance.model_json_schema(), schema_type="JSON") print(result) # >>> SchemaVersion(subject='pydantic-jsonschema-subject', schema_id=12, schema=1, version=) compatible = client.test_compatibility(subject, UserAdvance.model_json_schema(), schema_type="JSON") print(compatible) # >>> True ``` ## When use this library Usually, we have a situation like this: ![Confluent Architecture](img/confluent_architecture.png) So, our producers/consumers have to serialize/deserialize messages every time that they send/receive from Kafka topics. In this picture, we can imagine a `Faust` application receiving messages (encoded with an Avro schema) and we want to deserialize them, so we can ask the `schema server` to do that for us. In this scenario, the `MessageSerializer` is perfect. Also, there could be a use case where we would like to have an Application only to administrate `Avro Schemas` (register, update compatibilities, delete old schemas, etc.), so the `SchemaRegistryClient` is perfect. ## Development [Poetry](https://python-poetry.org/docs/) is needed to install the dependencies and develop locally 1. Install dependencies: `poetry install --all-extras` 2. Code linting: `./scripts/format` 3. Run tests: `./scripts/test` For commit messages we use [commitizen](https://commitizen-tools.github.io/commitizen/) in order to standardize a way of committing rules *Note*: The tests are run against the `Schema Server` using `docker compose`, so you will need `Docker` and `Docker Compose` installed. In a terminal run `docker-compose up`. Then in a different terminal run the tests: ```bash ./scripts/test ``` All additional args will be passed to pytest, for example: ```bash ./scripts/test ./tests/client/ ``` ### Tests using the python shell To perform tests using the python shell you can run the project using `docker-compose`. 1. Build: `docker-compose build --build-arg PYTHON_VERSION=$PYTHON_VERSION` 2. Execute `docker-compose up`.
Then, the `schema registry server` will run on `http://127.0.0.1:8081`, then you can interact against it using the `SchemaRegistryClient`: 3. Use the python interpreter (get a python shell typing `python` in your command line) 4. Play with the `schema server` ```python from schema_registry.client import SchemaRegistryClient, schema client = SchemaRegistryClient(url="http://127.0.0.1:8081") # do some operations with the client... deployment_schema = { "type": "record", "namespace": "com.kubertenes", "name": "AvroDeployment", "fields": [ {"name": "image", "type": "string"}, {"name": "replicas", "type": "int"}, {"name": "port", "type": "int"}, ], } avro_schema = schema.AvroSchema(deployment_schema) client.register("test-deployment", avro_schema) # >>>> Out[5]: 1 ``` Then, you can check the schema using your browser going to the url `http://127.0.0.1:8081/schemas/ids/1` ================================================ FILE: docs/schemaregistry_server.md ================================================ # Schema Registry Server This section provides you just an introduction about the `Schema Server`. Schema Registry provides a serving layer for your metadata. It provides a RESTful interface for storing and retrieving Avro or JSON schemas. It stores a versioned history of all schemas, provides multiple compatibility settings and allows evolution of schemas according to the configured compatibility settings and expanded Avro or JSON support. It provides serializers that plug into Apache Kafka® clients that handle schema storage and retrieval for Kafka messages that are sent in the Avro or JSON format. Schema Registry is a distributed storage layer for Avro or JSON Schemas which uses Kafka as its underlying storage mechanism. Some key design decisions: 1. Assigns globally unique ID to each registered schema. Allocated IDs are guaranteed to be monotonically increasing but not necessarily consecutive. 2. 
Kafka provides the durable backend, and functions as a write-ahead changelog for the state of Schema Registry and the schemas it contains. 3. Schema Registry is designed to be distributed, with single-primary architecture, and ZooKeeper/Kafka coordinates primary election (based on the configuration). ## API ### Schemas `GET /schemas/ids/{int: id}` - Get the schema string identified by the input ID ### Subjects `GET /subjects` - Get a list of registered subjects. `GET /subjects/(string: subject)/versions` - Get a list of versions registered under the specified subject `GET /schemas/ids/{int: id}/versions` - Get the subject-version pairs identified by the input ID. `DELETE /subjects/(string: subject)` - Deletes the specified subject and its associated compatibility level if registered. It is recommended to use this API only when a topic needs to be recycled or in development environment. `GET /subjects/(string: subject)/versions/(versionId: version)` - Get a specific version of the schema registered under this subject *Check response* `GET /subjects/(string: subject)/versions/(versionId: version)/schema` - Get the avro or json schema for the specified version of this subject. The unescaped schema only is returned. *[Missing]* `POST /subjects/(string: subject)/versions` - Register a new schema under the specified subject and receive a schema id `POST /subjects/(string: subject)` - Check if a schema has already been registered under the specified subject. If so, this returns the schema string along with its globally unique identifier, its version under this subject and the subject name. `DELETE /subjects/(string: subject)/versions/(versionId: version)` - Deletes a specific version of the schema registered under this subject. This only deletes the version and the schema ID remains intact making it still possible to decode data using the schema ID. 
This API is recommended to be used only in development environments or under extreme circumstances where-in, its required to delete a previously registered schema for compatibility purposes or re-register previously registered schema. *[Missing]* ### Compatibility `POST /compatibility/subjects/(string: subject)/versions/(versionId: version)` - Test input schema against a particular version of a subject's schema for compatibility. Note that the compatibility level applied for the check is the configured compatibility level for the subject (http:get:: /config/(string: subject)). If this subject's compatibility level was never changed, then the global compatibility level applies (http:get:: /config). These are the compatibility types: *BACKWARD*: (default) consumers using the new schema can read data written by producers using the latest registered schema *BACKWARD_TRANSITIVE*: consumers using the new schema can read data written by producers using all previously registered schemas *FORWARD*: consumers using the latest registered schema can read data written by producers using the new schema *FORWARD_TRANSITIVE*: consumers using all previously registered schemas can read data written by producers using the new schema *FULL*: the new schema is forward and backward compatible with the latest registered schema *FULL_TRANSITIVE*: the new schema is forward and backward compatible with all previously registered schemas *NONE*: schema compatibility checks are disabled ### Config `GET /config` - Get global compatibility level. `PUT /config` - Update global compatibility level. *[Missing]* `GET /config/(string: subject)` - Get compatibility level for a subject. *[Missing]* `PUT /config/(string: subject)` - Update compatibility level for the specified subject. 
To know more about the API, go [here](https://docs.confluent.io/current/schema-registry/develop/api.html) ================================================ FILE: docs/schemas.md ================================================ # Schemas ## BaseSchema `BaseSchema` is an abstract base class from which `AvroSchema` and `JsonSchema` inherit. It requires concrete classes to implement the following methods. ```python @abstractmethod def parse_schema(self, schema: typing.Dict) -> typing.Dict: pass @staticmethod @abstractmethod def load(fp: str) -> BaseSchema: """Parse a schema from a file path""" pass @staticmethod @abstractmethod async def async_load(fp: str) -> BaseSchema: """Parse a schema from a file path""" pass @property @abstractmethod def name(self) -> typing.Optional[str]: pass @property @abstractmethod def schema_type(self) -> str: pass ``` ## AvroSchema `AvroSchema` parses strings into avro schemas to assure validation. Properties: `raw_schema`: The input string that will be parsed `schema`: Result of parsing the raw_schema with `fastavro` `flat_schema`: Parsed schema without `__fastavro_parsed` flag `expanded_schema`: Parsed schema where all named types are expanded to their real schema ## JsonSchema `JsonSchema` parses strings into json schemas to assure validation. Properties: `raw_schema`: The input string that will be parsed `schema`: Result of parsing the raw_schema with `jsonschema.Draft7Validator.check_schema` ## SchemaVersion `SchemaVersion` is a `namedtuple` that contains the `subject`, `schema_id`, `version` and either `AvroSchema` or `JsonSchema`. The `SchemaVersion` is returned by `get_schema` and `check_version` client methods ================================================ FILE: docs/serializer.md ================================================ # Serializers To serialize and deserialize messages you can use `AvroMessageSerializer` and `JsonMessageSerializer`.
They interact with the `SchemaRegistryClient` to get `avro Schemas` and `json schemas` in order to process messages. *If you want to run the following examples run `docker-compose up` and the `schema registry server` will run on `http://127.0.0.1:8081`* !!! warning The `AvroMessageSerializer` uses the same `protocol` as confluent, meaning that the event will contain the schema id in the payload. If you produce an event with the `AvroMessageSerializer` you have to consume it with the `AvroMessageSerializer` as well, otherwise you have to implement the parser on the consumer side. ::: schema_registry.serializers.AvroMessageSerializer options: show_root_heading: true docstring_section_style: table show_signature_annotations: true ::: schema_registry.serializers.JsonMessageSerializer options: show_root_heading: true docstring_section_style: table show_signature_annotations: false show_base_classes: true ## Async implementations `JsonMessageSerializer`, `AvroMessageSerializer` and `SchemaRegistryClient` have their asynchronous counterparts `AsyncJsonMessageSerializer`, `AsyncAvroMessageSerializer` and `AsyncSchemaRegistryClient` and all examples above should work if you replace them with their async variations ::: schema_registry.serializers.AsyncAvroMessageSerializer options: show_root_heading: true docstring_section_style: table show_signature_annotations: false ::: schema_registry.serializers.AsyncJsonMessageSerializer options: show_root_heading: true docstring_section_style: table show_signature_annotations: false ================================================ FILE: mkdocs.yml ================================================ site_name: Python Schema Registry Client site_description: avro, kafka, client, faust, schema theme: name: 'material' palette: - scheme: default primary: blue grey accent: indigo toggle: icon: material/lightbulb name: Switch to dark mode - scheme: slate primary: blue grey accent: indigo toggle: icon: material/lightbulb-outline name: Switch to 
light mode features: - search.suggest - search.highlight - content.tabs.link - content.code.annotate repo_name: marcosschroh/python-schema-registry-client repo_url: https://github.com/marcosschroh/python-schema-registry-client nav: - Introduction: 'index.md' - SchemaRegistryClient Api: 'client.md' - Serializers: 'serializer.md' - Faust integration: 'faust.md' - Schemas: 'schemas.md' - Exceptions: 'exceptions.md' - Schema Registry Server: 'schemaregistry_server.md' markdown_extensions: - pymdownx.highlight - pymdownx.inlinehilite - pymdownx.superfences - pymdownx.snippets - pymdownx.critic - pymdownx.caret - pymdownx.keys - pymdownx.mark - pymdownx.tilde - pymdownx.details - tables - attr_list - md_in_html - admonition - codehilite plugins: - autorefs - mkdocstrings ================================================ FILE: pyproject.toml ================================================ [tool.poetry] name = "python-schema-registry-client" version = "2.6.1" description = "Python Rest Client to interact against Schema Registry confluent server" authors = ["Marcos Schroh "] license = "MIT" readme = "README.md" packages = [{include = "schema_registry"}] classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3 :: Only", "Topic :: Software Development", ] [tool.poetry.dependencies] python = "^3.8" fastavro = "^1.7.3" jsonschema = "^4.17.3" httpx = ">=0.28,<0.29" anyio = ">=2,<5" faust-streaming = {version = ">=0.10.11,<0.12.0", optional = true} [tool.poetry.group.dev.dependencies] mypy = "^1" ruff = ">=0.8,<0.10" pytest = ">=7,<9" pytest-cov = ">=4,<6" pytest-mock = "^3.10.0" pytest-asyncio = ">=0.21,<0.24" dataclasses-avroschema = {version = ">=0.57,<0.62", 
extras = ["pydantic", "faker"]} codecov = "^2.1.13" types-jsonschema = "^4.17.0.7" [tool.poetry.group.docs.dependencies] mkdocs = "^1" mkdocs-material = "^9" mkdocstrings = {extras = ["python"], version = ">=0.21.2,<0.27.0"} [tool.poetry.group.ci-publish.dependencies] commitizen = "^3" [tool.poetry.extras] faust = ["faust-streaming"] [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" [tool.mypy] allow_empty_bodies = true [tool.ruff] line-length = 120 exclude = [ ".bzr", ".direnv", ".eggs", ".git", ".hg", ".mypy_cache", ".nox", ".pants.d", ".ruff_cache", ".svn", ".tox", ".venv", "__pypackages__", "_build", "buck-out", "build", "dist", "node_modules", ".venv", ] [tool.ruff.lint] fixable = ["ALL"] unfixable = [] select = [ "B", # flake8-bugbear "C", # flake8-comprehensions "D100", # pydocstyle: public module docstring "D101", # pydocstyle: docstring in public class "E", # pycodestyle errors "F", # pyflakes "I", # isort "S", # bandit "W", # pycodestyle warnings ] [tool.ruff.lint.per-file-ignores] "tests/*.py" = [ "D", "S", ] [tool.ruff.lint.pydocstyle] convention = "google" [tool.commitizen] version_provider = "poetry" tag_format = "v$version" update_changelog_on_bump = true ================================================ FILE: schema_registry/__init__.py ================================================ ================================================ FILE: schema_registry/client/__init__.py ================================================ from . 
def get_response_and_status_code(
    response: httpx.Response,
) -> typing.Tuple[typing.Any, int]:
    """Split an HTTP response into its decoded JSON body and its status code.

    Args:
        response: Http response object

    Returns:
        A ``(payload, status_code)`` tuple, where ``payload`` is whatever
        ``response.json()`` returns.
    """
    payload = response.json()
    return payload, response.status_code
def _configure_auth(self) -> Auth:
    """Resolve the credentials attached to every request.

    Resolution order:

    1. ``self.auth`` when it was supplied to the constructor.
    2. The ``basic.auth.user.info`` configuration entry (``username:password``)
       when ``basic.auth.credentials.source`` is ``USER_INFO``.
    3. Otherwise, the credentials embedded in the registry url (if any).

    ``basic.auth.credentials.source`` is popped from ``self.conf`` (defaulting to
    ``URL``), and ``basic.auth.user.info`` is popped as well when it is used.

    Returns:
        The auth object to hand to the http client.

    Raises:
        ValueError: If ``basic.auth.credentials.source`` is not one of
            ``utils.VALID_AUTH_PROVIDERS``.
    """
    # Explicit credentials always win.
    if self.auth is not None:
        return self.auth

    # This part should be deprecated with a new major version. Url should be only a string
    url = self.conf["url"]
    auth_provider = self.conf.pop("basic.auth.credentials.source", "URL").upper()  # type: ignore
    if auth_provider not in utils.VALID_AUTH_PROVIDERS:
        raise ValueError(
            f"""
            schema.registry.basic.auth.credentials.source
            must be one of {utils.VALID_AUTH_PROVIDERS}
            """
        )

    if auth_provider == "USER_INFO":
        logger.warning("Deprecation warning: This will be deprecated in future versions. Use httpx.Auth instead")
        user_info = self.conf.pop("basic.auth.user.info", "")  # type: ignore
        # Split on the FIRST colon only: a password may itself contain ":",
        # and a value without any colon must not crash the constructor.
        username, _, password = user_info.partition(":")
        return BasicAuth(username, password)

    # Credentials might be in the url.
    parsed_url = urlparse(url)
    return BasicAuth(parsed_url.username or "", parsed_url.password or "")
def prepare_headers(
    self,
    body: typing.Optional[typing.Dict] = None,
    headers: typing.Optional[typing.Dict] = None,
) -> typing.Dict[str, str]:
    """Build the HTTP headers for a single request.

    ``Accept`` is always set. ``Content-Type`` is added only when a truthy
    ``body`` is given. Caller supplied ``headers`` are applied last, so they
    override the defaults.

    Args:
        body: Body of the future request. Defaults to None.
        headers: Additional headers to merge in. Defaults to None.

    Returns:
        The headers as a dictionary
    """
    merged: typing.Dict[str, str] = {"Accept": utils.ACCEPT_HEADERS}
    if body:
        merged["Content-Type"] = utils.HEADERS
    merged.update(headers or {})
    return merged
response = client.request(method, url, headers=_headers, json=body, params=params, timeout=timeout) return response class SchemaRegistryClient(BaseClient): """A client that talks to a Schema Registry over HTTP. !!! Example ```python title="Usage" from schema_registry.client import SchemaRegistryClient, schema client = SchemaRegistryClient(url="http://127.0.0.1:8081") deployment_schema = { "type": "record", "namespace": "com.kubertenes", "name": "AvroDeployment", "fields": [ {"name": "image", "type": "string"}, {"name": "replicas", "type": "int"}, {"name": "port", "type": "int"}, ], } avro_schema = schema.AvroSchema(deployment_schema) schema_id = client.register("test-deployment", avro_schema) ``` Args: url: Url to schema registry or dictionary containing client configuration. ca_location: File or directory path to CA certificate(s) for verifying the Schema Registry key. cert_location: Path to public key used for authentication. key_location: Path to private key used for authentication. key_password: Key password extra_headers: Extra headers to add on every requests. timeout: The timeout configuration to use when sending requests. pool_limits: The connection pool configuration to use when determining the maximum number of concurrently open HTTP connections. auth: Auth credentials. 
def request(
    self,
    url: str,
    method: str = "GET",
    body: typing.Optional[typing.Dict] = None,
    params: typing.Optional[typing.Dict] = None,
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
) -> httpx.Response:
    """Send one synchronous HTTP request to the Schema Registry.

    A new ``httpx.Client`` is built from ``self.client_kwargs`` for the call and
    closed before returning.

    Args:
        url: Target url
        method: HTTP method; must be one of ``utils.VALID_METHODS``
        body: Payload sent as JSON
        params: Query string parameters
        headers: Extra headers merged on top of the default ones
        timeout: The timeout configuration to use when sending the request. Default USE_CLIENT_DEFAULT

    Returns:
        The raw http response

    Raises:
        ClientError: If ``method`` is not a valid method
    """
    if method not in utils.VALID_METHODS:
        raise ClientError(f"Method {method} is invalid; valid methods include {utils.VALID_METHODS}")

    request_headers = self.prepare_headers(body=body, headers=headers)
    with httpx.Client(**self.client_kwargs) as http_client:
        return http_client.request(
            method,
            url,
            headers=request_headers,
            json=body,
            params=params,
            timeout=timeout,
        )
def get_subjects(
    self,
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
) -> list:
    """Get list of all registered subjects in your Schema Registry.

    GET /subjects

    Args:
        headers: Extra headers to add on the requests
        timeout: The timeout configuration to use when sending requests. Default USE_CLIENT_DEFAULT

    Returns:
        List of registered subjects.

    Raises:
        ClientError: If the registry does not answer with a success status
    """
    endpoint, http_method = self.url_manager.url_for("get_subjects")
    response = self.request(endpoint, method=http_method, headers=headers, timeout=timeout)
    payload, status_code = get_response_and_status_code(response)

    if not status.is_success(status_code):
        raise ClientError("Unable to get subjects", http_code=status_code, server_traceback=payload)
    return payload
def get_schema_subject_versions(
    self,
    schema_id: int,
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
) -> typing.Optional[typing.List[SubjectVersion]]:
    """List every subject/version pair under which a schema id is registered.

    GET /schemas/ids/{int: id}/versions

    Args:
        schema_id: Schema Id
        headers: Extra headers to add on the requests
        timeout: The timeout configuration to use when sending requests. Default USE_CLIENT_DEFAULT

    Returns:
        List of Subject/Version pairs where Schema Id is registered, or None
        when the registry answers 404.

    Raises:
        ClientError: On any other non-success status
    """
    endpoint, http_method = self.url_manager.url_for("get_schema_subject_versions", schema_id=schema_id)
    response = self.request(endpoint, method=http_method, headers=headers, timeout=timeout)
    payload, status_code = get_response_and_status_code(response)

    if status_code == status.HTTP_404_NOT_FOUND:
        logger.warning(f"Schema {schema_id} not found: {status_code}")
        return None

    if status.is_success(status_code):
        return [SubjectVersion(item["subject"], item["version"]) for item in payload]

    raise ClientError(
        f"Received bad schema (id {schema_id})",
        http_code=status_code,
        server_traceback=payload,
    )
def get_versions(
    self,
    subject: str,
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
) -> list:
    """Get a list of versions registered under the specified subject.

    GET subjects/{subject}/versions

    Args:
        subject: subject name
        headers: Extra headers to add on the requests
        timeout: The timeout configuration to use when sending requests. Default USE_CLIENT_DEFAULT

    Returns:
        The versions registered under this subject; an empty list when the
        registry answers 404.

    Raises:
        ClientError: On any other non-success status
    """
    endpoint, http_method = self.url_manager.url_for("get_versions", subject=subject)
    response = self.request(endpoint, method=http_method, headers=headers, timeout=timeout)
    payload, status_code = get_response_and_status_code(response)

    if status.is_success(status_code):
        return payload

    if status_code == status.HTTP_404_NOT_FOUND:
        logger.info(f"Subject {subject} not found")
        return []

    raise ClientError(
        f"Unable to get the versions for subject {subject}",
        http_code=status_code,
        server_traceback=payload,
    )
def check_version(
    self,
    subject: str,
    schema: typing.Union[BaseSchema, str, typing.Dict[str, typing.Any]],
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
    schema_type: typing.Literal["AVRO", "JSON"] = utils.AVRO_SCHEMA_TYPE,
) -> typing.Optional[utils.SchemaVersion]:
    """Check if a schema has already been registered under the specified subject.

    The local caches are consulted first; the registry is queried only when the
    schema id or version is not cached for this subject.

    POST /subjects/(string: subject)

    Args:
        subject: subject name
        schema: Avro or JSON schema
        headers: Extra headers to add on the requests
        timeout: The timeout configuration to use when sending requests. Default USE_CLIENT_DEFAULT
        schema_type: The type of schema to parse if `schema` is a string or a dict. Default "AVRO"

    Returns:
        SchemaVersion if the schema exists, None when the registry answers 404.
        On a cache hit the ``schema`` field is the parsed schema object; on a
        registry hit it is the raw ``schema`` value of the response.

    Raises:
        ClientError: On any non-success status other than 404
    """
    if isinstance(schema, (str, dict)):
        schema = SchemaFactory.create_schema(schema, schema_type)

    cache_key = str(schema)
    version = self.subject_to_schema_versions[subject].get(cache_key)
    schema_id = self.subject_to_schema_ids[subject].get(cache_key)

    # Both values must be cached (and truthy) to skip the round trip.
    if version and schema_id:
        return utils.SchemaVersion(subject=subject, schema_id=schema_id, version=version, schema=schema)

    endpoint, http_method = self.url_manager.url_for("check_version", subject=subject)
    payload = {
        "schema": json.dumps(schema.raw_schema),
        "schemaType": schema.schema_type,
    }
    response = self.request(endpoint, method=http_method, body=payload, headers=headers, timeout=timeout)
    result, code = get_response_and_status_code(response)

    if code == status.HTTP_404_NOT_FOUND:
        logger.info(f"Schema {schema.name} under subject {subject} not found: {code}")
        return None

    if not status.is_success(code):
        raise ClientError("Unable to get version of a schema", http_code=code, server_traceback=result)

    schema_id = result["id"]
    version = result.get("version")
    self._cache_schema(schema, schema_id, subject, version)  # type: ignore

    return utils.SchemaVersion(
        subject=subject,
        schema_id=schema_id,
        version=version,
        schema=result.get("schema"),
    )
def update_compatibility(
    self,
    level: str,
    subject: typing.Optional[str] = None,
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
) -> bool:
    """Update the compatibility level.

    If subject is None, the compatibility level is global.

    PUT /config/(string: subject)

    Args:
        level: one of BACKWARD, BACKWARD_TRANSITIVE, FORWARD, FORWARD_TRANSITIVE,
            FULL, FULL_TRANSITIVE, NONE
        subject: Optional subject name
        headers: Extra headers to add on the requests
        timeout: The timeout configuration to use when sending requests. Default USE_CLIENT_DEFAULT

    Returns:
        True when the compatibility level was updated

    Raises:
        ClientError: if ``level`` is not a valid level (checked before any
            request is sent) or the request was unsuccessful
    """
    if level not in utils.VALID_LEVELS:
        raise ClientError(f"Invalid level specified: {level}")

    endpoint, http_method = self.url_manager.url_for("update_compatibility", subject=subject)
    response = self.request(
        endpoint,
        method=http_method,
        body={"compatibility": level},
        headers=headers,
        timeout=timeout,
    )
    payload, status_code = get_response_and_status_code(response)

    if not status.is_success(status_code):
        raise ClientError(f"Unable to update level: {level}.", http_code=status_code, server_traceback=payload)
    return True
Default USE_CLIENT_DEFAULT Returns: One of BACKWARD, BACKWARD_TRANSITIVE, FORWARD, FORWARD_TRANSITIVE, FULL, FULL_TRANSITIVE, NONE Raises: ClientError: if the request was unsuccessful or an invalid compatibility level was returned """ url, method = self.url_manager.url_for("get_compatibility", subject=subject) result, code = get_response_and_status_code(self.request(url, method=method, headers=headers, timeout=timeout)) if status.is_success(code): compatibility = result.get("compatibilityLevel") if compatibility not in utils.VALID_LEVELS: if compatibility is None: error_msg_suffix = "No compatibility was returned" else: error_msg_suffix = str(compatibility) raise ClientError( f"Invalid compatibility level received: {error_msg_suffix}", http_code=code, server_traceback=result, ) return compatibility raise ClientError( f"Unable to fetch compatibility level. Error code: {code}", http_code=code, server_traceback=result, ) class AsyncSchemaRegistryClient(BaseClient): """A client that talks to a Schema Registry over HTTP. Args: url: Url to schema registry or dictionary containing client configuration. ca_location: File or directory path to CA certificate(s) for verifying the Schema Registry key. cert_location: Path to public key used for authentication. key_location: Path to private key used for authentication. key_password: Key password extra_headers: Extra headers to add on every requests. timeout: The timeout configuration to use when sending requests. pool_limits: The connection pool configuration to use when determining the maximum number of concurrently open HTTP connections. auth: Auth credentials. 
async def register(
    self,
    subject: str,
    schema: typing.Union[BaseSchema, str, typing.Dict[str, typing.Any]],
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
    schema_type: typing.Literal["AVRO", "JSON"] = utils.AVRO_SCHEMA_TYPE,
) -> int:
    """Register a schema with the registry under the given subject and receive a schema id.

    Schema can be avro or json, and can be presented as a parsed schema or a string.
    If schema is a string (or a dict), the `schema_type` kwarg must be used to indicate
    what type of schema it is ("AVRO" by default).
    If the schema is already parsed, the schema_type is inferred directly from the parsed schema.
    Multiple instances of the same schema will result in cache misses.

    POST /subjects/(string: subject)/versions

    Args:
        subject: subject name
        schema: Avro or JSON schema to be registered
        headers: Extra headers to add on the requests
        timeout: The timeout configuration to use when sending requests. Default USE_CLIENT_DEFAULT
        schema_type: The type of schema to parse if `schema` is a string. Default "AVRO"

    Returns:
        schema_id

    Raises:
        ClientError: If the registry rejects the schema or the request is unsuccessful
    """
    if isinstance(schema, (str, dict)):
        schema = SchemaFactory.create_schema(schema, schema_type)

    schema_id = self.subject_to_schema_ids[subject].get(str(schema))
    if schema_id is not None:
        return schema_id

    # Check if schema is already registered. This should normally work even if
    # the schema registry we're talking to is readonly, enabling a setup where
    # only CI/CD can do changes to Schema Registry, and production code has readonly
    # access
    response = await self.check_version(subject, schema, headers=headers, timeout=timeout)
    if response is not None:
        return response.schema_id

    url, method = self.url_manager.url_for("register", subject=subject)
    body = {
        "schema": json.dumps(schema.raw_schema),
        "schemaType": schema.schema_type,
    }

    result, code = get_response_and_status_code(
        await self.request(url, method=method, body=body, headers=headers, timeout=timeout)
    )

    # The messages are schema-type agnostic (JSON schemas are accepted too) and
    # match the ones raised by the synchronous client.
    msg = None
    if code in (status.HTTP_401_UNAUTHORIZED, status.HTTP_403_FORBIDDEN):
        msg = "Unauthorized access"
    elif code == status.HTTP_409_CONFLICT:
        msg = "Incompatible schema"
    elif code == status.HTTP_422_UNPROCESSABLE_ENTITY:
        msg = "Invalid schema"
    elif not status.is_success(code):
        msg = "Unable to register schema"

    if msg is not None:
        raise ClientError(message=msg, http_code=code, server_traceback=result)

    schema_id = result["id"]
    self._cache_schema(schema, schema_id, subject)

    return schema_id
async def delete_subject(
    self,
    subject: str,
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
) -> list:
    """Deletes the specified subject and its associated compatibility level if registered.

    It is recommended to use this API only when a topic needs to be
    recycled or in development environments.

    DELETE /subjects/(string: subject)

    Args:
        subject: subject name
        headers: Extra headers to add on the requests
        timeout: The timeout configuration to use when sending requests. Default USE_CLIENT_DEFAULT

    Returns:
        The versions of the schema deleted under this subject; an empty list
        when the registry answers 404.

    Raises:
        ClientError: On any other non-success status
    """
    endpoint, http_method = self.url_manager.url_for("delete_subject", subject=subject)
    response = await self.request(endpoint, method=http_method, headers=headers, timeout=timeout)
    payload, status_code = get_response_and_status_code(response)

    if status.is_success(status_code):
        return payload
    if status_code == status.HTTP_404_NOT_FOUND:
        return []

    raise ClientError("Unable to delete subject", http_code=status_code, server_traceback=payload)
async def get_schema(
    self,
    subject: str,
    version: typing.Union[int, str] = "latest",
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
) -> typing.Optional[utils.SchemaVersion]:
    """Fetch one version of the schema registered under a subject.

    GET /subjects/(string: subject)/versions/(versionId: version)

    Args:
        subject: subject name
        version: version id. Defaults to "latest"
        headers: Extra headers to add on the requests
        timeout: The timeout configuration to use when sending requests.
            Default USE_CLIENT_DEFAULT

    Returns:
        SchemaVersion (namedtuple): (subject, schema_id, schema, version)
        None: whenever the server does not answer with a success code
            (404 schema not found, 422 unprocessable entity, or any other
            status outside 200-299). The reason is only logged.
    """
    endpoint, http_method = self.url_manager.url_for("get_schema", subject=subject, version=version)
    response = await self.request(endpoint, method=http_method, headers=headers, timeout=timeout)
    payload, status_code = get_response_and_status_code(response)

    # Failures are never raised here: each one is logged and mapped to None
    if status_code == status.HTTP_404_NOT_FOUND:
        logger.info(f"Schema version {version} under subjet {subject} not found: {status_code}")
        return None
    if status_code == status.HTTP_422_UNPROCESSABLE_ENTITY:
        logger.info(f"Invalid version {version}: {status_code}")
        return None
    if not status.is_success(status_code):
        logger.info(f"Not success version {version}: {status_code}")
        return None

    schema_id = payload.get("id")
    # Reuse the schema object already cached for this id, otherwise build it from the response
    schema = (
        self.id_to_schema[schema_id] if schema_id in self.id_to_schema else self._schema_from_result(payload)
    )

    # The version reported by the server replaces the requested one (e.g. "latest")
    registered_version = payload["version"]
    self._cache_schema(schema, schema_id, subject, registered_version)

    return utils.SchemaVersion(
        subject=subject,
        schema_id=schema_id,
        schema=schema,
        version=registered_version,
    )
async def get_versions(
    self,
    subject: str,
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
) -> list:
    """List the versions registered under the given subject.

    GET subjects/{subject}/versions

    Args:
        subject: subject name
        headers: Extra headers to add on the requests
        timeout: The timeout configuration to use when sending requests.
            Default USE_CLIENT_DEFAULT

    Returns:
        The decoded body of a successful response, or an empty list when the
        server answers 404 for the subject.

    Raises:
        ClientError: For any other non-success status code.
    """
    endpoint, http_method = self.url_manager.url_for("get_versions", subject=subject)
    response = await self.request(endpoint, method=http_method, headers=headers, timeout=timeout)
    payload, status_code = get_response_and_status_code(response)

    if status.is_success(status_code):
        return payload

    if status_code != status.HTTP_404_NOT_FOUND:
        raise ClientError(
            f"Unable to get the versions for subject {subject}",
            http_code=status_code,
            server_traceback=payload,
        )

    # An unknown subject is not an error: it simply has no versions
    logger.info(f"Subject {subject} not found")
    return []
async def check_version(
    self,
    subject: str,
    schema: typing.Union[BaseSchema, str, typing.Dict[str, typing.Any]],
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
    schema_type: typing.Literal["AVRO", "JSON"] = utils.AVRO_SCHEMA_TYPE,
) -> typing.Optional[utils.SchemaVersion]:
    """Look up whether a schema is already registered under a subject.

    The local caches are consulted first; the registry is only queried when
    the version or the schema id is not cached for this subject.

    POST /subjects/(string: subject)

    Args:
        subject: subject name
        schema: Avro or JSON schema
        headers: Extra headers to add on the requests
        timeout: The timeout configuration to use when sending requests.
            Default USE_CLIENT_DEFAULT
        schema_type: The type of schema to parse if `schema` is a string or a dict.
            Default "AVRO"

    Returns:
        SchemaVersion if the schema exists, None if the server answers 404.
        On a cache hit the `schema` field is the schema object itself; after a
        server round trip it is the value of the "schema" key of the response.

    Raises:
        ClientError: For any non-success status code other than 404.
    """
    # Looked up before the schema is parsed, exactly as the per-subject caches are organised
    cached_versions = self.subject_to_schema_versions[subject]

    if isinstance(schema, (str, dict)):
        schema = SchemaFactory.create_schema(schema, schema_type)

    # Both caches are keyed by the string form of the schema
    schema_key = str(schema)
    version = cached_versions.get(schema_key)
    cached_ids = self.subject_to_schema_ids[subject]
    schema_id = cached_ids.get(schema_key)

    if version and schema_id:
        return utils.SchemaVersion(subject=subject, schema_id=schema_id, version=version, schema=schema)

    endpoint, http_method = self.url_manager.url_for("check_version", subject=subject)
    request_body = {
        "schema": json.dumps(schema.raw_schema),
        "schemaType": schema.schema_type,
    }
    response = await self.request(
        endpoint, method=http_method, body=request_body, headers=headers, timeout=timeout
    )
    payload, status_code = get_response_and_status_code(response)

    if status_code == status.HTTP_404_NOT_FOUND:
        logger.info(f"Schema {schema.name} under subject {subject} not found: {status_code}")
        return None

    if not status.is_success(status_code):
        raise ClientError("Unable to get version of a schema", http_code=status_code, server_traceback=payload)

    schema_id = payload["id"]
    version = payload.get("version")
    # Remember the answer so the next call for this subject/schema is served locally
    self._cache_schema(schema, schema_id, subject, version)  # type: ignore

    return utils.SchemaVersion(
        subject=subject,
        schema_id=schema_id,
        version=version,
        schema=payload.get("schema"),
    )
typing.Dict[str, typing.Any]]: """Test the compatibility of a candidate parsed schema for a given subject. By default the latest version is checked against. POST /compatibility/subjects/(string: subject)/versions/(versionId: version) Args: subject: subject name schema: Avro or JSON schema version: The schema version to test compatibility against verbose: Whether or not to return the errors in case of incompatibility headers: Extra headers to add on the requests timeout: The timeout configuration to use when sending requests. Default USE_CLIENT_DEFAULT schema_type: The type of schema to parse if `schema` is a string. Default "AVRO" Returns: If verbose if False: return a boolean wether the schema is compatible with the latest version for a subject If verbose is True: return the API reponse with both the compatibility boolean and the possible errors """ url, method = self.url_manager.url_for("test_compatibility", subject=subject, version=version) if isinstance(schema, str) or isinstance(schema, dict): schema = SchemaFactory.create_schema(schema, schema_type) body = { "schema": json.dumps(schema.raw_schema), "schemaType": schema.schema_type, } result, code = get_response_and_status_code( await self.request( url, method=method, body=body, headers=headers, params={"verbose": verbose}, timeout=timeout ) ) if code == status.HTTP_404_NOT_FOUND: logger.info(f"Subject or version not found: {code}") return False elif code == status.HTTP_422_UNPROCESSABLE_ENTITY: logger.info(f"Unprocessable entity. 
async def update_compatibility(
    self,
    level: str,
    subject: typing.Optional[str] = None,
    headers: typing.Optional[typing.Dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
) -> bool:
    """Set the compatibility level, globally or for a single subject.

    When `subject` is None the level is updated globally.

    PUT /config/(string: subject)

    Args:
        level: one of BACKWARD, BACKWARD_TRANSITIVE, FORWARD, FORWARD_TRANSITIVE,
            FULL, FULL_TRANSITIVE, NONE
        subject: Optional subject name
        headers: Extra headers to add on the requests
        timeout: The timeout configuration to use when sending requests.
            Default USE_CLIENT_DEFAULT

    Returns:
        True when the registry accepted the update.

    Raises:
        ClientError: if `level` is not a valid compatibility level (no request
            is sent in that case) or if the request was unsuccessful.
    """
    # Reject unknown levels locally, before talking to the registry
    if level not in utils.VALID_LEVELS:
        raise ClientError(f"Invalid level specified: {level}")

    endpoint, http_method = self.url_manager.url_for("update_compatibility", subject=subject)
    request_body = {"compatibility": level}
    response = await self.request(
        endpoint, method=http_method, body=request_body, headers=headers, timeout=timeout
    )
    payload, status_code = get_response_and_status_code(response)

    if not status.is_success(status_code):
        raise ClientError(f"Unable to update level: {level}.", http_code=status_code, server_traceback=payload)

    return True
class ClientError(Exception):
    """Error raised by the Schema Registry client."""

    def __init__(
        self,
        message: str,
        http_code: typing.Optional[int] = None,
        server_traceback: typing.Optional[str] = None,
    ) -> None:
        """Build an error describing a failed interaction with the registry API.

        Args:
            message: Message description
            http_code: HTTP Code returned by the registry API. Defaults to None.
            server_traceback: Server's traceback. Defaults to None.
        """
        super().__init__(message)
        # Kept as plain attributes so callers can inspect the failure
        self.message = message
        self.http_code = http_code
        self.server_traceback = server_traceback

    def __str__(self) -> str:
        return self.message

    def __repr__(self) -> str:
        return "ClientError(error={})".format(self.message)
class AvroSchema(BaseSchema):
    """Integrate BaseSchema for Avro schema.

    The expanded and flat variants of the schema are computed with fastavro
    on first access and cached on the instance afterwards.
    """

    def __init__(self, *args: typing.Any, **kwargs: typing.Any) -> None:
        # Lazily filled caches backing `expanded_schema` and `flat_schema`
        self._expanded_schema: typing.Optional[typing.Dict] = None
        self._flat_schema: typing.Optional[typing.Dict] = None
        super().__init__(*args, **kwargs)

    @property
    def name(self) -> typing.Optional[str]:
        """Value of the "name" key of the parsed schema, or None if absent."""
        return self.schema.get("name")

    @property
    def schema_type(self) -> str:
        """Schema type identifier: always AVRO_SCHEMA_TYPE."""
        return AVRO_SCHEMA_TYPE

    @property
    def expanded_schema(self) -> typing.Dict:
        """Returns a schema where all named types are expanded to their real schema.

        Returns:
            expanded_schema (typing.Dict): Schema parsed expanded
        """
        if self._expanded_schema is None:
            # NOTE: Dict expected when we pass a dict
            self._expanded_schema = typing.cast(typing.Dict, fastavro.schema.expand_schema(self.raw_schema))
        return self._expanded_schema

    @property
    def flat_schema(self) -> typing.Dict:
        """Parse the schema removing the fastavro write_hint flag __fastavro_parsed.

        Returns:
            flat_schema (typing.Dict): Schema parsed without the write hint
        """
        if self._flat_schema is None:
            # NOTE: Dict expected when we pass a dict
            self._flat_schema = typing.cast(
                typing.Dict,
                fastavro.parse_schema(self.raw_schema, _write_hint=False, _force=True),
            )
        return self._flat_schema

    def parse_schema(self, schema: typing.Dict) -> typing.Dict:
        """Parse `schema` with fastavro, forcing a fresh parse."""
        # NOTE: Dict expected when we pass a dict
        return typing.cast(typing.Dict, fastavro.parse_schema(schema, _force=True))

    @staticmethod
    def load(fp: str) -> AvroSchema:
        """Parse an avro schema from a file path.

        Args:
            fp: Path of the file holding the schema as JSON text

        Returns:
            AvroSchema built from the file content
        """
        # Schema files are JSON text: read them as UTF-8 rather than with the
        # platform's locale encoding, which differs between machines.
        with open(fp, mode="r", encoding="utf-8") as f:
            content = f.read()
        return AvroSchema(content)

    @staticmethod
    async def async_load(fp: str) -> AvroSchema:
        """Parse an avro schema from a file path without blocking the event loop.

        Args:
            fp: Path of the file holding the schema as JSON text

        Returns:
            AvroSchema built from the file content
        """
        # Same explicit UTF-8 decoding as the synchronous `load`
        async with await anyio.open_file(fp, mode="r", encoding="utf-8") as f:
            content = await f.read()
        return AvroSchema(content)
@dataclass
class SubjectVersion:
    """A subject name paired with a version number, as reported by the registry."""

    # Name of the subject
    subject: str
    # Version associated with that subject
    version: int
def is_informational(code: int) -> bool:
    """Return True when HTTP_100_CONTINUE <= code < HTTP_200_OK."""
    return HTTP_100_CONTINUE <= code and code < HTTP_200_OK


def is_success(code: int) -> bool:
    """Return True when HTTP_200_OK <= code < HTTP_300_MULTIPLE_CHOICES."""
    return HTTP_200_OK <= code and code < HTTP_300_MULTIPLE_CHOICES


def is_redirect(code: int) -> bool:
    """Return True when HTTP_300_MULTIPLE_CHOICES <= code < HTTP_400_BAD_REQUEST."""
    return HTTP_300_MULTIPLE_CHOICES <= code and code < HTTP_400_BAD_REQUEST


def is_client_error(code: int) -> bool:
    """Return True when HTTP_400_BAD_REQUEST <= code < HTTP_500_INTERNAL_SERVER_ERROR."""
    return HTTP_400_BAD_REQUEST <= code and code < HTTP_500_INTERNAL_SERVER_ERROR


def is_server_error(code: int) -> bool:
    """Return True when HTTP_500_INTERNAL_SERVER_ERROR <= code <= 599."""
    return HTTP_500_INTERNAL_SERVER_ERROR <= code and code <= 599
class Path:
    """Associate an action related to a path & method.

    Built from a `(name, url_template, http_method)` tuple; the template may
    contain `{placeholder}` fields that `generate_url` fills in.
    """

    def __init__(self, path: typing.Tuple[str, str, str]) -> None:
        self.func = path[0]  # action name
        self.url = path[1]  # url template, e.g. "subjects/{subject}/versions"
        self.method = path[2]  # HTTP method

    @property
    def name(self) -> str:
        """Name of the action this path belongs to."""
        return self.func

    def generate_url(self, **kwargs: typing.Any) -> str:
        """Fill the url template with the given keyword arguments.

        Args:
            **kwargs: Values for the template placeholders. A value of None
                means "not provided".

        Returns:
            The formatted url. Placeholders without a provided value are
            replaced by an empty string.
        """
        # Filter on None only: a falsy but legitimate value such as 0 must be
        # rendered, otherwise the url silently points at another endpoint
        # (e.g. "schemas/ids/" instead of "schemas/ids/0").
        parameters = {key: value for key, value in kwargs.items() if value is not None}
        # defaultdict(str) makes every missing placeholder format as ""
        return self.url.format_map(defaultdict(str, **parameters))
class SerializerError(Exception):
    """Generic error raised by the serializers package."""

    def __init__(self, message: str) -> None:
        super().__init__(message)
        # Kept as an attribute so callers can read the description directly
        self.message = message

    def __str__(self) -> str:
        return self.message

    def __repr__(self) -> str:
        # Uses the runtime class name so subclasses are reported correctly
        return "{}(error={})".format(self.__class__.__name__, self.message)
class Serializer(Codec):
    """Generic serializer for Faust.

    Wraps a message serializer together with the subject and schema used to
    encode outgoing payloads.
    """

    def __init__(
        self,
        schema_subject: str,
        schema: BaseSchema,
        message_serializer: MessageSerializer,
    ):
        self.schema_subject = schema_subject
        self.schema = schema
        self.message_serializer = message_serializer

        Codec.__init__(self)

    def _loads(self, event: bytes) -> typing.Optional[typing.Dict]:
        """Decode `event` with the wrapped message serializer."""
        return self.message_serializer.decode_message(event)

    def _dumps(self, payload: typing.Dict[str, typing.Any]) -> bytes:
        """Encode `payload` with the configured schema under the configured subject.

        The payload is expected to be a dictionary; it is cleaned of
        faust.Record values before being encoded.
        """
        payload = self.clean_payload(payload)

        return self.message_serializer.encode_record_with_schema(self.schema_subject, self.schema, payload)

    @staticmethod
    def _clean_item(item: typing.Any) -> typing.Any:
        """Recursively replace faust.Record values inside `item` by their representation.

        Mappings and sequences are rebuilt with the same container type;
        any other value is returned unchanged.
        """
        if isinstance(item, Record):
            return Serializer._clean_item(item.to_representation())
        elif isinstance(item, (str, bytes)):
            # str and bytes are also sequences, need to make sure we don't iterate
            # over them: they cannot contain a Record, and rebuilding bytes one
            # element at a time only yields an equal object at a much higher cost.
            return item
        elif isinstance(item, Mapping):
            return type(item)({key: Serializer._clean_item(value) for key, value in item.items()})  # type: ignore
        elif isinstance(item, Sequence):
            return type(item)(Serializer._clean_item(value) for value in item)  # type: ignore
        return item

    @staticmethod
    def clean_payload(
        payload: typing.Dict[str, typing.Any],
    ) -> typing.Dict[str, typing.Any]:
        """Try to clean payload retrieve by faust.Record.to_representation.

        All values inside payload should be native types and not faust.Record

        On Faust versions <=1.9.0 Record.to_representation always returns a dict with native types
        as a values which are compatible with fastavro.
        On Faust 1.10.0 <= versions Record.to_representation always returns a dict but values
        can also be faust.Record, so fastavro is incapable of serialize them.

        Args:
            payload (dict): Payload to clean

        Returns:
            dict that represents the clean payload
        """
        return Serializer._clean_item(payload)
class MessageSerializer(ABC):
    """Base helper to serialize and deserialize messages framed with a schema id.

    Encoded messages start with a 5 byte header (magic byte followed by the
    schema id, big endian) and continue with the serialized record.
    Subclasses provide the actual encoder/decoder for their schema type.

    Args:
        schemaregistry_client: Http Client
        reader_schema: Specify a schema to decode the message
        return_record_name: If the record name should be returned
    """

    def __init__(
        self,
        schemaregistry_client: SchemaRegistryClient,
        reader_schema: typing.Optional[schema.AvroSchema] = None,
        return_record_name: bool = False,
    ):
        self.schemaregistry_client = schemaregistry_client
        # Per schema id caches of the decoder / encoder callables
        self.id_to_decoder_func: typing.Dict = {}
        self.id_to_writers: typing.Dict = {}
        self.reader_schema = reader_schema
        self.return_record_name = return_record_name

    @property
    @abstractmethod
    def _serializer_schema_type(self) -> typing.Literal["AVRO", "JSON"]: ...

    @abstractmethod
    def _get_encoder_func(self, schema: BaseSchema) -> typing.Callable: ...

    @abstractmethod
    def _get_decoder_func(self, payload: ContextStringIO, writer_schema: BaseSchema) -> typing.Callable: ...

    def encode_record_with_schema(
        self, subject: str, schema: BaseSchema, record: typing.Dict[str, typing.Any]
    ) -> bytes:
        """Register `schema` under `subject` and encode `record` with it.

        The record is expected to be a dictionary.

        Args:
            subject: Subject name
            schema: Schema used to encode the record
            record: An object to serialize

        Returns:
            Encoded record with schema ID as bytes
        """
        # Registering returns the id the record will be framed with
        schema_id = self.schemaregistry_client.register(
            subject,
            schema,
            schema_type=self._serializer_schema_type,
        )

        # Build the encoder once per schema id
        writers = self.id_to_writers
        if not writers.get(schema_id):
            writers[schema_id] = self._get_encoder_func(schema)

        return self.encode_record_with_schema_id(schema_id, record)

    def encode_record_with_schema_id(self, schema_id: int, record: dict) -> bytes:
        """Encode a record with a given schema id.

        The record must be a python dictionary.

        Args:
            schema_id: integer ID
            record: An object to serialize

        Returns:
            The 5 byte header followed by the serialized record, as bytes

        Raises:
            SerializerError: If the schema does not exist or the client fails
                while the encoder for `schema_id` is being prepared.
        """
        writers = self.id_to_writers

        # No encoder cached for this id yet: fetch the schema and build one
        if schema_id not in writers:
            try:
                fetched_schema = self.schemaregistry_client.get_by_id(schema_id)
                if not fetched_schema:
                    raise SerializerError("Schema does not exist")
                writers[schema_id] = self._get_encoder_func(fetched_schema)
            except ClientError as err:
                formatted = traceback.format_exception(*sys.exc_info())
                raise SerializerError(repr(formatted)) from err

        encode = writers[schema_id]

        with ContextStringIO() as buffer:
            # Magic byte and schema ID in network byte order (big endian)
            header = struct.pack(">bI", MAGIC_BYTE, schema_id)
            buffer.write(header)

            # The record fills the rest of the buffer
            encode(record, buffer)

            return buffer.getvalue()

    def decode_message(self, message: typing.Optional[bytes]) -> typing.Optional[dict]:
        """Decode a message from kafka that has been encoded for use with the schema registry.

        Args:
            message: message key or value to be decoded

        Returns:
            Decoded message contents, or None when `message` is None.

        Raises:
            SerializerError: If the message is 5 bytes or shorter, does not start
                with the magic byte, or its schema cannot be fetched.
        """
        if message is None:
            return None

        if len(message) <= 5:
            raise SerializerError("message is too small to decode")

        with ContextStringIO(message) as payload:
            # The first 5 bytes are the header; the stream is left at the record
            magic, schema_id = struct.unpack(">bI", payload.read(5))
            if magic != MAGIC_BYTE:
                raise SerializerError("message does not start with magic byte")

            decoders = self.id_to_decoder_func
            if schema_id in decoders:
                return decoders[schema_id](payload)

            try:
                writer_schema = self.schemaregistry_client.get_by_id(schema_id)
            except ClientError as e:
                raise SerializerError(f"unable to fetch schema with id {schema_id}: {e}") from e

            if writer_schema is None:
                raise SerializerError(f"unable to fetch schema with id {schema_id}")

            # Cache the decoder so the next message with this id skips the lookup
            decode = self._get_decoder_func(payload, writer_schema)
            decoders[schema_id] = decode

            return decode(payload)
bad_record = { "first_name": "my_first_name", "last_name": "my_last_name", "age": "my_age" } avro_message_serializer.encode_record_with_schema( "user", avro_user_schema, bad_record) # >>> TypeError: an integer is required on field age ``` Args: schemaregistry_client: Http Client reader_schema: Specify a schema to decode the message return_record_name: If the record name should be returned """ @property def _serializer_schema_type(self) -> typing.Literal["AVRO", "JSON"]: return utils.AVRO_SCHEMA_TYPE def _get_encoder_func(self, schema: typing.Union[BaseSchema]) -> typing.Callable: return lambda record, fp: schemaless_writer(fp, schema.schema, record) def _get_decoder_func(self, payload: ContextStringIO, writer_schema: BaseSchema) -> typing.Callable: return lambda payload: schemaless_reader( payload, writer_schema.schema, typing.cast(Schema, self.reader_schema), self.return_record_name, ) class JsonMessageSerializer(MessageSerializer): """JsonMessageSerializer to serialize and deserialize messages. !!! 
Example ```python from schema_registry.client import SchemaRegistryClient, schema from schema_registry.serializers import JsonMessageSerializer client = SchemaRegistryClient("http://127.0.0.1:8081") json_message_serializer = JsonMessageSerializer(client) json_schema = schema.JsonSchema({ "definitions" : { "record:python.test.basic.basic" : { "description" : "basic schema for tests", "type" : "object", "required" : [ "number", "name" ], "properties" : { "number" : { "oneOf" : [ { "type" : "integer" }, { "type" : "null" } ] }, "name" : { "oneOf" : [ { "type" : "string" } ] } } } }, "$ref" : "#/definitions/record:python.test.basic.basic" }) # Encode the record basic_record = { "number": 10, "name": "a_name", } message_encoded = json_message_serializer.encode_record_with_schema( "basic", json_schema, basic_record) # this is because the message encoded reserved 5 bytes for the schema_id assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) # Decode the message message_decoded = json_message_serializer.decode_message(message_encoded) assert message_decoded == basic_record ``` Args: schemaregistry_client: Http Client reader_schema: Specify a schema to decode the message return_record_name: If the record name should be returned """ @property def _serializer_schema_type(self) -> typing.Literal["AVRO", "JSON"]: return utils.JSON_SCHEMA_TYPE def _get_encoder_func(self, schema: typing.Union[BaseSchema]) -> typing.Callable: def json_encoder_func(record: dict, fp: ContextStringIO) -> typing.Any: validate(record, schema.schema) fp.write(json.dumps(record).encode()) return json_encoder_func def _get_decoder_func(self, payload: ContextStringIO, writer_schema: BaseSchema) -> typing.Callable: def json_decoder_func(payload: typing.IO) -> typing.Any: obj = json.load(payload) validate(obj, writer_schema.schema) return obj return json_decoder_func class AsyncMessageSerializer(ABC): """AsyncMessageSerializer to serialize and deserialize messages asynchronously. 
Args: schemaregistry_client: Http Client reader_schema: Specify a schema to decode the message return_record_name: If the record name should be returned """ def __init__( self, schemaregistry_client: AsyncSchemaRegistryClient, reader_schema: typing.Optional[schema.AvroSchema] = None, return_record_name: bool = False, ): self.schemaregistry_client = schemaregistry_client self.id_to_decoder_func = {} # type: typing.Dict self.id_to_writers = {} # type: typing.Dict self.reader_schema = reader_schema self.return_record_name = return_record_name @property @abstractmethod def _serializer_schema_type(self) -> typing.Literal["AVRO", "JSON"]: ... @abstractmethod def _get_encoder_func(self, schema: BaseSchema) -> typing.Callable: ... @abstractmethod def _get_decoder_func(self, payload: ContextStringIO, writer_schema: BaseSchema) -> typing.Callable: ... async def encode_record_with_schema(self, subject: str, schema: typing.Union[BaseSchema], record: dict) -> bytes: """Given a parsed avro schema, encode a record for the given subject. The record is expected to be a dictionary. The schema is registered with the subject of 'topic-value' Args: subject: Subject name schema: Avro Schema record: An object to serialize Returns: Encoded record with schema ID """ # Try to register the schema schema_id = await self.schemaregistry_client.register(subject, schema, schema_type=self._serializer_schema_type) # cache writer if not self.id_to_writers.get(schema_id): self.id_to_writers[schema_id] = self._get_encoder_func(schema) return await self.encode_record_with_schema_id(schema_id, record) async def encode_record_with_schema_id(self, schema_id: int, record: dict) -> bytes: """Encode a record with a given schema id. The record must be a python dictionary. 
Args: schema_id: integer ID record: An object to serialize Returns: Decoder function """ # use slow avro if schema_id not in self.id_to_writers: try: schema = await self.schemaregistry_client.get_by_id(schema_id) if not schema: raise SerializerError("Schema does not exist") self.id_to_writers[schema_id] = self._get_encoder_func(schema) except ClientError as err: exc_type, exc_value, exc_traceback = sys.exc_info() raise SerializerError(repr(traceback.format_exception(exc_type, exc_value, exc_traceback))) from err writer = self.id_to_writers[schema_id] with ContextStringIO() as outf: # Write the magic byte and schema ID in network byte order (big endian) outf.write(struct.pack(">bI", MAGIC_BYTE, schema_id)) # write the record to the rest of the buffer writer(record, outf) return outf.getvalue() async def decode_message(self, message: typing.Optional[bytes]) -> typing.Optional[dict]: """Decode a message from kafka that has been encoded for use with the schema registry. Args: message: message key or value to be decoded Returns: Decoded message """ if message is None: return None if len(message) <= 5: raise SerializerError("message is too small to decode") with ContextStringIO(message) as payload: magic, schema_id = struct.unpack(">bI", payload.read(5)) if magic != MAGIC_BYTE: raise SerializerError("message does not start with magic byte") if schema_id in self.id_to_decoder_func: return self.id_to_decoder_func[schema_id](payload) try: writer_schema = await self.schemaregistry_client.get_by_id(schema_id) except ClientError as e: raise SerializerError(f"unable to fetch schema with id {schema_id}: {e}") from e if writer_schema is None: raise SerializerError(f"unable to fetch schema with id {schema_id}") decoder_func = self._get_decoder_func(payload, writer_schema) self.id_to_decoder_func[schema_id] = decoder_func return decoder_func(payload) class AsyncAvroMessageSerializer(AsyncMessageSerializer): """AsyncAvroMessageSerializer to serialize and deserialize messages 
asynchronously. Args: schemaregistry_client: Http Client reader_schema: Specify a schema to decode the message return_record_name: If the record name should be returned """ @property def _serializer_schema_type(self) -> typing.Literal["AVRO", "JSON"]: return utils.AVRO_SCHEMA_TYPE def _get_encoder_func(self, schema: typing.Union[BaseSchema]) -> typing.Callable: return lambda record, fp: schemaless_writer(fp, schema.schema, record) def _get_decoder_func(self, payload: ContextStringIO, writer_schema: BaseSchema) -> typing.Callable: return lambda payload: schemaless_reader( payload, writer_schema.schema, typing.cast(Schema, self.reader_schema), self.return_record_name, ) class AsyncJsonMessageSerializer(AsyncMessageSerializer): """AsyncJsonMessageSerializer to serialize and deserialize messages asynchronously. Args: schemaregistry_client: Http Client reader_schema: Specify a schema to decode the message return_record_name: If the record name should be returned """ @property def _serializer_schema_type(self) -> typing.Literal["AVRO", "JSON"]: return utils.JSON_SCHEMA_TYPE def _get_encoder_func(self, schema: typing.Union[BaseSchema]) -> typing.Callable: def json_encoder_func(record: dict, fp: ContextStringIO) -> typing.Any: validate(record, schema.schema) fp.write(json.dumps(record).encode()) return json_encoder_func def _get_decoder_func(self, payload: ContextStringIO, writer_schema: BaseSchema) -> typing.Callable: def json_decoder_func(payload: typing.IO) -> typing.Any: obj = json.load(payload) validate(obj, writer_schema.schema) return obj return json_decoder_func ================================================ FILE: scripts/README.md ================================================ # Development Scripts * `scripts/run-tests` - Run the test suite * `scripts/lint` - Run the code linting * `scripts/publish` - Publish the latest version to PyPI and deploy github pages * `scripts/clean` - Clean annoying files, remove docker containers and network * `scripts/create-tag` 
- Create git tag and publish it
* `scripts/install` - Install dependencies

================================================
FILE: scripts/clean
================================================
#!/bin/bash -e

if [ -d 'dist' ] ; then
    rm -rf dist
fi

if [ -d 'site' ] ; then
    rm -rf site
fi

if [ -d 'python_schema_registry_client.egg-info' ] ; then
    rm -rf python_schema_registry_client.egg-info
fi

# delete python cache
find . -iname '*.pyc' -delete
find . -iname '__pycache__' -delete

docker-compose stop
yes | docker-compose rm

================================================
FILE: scripts/format
================================================
#!/bin/bash -e
set -x

poetry run ruff format schema_registry tests
poetry run ruff check schema_registry tests --fix

================================================
FILE: scripts/publish
================================================
#!/bin/bash -e

poetry publish --build -u $TWINE_USERNAME -p $TWINE_PASSWORD

================================================
FILE: scripts/test
================================================
#!/bin/bash -e

set -o errexit

export SCHEMA_REGISTRY_URL=http://localhost:8081
export KAFKA_BOOSTRAP_SERVER_NAME=127.0.0.1
export KAFKA_BOOSTRAP_SERVER_PORT=9092
export SCHEMA_REGISTRY_SERVER=127.0.0.1
export SCHEMA_REGISTRY_SERVER_PORT=8081

./scripts/wait_for_services
poetry run pytest --cov=schema_registry ${1} --cov-fail-under=87
poetry run ruff check schema_registry tests
poetry run ruff format --check schema_registry tests
poetry run mypy schema_registry
poetry run codecov --token=$CODECOV_TOKEN

================================================
FILE: scripts/wait_for_services
================================================
#!/bin/bash

set -e

cmd="$@"

until nc -vz ${KAFKA_BOOSTRAP_SERVER_NAME} ${KAFKA_BOOSTRAP_SERVER_PORT}; do
  >&2 echo "Waiting for Kafka to be ready...
- sleeping" sleep 2 done >&2 echo "Kafka is up" until nc -vz ${SCHEMA_REGISTRY_SERVER} ${SCHEMA_REGISTRY_SERVER_PORT}; do >&2 echo "Waiting for Schema Registry to be ready... - sleeping" sleep 2 done >&2 echo "Schema Registry is up" echo "Executing command ${cmd}" exec $cmd ================================================ FILE: tests/__init__.py ================================================ ================================================ FILE: tests/avro_schemas/adv_schema.avsc ================================================ { "type": "record", "doc": "advanced schema for tests", "name": "advanced", "namespace": "python.test.advanced", "fields": [ { "name": "number", "doc": "age", "type": [ "long", "null" ] }, { "name": "name", "doc": "a name", "type": [ "string" ] }, { "name": "friends", "doc": "friends", "type" : { "type": "map", "values" : { "name": "basicPerson", "type": "record", "namespace": "python.test.advanced", "fields": [ { "name": "number", "doc": "friend age", "type": [ "long", "null" ] }, { "name": "name", "doc": "friend name", "type": [ "string" ] } ] } } }, { "name" : "family", "doc" : "family", "type" : { "namespace" : "python.test.advanced", "type" : "map", "values" : "basicPerson" } } ] } ================================================ FILE: tests/avro_schemas/basic_schema.avsc ================================================ { "name": "basic", "type": "record", "doc": "basic schema for tests", "namespace": "python.test.basic", "fields": [ { "name": "number", "doc": "age", "type": [ "long", "null" ] }, { "name": "name", "doc": "a name", "type": [ "string" ] } ] } ================================================ FILE: tests/avro_schemas/invalid_schema.avsc ================================================ { "type" : "record", "name" : "string_key", "namespace" : "OrbitDbProducer", "fields" : [ { "name" : "key", "type" : "array", "items": "string" } ] } ================================================ FILE: 
tests/avro_schemas/logical_types_schema.avsc ================================================ { "type": "record", "name": "visit", "fields": [ { "name": "metadata", "type": { "type": "record", "name": "metadata_record", "fields": [ { "name": "timestamp", "type": { "type": "long", "logicalType": "timestamp-millis" } }, { "name": "total", "type": { "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 } } ] } } ] } ================================================ FILE: tests/avro_schemas/nested_schema.avsc ================================================ { "name": "Customer", "namespace": "com.questanalytics.core", "type": "record", "fields": [ { "name": "uid", "type": "int", "default": 10 }, { "name": "order", "type": { "name": "OrderRecord", "namespace": "com.questanalytics.core", "type": "record", "fields": [{ "name": "uid", "type": "int", "default": 10 }] } }, { "name": "name", "type": "string", "default": "bond" } ] } ================================================ FILE: tests/avro_schemas/order_schema.avsc ================================================ { "type": "record", "name": "Order", "aliases": ["Order"], "fields": [ {"name": "uid", "type": "int"} ] } ================================================ FILE: tests/avro_schemas/primitive_float.avsc ================================================ { "type": "float" } ================================================ FILE: tests/avro_schemas/primitive_string.avsc ================================================ { "type": "string" } ================================================ FILE: tests/avro_schemas/user_v1.avsc ================================================ { "type": "record", "name": "UserKey", "aliases": ["User"], "fields": [ {"name": "name", "type": "string"} ] } ================================================ FILE: tests/avro_schemas/user_v2.avsc ================================================ { "type": "record", "name": "User", "aliases": ["UserKey"], "fields": [ {"name": "name", 
"type": "string"}, {"name": "favorite_number", "type": ["int", "null"], "default": 42}, {"name": "favorite_color", "type": ["string", "null"], "default": "purple"} ] } ================================================ FILE: tests/certificates/cert.pem ================================================ -----BEGIN CERTIFICATE----- MIICljCCAX4CCQD4pDS7tmqFojANBgkqhkiG9w0BAQsFADANMQswCQYDVQQGEwJO TDAeFw0yMDA0MDcxODQ2MzhaFw0yMDA1MDcxODQ2MzhaMA0xCzAJBgNVBAYTAk5M MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAzYk0Qw/BYxWgL0EXQ30J 32tI09UgHX8/HmmNV1TAT+28lkXQRH0TWeTLm6unODqSkE+v7g5LujQB4hTw6jcK IyuFVneBhEvb6OJ/iTWY0/mzHcj0zOgxjjwJKCwcmjFSWqPJD+4icXnTBimYxwaR oxCOd8qxFmTr30mF1xxqowKPTbxb1CGcDvGLANjg6VQEjKVKJpTrJVKIdjLamULJ D/xOO6IJCojCz1lInxm0HrQZuLuPPABXLq3PI1cgnXfOOBMdpsTgSw1Xc9DwaF7T +37oVYnyGS6/aoCOLukAiomjqC/pXjYK51XsNFmHsBJwdmK021xrBcBIKh9+BYAw wwIDAQABMA0GCSqGSIb3DQEBCwUAA4IBAQBgB3gLtj5g50V4hAxmTJAKgbUTYeTR WIK85dfQFktdLPxxzZEPs63j7AONvQgU3bb85T8hzJH7XK5egBvJzrJ+uHnfeKrK JYO0WILOCuMIhImghVMUVFtDL+OX07hps9ctX28FZ+8oOI+kx0gvhR4j6/cS9Oj0 XpfKfz772NbeYiuKjGYApO4kTAOKiW177ji8bVKkghmiSSRVIqoZ1Yf7kig0Y33Z vuw1foBbHiBhNZyCiq8fkg6Xa8x+D1rZVWOdO+YZRYDkeh2YSzYNx2FrdcEkU8Xl qFTRu7mqJwKo8mJNs9seZmZ6qUwUZRlrgEVxHPGpzND/9rkW6B34K6bX -----END CERTIFICATE----- ================================================ FILE: tests/certificates/key.pem ================================================ -----BEGIN ENCRYPTED PRIVATE KEY----- MIIFHzBJBgkqhkiG9w0BBQ0wPDAbBgkqhkiG9w0BBQwwDgQI6ITVHlk0uCICAggA MB0GCWCGSAFlAwQBKgQQmO730QfVVhfwCGbXs8dlZQSCBNCzhN5cg5IL4bM5EiyV mXAi7aSdrGMEPByT2Q0hXCtym0HaIvEpgTglX/zKX34JH8v/TjlVNKk6fBk2AvcQ 7pgB1EEX/7MgjjWZAejm9YGDNy8w6uqx81Zxtrq/Ex2QvSkHhkHoykYsAPCHn19t JYSWWnaiDEfq6YW6WCwRRSxxL3xygVM0nAU396tyI/tWYNkbG4DdmYC2E83N54Pu rt97AXgFzBQhQ2OI3D8GyhhSHkjYHnGtQgjOnUOXBfKch334XA7nOvRkkiodAxdw LTwK99IZPK+vHr950iKQ6mss0HuFFigoKWXw/0udL9nGYBGE/xuc9Xc7koFSXQJ2 j/O4cq4KhWGtWGyY/d79ywIqVR2IIHUsnggmB8wbJwinZDs89ieh5P7kwSrAIYst 
xD1eLIBnQ7SDWFA3FfNYKwtYHP/ylCuYzPm/K5LEw0YJOD7hU8g6k+LF37QJJDZS Q8y+JSfM70p1KamEJhqvXjur/AUnNVHHVbuenyVpK9UylMwT5O/jkwZlTE18nN1B v43k4qJ1lbR6e3eW1htKQzsxBHP8Vvx3BV4DR8yFbJE/IDRXuTAWMSlaYGMBqQpR PsEJZiJDw6ySWp7YKc6wX5NJ5AfQkVf/qPbgv/8NQ9yFHvKtNfMAlU7WQ6GFVxo/ xs5T+GbrfMMceab/FMh7I14E6q3R5tD7iCa3V9r1GZz/TjrNbolybyVcfUkuAhKV W3itvzYYUPvGAoWOIcfWt4/yuFheOa/CF9DsXA3k78BAMJrm+eJtQ93VCh9xNuD/ 2V6qeamuZ6Ck6U/UaDke1UMBAWa3MLQO4+lKBayDi9FvowzRHL1eyCv7BFCy6B9o kY7bNyY2vVeALpW9fFbn5i21giZdVByfVvBTva0GFmckUVAOomVwSR0UWmOolBTG qnkyuQTlAQeIfPY73CrMXKqpnR6JvzT+ROBZvcoN91p3VtbQxsJZaJ7ARlgLFALl MW59mbNoJv9Ch6Sam0NPfKDQuMxviiy3wJ6F8hASyGEHC5qmBFgmRDqyGsLROZqB afyQ8Nr+WniHnE5AiLbTxNqfaiYzK3oQGhlwD5S9s9XNChF6vxhcXg4vBOJ36gTF BmrgDzAeh8fzcCzwSmMSSkUFmY6JSZWFUx/PXfLo/rJUVhG9Xyyl8qbM8j4//h/W LCot/aXzvkXQxewZ1XLq6eNTf/h2BcEusgYoKTgPwZg0QlFqv2UZUC4EI3rDg87h 89RDk3PIFJWmhVqmjUwTtHA3UTDvyw0C+AMWYE7CDNSKRCsvA9zy3F5kfJj+6fUA rFY2JOuSJekzjysDstIWuLvoTidxgIRUgrWDpZsK1v+K8pH8G/Kgc7Rul2F2WxIe zX1EK/6A6K+N+Ripcu3ARtm/pHqYw/RQYbKHSX7un9mvBgcrQ3Bd8tzh65r91qnU bx7qnEFY3c9dOOEkZpY5+Wdx+o/tzZqfig3sJjReaa42lUT3sAaj1y8O5aydY7/v soC2Cl0hSZBmNQE1Hg7AlCAhDinbBMLM6kNycs+Kz/3LHY6mo7Zl1JktCr+w/4M6 BJRt02IsV4novcdnXLX+lR6fm8npoG2bWKuvikWDstKt4hxhm20g+6c1V8p4U3u2 FwfzQ2ItEd4Q5Lofz2VLYkyEaw== -----END ENCRYPTED PRIVATE KEY----- ================================================ FILE: tests/client/__init__.py ================================================ ================================================ FILE: tests/client/async_client/__init__.py ================================================ ================================================ FILE: tests/client/async_client/test_http_client.py ================================================ # import pickle import os from base64 import b64encode import httpx import pytest from schema_registry.client import AsyncSchemaRegistryClient, utils @pytest.mark.asyncio async def test_invalid_cert(): with pytest.raises(FileNotFoundError): 
AsyncSchemaRegistryClient(url="https://127.0.0.1:65534", cert_location="/path/to/cert")


def test_cert_with_key(certificates):
    """Certificate, key and key password passed as kwargs end up in ``client.conf``."""
    client = AsyncSchemaRegistryClient(
        url="https://127.0.0.1:65534",
        cert_location=certificates["certificate"],
        key_location=certificates["key"],
        key_password=certificates["password"],
    )

    assert client.conf[utils.SSL_CERTIFICATE_LOCATION] == certificates["certificate"]
    assert client.conf[utils.SSL_KEY_LOCATION] == certificates["key"]
    assert client.conf[utils.SSL_KEY_PASSWORD] == certificates["password"]


def test_custom_headers():
    """``extra_headers`` given at construction is exposed unchanged on the client."""
    extra_headers = {"custom-serialization": utils.HEADER_AVRO_JSON}
    client = AsyncSchemaRegistryClient(url="https://127.0.0.1:65534", extra_headers=extra_headers)
    assert extra_headers == client.extra_headers


@pytest.mark.asyncio
async def test_override_headers(avro_deployment_schema, response_klass, async_mock):
    """Per-call ``headers`` override the client's ``extra_headers``."""
    extra_headers = {"custom-serialization": utils.HEADER_AVRO_JSON}
    async_client = AsyncSchemaRegistryClient(url=os.getenv("SCHEMA_REGISTRY_URL"), extra_headers=extra_headers)

    # Without an override, the client-level header is sent.
    response = await async_client.request("https://example.com")
    assert response.request.headers.get("custom-serialization") == utils.HEADER_AVRO_JSON

    subject = "test"
    override_header = {"custom-serialization": utils.HEADER_AVRO}

    mock = async_mock(
        httpx.AsyncClient,
        "request",
        returned_value=response_klass(200, content={"id": 1}),
    )

    with mock:
        await async_client.register(subject, avro_deployment_schema, headers=override_header)

        # Expected headers: the client's prepared headers with the override applied.
        prepare_headers = async_client.prepare_headers(body="1")
        prepare_headers["custom-serialization"] = utils.HEADER_AVRO

        mock.assert_called_with(headers=prepare_headers)


def test_cert_path():
    """A truthy ``ca_location`` is stored in ``client.conf``."""
    client = AsyncSchemaRegistryClient(url="https://127.0.0.1:65534", ca_location=True)
    assert client.conf[utils.SSL_CA_LOCATION]


def test_init_with_dict(certificates):
    """The client accepts a configuration dict; the stored url gains a trailing slash."""
    client = AsyncSchemaRegistryClient(
        {
            "url": "https://127.0.0.1:65534",
            "ssl.certificate.location": certificates["certificate"],
            "ssl.key.location": certificates["key"],
            "ssl.key.password": "test",
        }
    )
    assert "https://127.0.0.1:65534/" == client.url_manager.url


def test_empty_url():
    """An empty url is rejected with ``AssertionError``."""
    with pytest.raises(AssertionError):
        AsyncSchemaRegistryClient({"url": ""})


def test_invalid_type_url():
    """A non-string url keyword raises ``AttributeError``."""
    with pytest.raises(AttributeError):
        AsyncSchemaRegistryClient(url=1)


def test_invalid_type_url_dict():
    """A non-string url inside the config dict raises ``AttributeError``."""
    with pytest.raises(AttributeError):
        AsyncSchemaRegistryClient({"url": 1})


@pytest.mark.asyncio
async def test_basic_auth_url():
    """Credentials embedded in the url are sent as a Basic ``Authorization`` header."""
    username = "secret-user"
    password = "secret"
    client = AsyncSchemaRegistryClient({"url": f"https://{username}:{password}@127.0.0.1:65534"})
    # NOTE(review): httpx._utils is a private httpx module - may break on upgrade.
    userpass = b":".join((httpx._utils.to_bytes(username), httpx._utils.to_bytes(password)))
    token = b64encode(userpass).decode()
    response = await client.request("https://example.com")
    assert response.request.headers.get("Authorization") == f"Basic {token}"


@pytest.mark.asyncio
async def test_basic_auth_user_info():
    """With source ``user_info``, ``basic.auth.user.info`` wins over url credentials."""
    username = "secret-user"
    password = "secret"
    client = AsyncSchemaRegistryClient(
        {
            "url": "https://user_url:secret_url@127.0.0.1:65534",
            "basic.auth.credentials.source": "user_info",
            "basic.auth.user.info": f"{username}:{password}",
        }
    )
    # NOTE(review): httpx._utils is a private httpx module - may break on upgrade.
    userpass = b":".join((httpx._utils.to_bytes(username), httpx._utils.to_bytes(password)))
    token = b64encode(userpass).decode()
    response = await client.request("https://example.com")
    assert response.request.headers.get("Authorization") == f"Basic {token}"


@pytest.mark.asyncio
async def test_auth():
    """An explicit ``httpx.BasicAuth`` wins over credentials embedded in the url."""
    username = "secret-user"
    password = "secret"
    client = AsyncSchemaRegistryClient(
        url="https://user_url:secret_url@127.0.0.1:65534",
        auth=httpx.BasicAuth(username=username, password=password),
    )
    # NOTE(review): httpx._utils is a private httpx module - may break on upgrade.
    userpass = b":".join((httpx._utils.to_bytes(username), httpx._utils.to_bytes(password)))
    token = b64encode(userpass).decode()
    response = await client.request("https://example.com")
    assert response.request.headers.get("Authorization") == f"Basic {token}"


@pytest.mark.asyncio
async def test_custom_auth():
    """A custom ``httpx.Auth`` subclass can set the ``Authorization`` header."""

    class CustomAuth(httpx.Auth):
        def __init__(self, token):
            self.token = token

        def auth_flow(self, request):
            # Send the request, with a custom `Authorization` header.
            request.headers["Authorization"] = f"Bearer {self.token}"
            yield request

    token = "token"
    client = AsyncSchemaRegistryClient(url="https://@127.0.0.1:65534", auth=CustomAuth(token))

    response = await client.request("https://example.com")
    assert response.request.headers.get("Authorization") == f"Bearer {token}"


def test_basic_auth_invalid():
    """An unsupported ``basic.auth.credentials.source`` raises ``ValueError``."""
    with pytest.raises(ValueError):
        AsyncSchemaRegistryClient(
            {
                "url": "https://user_url:secret_url@127.0.0.1:65534",
                "basic.auth.credentials.source": "VAULT",
            }
        )


================================================
FILE: tests/client/async_client/test_schema.py
================================================
import fastavro
import jsonschema
import pytest

from schema_registry.client import schema
from tests import data_gen


def test_avro_schema_from_string():
    """``AvroSchema`` can be built from the basic Avro schema in ``data_gen``."""
    parsed = schema.AvroSchema(data_gen.AVRO_BASIC_SCHEMA)
    assert isinstance(parsed, schema.AvroSchema)


@pytest.mark.asyncio
async def test_avro_schema_from_file():
    """``AvroSchema.async_load`` returns an ``AvroSchema`` for a schema file."""
    parsed = await schema.AvroSchema.async_load(data_gen.get_schema_path("adv_schema.avsc"))
    assert isinstance(parsed, schema.AvroSchema)


@pytest.mark.asyncio
async def test_avro_schema_load_parse_error():
    """Loading the invalid Avro schema file raises ``fastavro.schema.UnknownType``."""
    with pytest.raises(fastavro.schema.UnknownType):
        await schema.AvroSchema.async_load(data_gen.get_schema_path("invalid_schema.avsc"))


def test_json_schema_from_string():
    """``JsonSchema`` can be built from the basic JSON schema in ``data_gen``."""
    parsed = schema.JsonSchema(data_gen.JSON_BASIC_SCHEMA)
    assert isinstance(parsed, schema.JsonSchema)


@pytest.mark.asyncio
async def test_json_schema_from_file():
    """``JsonSchema.async_load`` returns a ``JsonSchema`` for a schema file."""
    parsed = await schema.JsonSchema.async_load(data_gen.get_schema_path("adv_schema.json"))
    assert isinstance(parsed, schema.JsonSchema)


@pytest.mark.asyncio
async def test_json_schema_load_parse_error():
    """Loading the invalid JSON schema file raises ``jsonschema.exceptions.SchemaError``."""
    with pytest.raises(jsonschema.exceptions.SchemaError):
        await schema.JsonSchema.async_load(data_gen.get_schema_path("invalid_schema.json"))


================================================
FILE:
tests/client/async_client/test_schema_compatibility.py ================================================ import httpx import pytest from schema_registry.client import errors, schema, utils from tests import data_gen from tests.conftest import RequestLoggingAsyncSchemaRegistryClient @pytest.mark.asyncio async def test_avro_compatibility(async_client, avro_user_schema_v3): """Test the compatibility of a new User Schema against the User schema version 2.""" subject = "test-avro-user-schema" version_2 = schema.AvroSchema(data_gen.AVRO_USER_V2) await async_client.register(subject, version_2) compatibility = await async_client.test_compatibility(subject, avro_user_schema_v3) assert compatibility @pytest.mark.asyncio async def test_avro_compatibility_dataclasses_avroschema( async_client, dataclass_avro_schema, dataclass_avro_schema_advance ): """Test the compatibility of a new User Schema against the User schema version 2.""" subject = "dataclasses-avroschema-subject" await async_client.register(subject, dataclass_avro_schema.avro_schema()) compatibility = await async_client.test_compatibility(subject, dataclass_avro_schema_advance.avro_schema()) assert compatibility @pytest.mark.asyncio async def test_avro_update_compatibility_for_subject(async_client): """The latest User V2 schema is BACKWARD and FORWARDFULL compatibility (FULL). So, we can ipdate compatibility level for the specified subject. """ assert await async_client.update_compatibility("FULL", "test-avro-user-schema") @pytest.mark.asyncio async def test_avro_update_global_compatibility(async_client): """The latest User V2 schema is BACKWARD and FORWARDFULL compatibility (FULL). So, we can ipdate compatibility level for the specified subject. 
""" assert await async_client.update_compatibility("FULL") @pytest.mark.asyncio async def test_avro_update_compatibility_fail(async_client, response_klass, async_mock): http_code = 404 mock = async_mock(httpx.AsyncClient, "request", returned_value=response_klass(http_code)) with mock: with pytest.raises(errors.ClientError) as excinfo: await async_client.update_compatibility("FULL", "test-avro-user-schema") assert excinfo.http_code == http_code @pytest.mark.asyncio async def test_avro_get_compatibility_for_subject(async_client): """Test latest compatibility for test-avro-user-schema subject""" assert await async_client.get_compatibility("test-avro-user-schema") == "FULL" @pytest.mark.asyncio async def test_avro_get_global_compatibility(async_client): """Test latest compatibility for test-avro-user-schema subject""" assert await async_client.get_compatibility() is not None @pytest.mark.asyncio async def test_json_compatibility(async_client, json_user_schema_v3): """Test the compatibility of a new User Schema against the User schema version 2.""" subject = "test-json-user-schema" version_2 = schema.JsonSchema(data_gen.JSON_USER_V2) await async_client.register(subject, version_2) compatibility = await async_client.test_compatibility(subject, json_user_schema_v3) assert compatibility @pytest.mark.asyncio async def test_json_compatibility_dataclasses_jsonschema( async_client: RequestLoggingAsyncSchemaRegistryClient, dataclass_json_schema, dataclass_json_schema_advance, ): """Test the compatibility of a new User Schema against the User schema version 2.""" subject = "dataclasses-jsonschema-subject" await async_client.register( subject, dataclass_json_schema.model_json_schema(), schema_type=utils.JSON_SCHEMA_TYPE, ) compatibility = await async_client.test_compatibility( subject, dataclass_json_schema_advance.model_json_schema(), schema_type=utils.JSON_SCHEMA_TYPE, ) assert compatibility @pytest.mark.asyncio async def 
test_json_update_compatibility_for_subject(async_client): """The latest User V2 schema is BACKWARD and FORWARDFULL compatibility (FULL). So, we can ipdate compatibility level for the specified subject. """ assert await async_client.update_compatibility("FULL", "test-json-user-schema") @pytest.mark.asyncio async def test_json_update_global_compatibility(async_client): """The latest User V2 schema is BACKWARD and FORWARDFULL compatibility (FULL). So, we can ipdate compatibility level for the specified subject. """ assert await async_client.update_compatibility("FULL") @pytest.mark.asyncio async def test_json_update_compatibility_fail(async_client, response_klass, async_mock): http_code = 404 mock = async_mock(httpx.AsyncClient, "request", returned_value=response_klass(http_code)) with mock: with pytest.raises(errors.ClientError) as excinfo: await async_client.update_compatibility("FULL", "test-json-user-schema") assert excinfo.http_code == http_code @pytest.mark.asyncio async def test_json_get_compatibility_for_subject(async_client): """Test latest compatibility for test-json-user-schema subject""" assert await async_client.get_compatibility("test-json-user-schema") == "FULL" @pytest.mark.asyncio async def test_json_get_global_compatibility(async_client): """Test latest compatibility for test-json-user-schema subject""" assert await async_client.get_compatibility() is not None ================================================ FILE: tests/client/async_client/test_schema_delete.py ================================================ import pytest from schema_registry.client import schema from tests import data_gen @pytest.mark.asyncio async def test_avro_delete_subject(async_client, avro_user_schema_v3): subject = "avro-subject-to-delete" versions = [ schema.AvroSchema(data_gen.AVRO_USER_V1), schema.AvroSchema(data_gen.AVRO_USER_V2), ] for version in versions: await async_client.register(subject, version) assert len(await async_client.delete_subject(subject)) == 
@pytest.mark.asyncio
async def test_avro_getters(async_client):
    """Round-trip an Avro schema through ``get_by_id`` and ``get_schema``."""
    parsed_basic = schema_loader.AvroSchema(data_gen.AVRO_BASIC_SCHEMA)

    # Look the schema up by the id the registry actually handed out rather
    # than assuming the first registered schema always gets id 1.
    first_id = await async_client.register("test-avro-basic-schema", parsed_basic)
    assert await async_client.get_by_id(first_id) is not None

    # Before anything is registered under it, the subject has no latest schema.
    subject = "subject-does-not-exist"
    assert await async_client.get_schema(subject) is None

    schema_id = await async_client.register(subject, parsed_basic)

    # Once registered, the latest version must exist and point at the new id.
    latest = await async_client.get_schema(subject)
    assert latest is not None
    assert latest.schema_id == schema_id

    assert await async_client.get_by_id(schema_id) == parsed_basic
def assertLatest(self, meta_tuple, sid, schema, version):
    """Check ``meta_tuple`` against ``(sid, schema, version)``.

    ``self`` must provide unittest-style ``assertNotEqual`` and
    ``assertEqual``.  ``sid`` and ``version`` are first checked against the
    -1 sentinel, then the tuple is compared position by position.
    """
    for value in (sid, version):
        self.assertNotEqual(value, -1)

    for position, expected in enumerate((sid, schema, version)):
        self.assertEqual(meta_tuple[position], expected)
@pytest.mark.asyncio
async def test_avro_multi_subject_register(async_client):
    """Registering one Avro schema under two subjects yields a single schema id."""
    primary_subject = "test-avro-basic-schema"
    backup_subject = "test-avro-basic-schema-backup"
    basic_schema = schema.AvroSchema(data_gen.AVRO_BASIC_SCHEMA)

    original_id = await async_client.register(primary_subject, basic_schema)
    assert original_id > 0

    # Same schema, different subject: the id must be reused.
    duplicate_id = await async_client.register(backup_subject, basic_schema)
    assert original_id == duplicate_id
    assert len(async_client.id_to_schema) == 1

    subject_versions = await async_client.get_schema_subject_versions(original_id)
    subject_versions.sort(key=lambda entry: entry.subject)
    assert subject_versions[0].subject == primary_subject

    backup_entry = subject_versions[1]
    assert backup_entry.subject == backup_subject

    # The version number depends on how often the schema was soft-deleted
    # before, so only its type and sign are checked.
    assert isinstance(backup_entry.version, int)
    assert backup_entry.version > 0
@pytest.mark.asyncio
async def test_avro_multi_register(async_client):
    """Two different Avro schemas under one subject get distinct ids and rising versions."""
    subject = "test-avro-user-schema"
    user_v1 = schema.AvroSchema(data_gen.AVRO_USER_V1)
    user_v2 = schema.AvroSchema(data_gen.AVRO_USER_V2)

    first_id = await async_client.register(subject, user_v1)
    latest_after_v1 = await async_client.get_schema(subject)
    await async_client.check_version(subject, user_v1)

    second_id = await async_client.register(subject, user_v2)
    latest_after_v2 = await async_client.get_schema(subject)
    await async_client.check_version(subject, user_v2)

    assert first_id != second_id
    assert latest_after_v1 != latest_after_v2
    # The second registration must have produced a newer version.
    assert latest_after_v1.version < latest_after_v2.version

    # Re-registering the first schema must not change what "latest" is.
    await async_client.register(subject, user_v1)
    latest_after_reregister = await async_client.get_schema(subject)
    assert latest_after_v2 == latest_after_reregister
@pytest.mark.asyncio
async def test_json_dupe_register(async_client):
    """Registering the same JSON schema twice under one subject is served from cache."""
    subject = "test-json-basic-schema"
    subject_path = f"/subjects/{subject}"
    basic_schema = schema.JsonSchema(data_gen.JSON_BASIC_SCHEMA)

    first_id = await async_client.register(subject, basic_schema)

    # Request 0: the check-version call.
    async_client.assert_url_suffix(0, subject_path)
    async_client.assert_method(0, "POST")

    # Request 1: the actual registration.
    async_client.assert_url_suffix(1, f"{subject_path}/versions")
    async_client.assert_method(1, "POST")
    assert len(async_client.request_calls) == 2

    assert first_id > 0

    # Request 2: fetching the latest version.
    latest = await async_client.get_schema(subject)
    async_client.assert_url_suffix(2, f"{subject_path}/versions/latest")
    async_client.assert_method(2, "GET")
    assert len(async_client.request_calls) == 3

    # Second registration under the same subject returns the same id ...
    second_id = await async_client.register(subject, basic_schema)
    assert first_id == second_id

    # ... without issuing another request.
    assert len(async_client.request_calls) == 3

    latest_again = await async_client.get_schema(subject)
    assert latest == latest_again
@pytest.mark.asyncio
async def test_register_dataclass_json_schema(async_client, dataclass_json_schema):
    """Register the output of ``model_json_schema()`` as a JSON schema."""
    subject = "dataclasses-jsonschema-subject"
    json_schema = dataclass_json_schema.model_json_schema()

    registered_id = await async_client.register(
        subject,
        json_schema,
        schema_type=utils.JSON_SCHEMA_TYPE,
    )

    assert registered_id > 0
    assert len(async_client.id_to_schema) == 1

    # The subject must now be listed by the registry.
    assert subject in await async_client.get_subjects()
@pytest.mark.asyncio
async def test_avro_check_version(async_client, avro_country_schema):
    """``check_version`` describes the schema that was just registered."""
    subject = "test-avro-schema-version"
    registered_id = await async_client.register(subject, avro_country_schema)

    found = await async_client.check_version(subject, avro_country_schema)

    assert subject == found.subject
    assert registered_id == found.schema_id
    # Only the types of the remaining fields are checked here.
    assert isinstance(found.version, int)
    assert isinstance(found.schema, str)
@pytest.mark.asyncio
async def test_json_delete_version(async_client, json_country_schema):
    """Deleting the newest version of a subject returns that version."""
    subject = "test-json-schema-version"
    await async_client.register(subject, json_country_schema)

    known_versions = await async_client.get_versions(subject)
    newest = known_versions[-1]

    deleted = await async_client.delete_version(subject, newest)
    assert newest == deleted
def test_cert_with_key(certificates):
    """Certificate, key and key password passed to the client end up in ``client.conf``."""
    client = SchemaRegistryClient(
        url="https://127.0.0.1:65534",
        cert_location=certificates["certificate"],
        key_location=certificates["key"],
        key_password=certificates["password"],
    )

    # (configuration key, fixture key) pairs that must match one to one.
    expected_pairs = (
        (utils.SSL_CERTIFICATE_LOCATION, "certificate"),
        (utils.SSL_KEY_LOCATION, "key"),
        (utils.SSL_KEY_PASSWORD, "password"),
    )
    for conf_key, fixture_key in expected_pairs:
        assert client.conf[conf_key] == certificates[fixture_key]
def test_init_with_dict(certificates):
    """The client can be built from a single configuration dictionary."""
    conf = {
        "url": "https://127.0.0.1:65534",
        "ssl.certificate.location": certificates["certificate"],
        "ssl.key.location": certificates["key"],
        "ssl.key.password": certificates["password"],
    }

    client = SchemaRegistryClient(conf)

    # The URL manager exposes the configured URL with a trailing slash.
    assert client.url_manager.url == "https://127.0.0.1:65534/"
def test_auth():
    """An explicit ``auth`` argument wins over the credentials embedded in the URL."""
    username = "secret-user"
    password = "secret"
    client = SchemaRegistryClient(
        url="https://user_url:secret_url@127.0.0.1:65534",
        auth=httpx.BasicAuth(username=username, password=password),
    )

    # Build the expected Basic token with plain ``str.encode`` instead of the
    # private ``httpx._utils.to_bytes`` helper, which is not public API.
    token = b64encode(f"{username}:{password}".encode()).decode()

    # NOTE(review): this looks like a real request to example.com - confirm
    # the test environment allows outbound traffic.
    response = client.request("https://example.com")
    assert response.request.headers.get("Authorization") == f"Basic {token}"
def test_basic_auth_invalid():
    """A ``basic.auth.credentials.source`` of ``VAULT`` is rejected with ``ValueError``."""
    conf = {
        "url": "https://user_url:secret_url@127.0.0.1:65534",
        "basic.auth.credentials.source": "VAULT",
    }

    with pytest.raises(ValueError):
        SchemaRegistryClient(conf)
def test_json_schema_type_property():
    """``JsonSchema.schema_type`` reports the JSON schema type constant."""
    parsed = schema.JsonSchema(data_gen.JSON_BASIC_SCHEMA)
    # Compare against the shared constant, as the Avro counterpart does with
    # ``utils.AVRO_SCHEMA_TYPE``, instead of repeating the literal.
    assert parsed.schema_type == utils.JSON_SCHEMA_TYPE
def test_avro_update_compatibility_fail(client, response_klass, mocker):
    """A failing compatibility update raises ``ClientError`` with the HTTP status.

    ``httpx.Client.request`` is patched to return ``response_klass(404)``, so
    the update is expected to be rejected.
    """
    http_code = 404
    mocker.patch.object(httpx.Client, "request", return_value=response_klass(http_code))

    with pytest.raises(errors.ClientError) as excinfo:
        client.update_compatibility("FULL", "test-avro-user-schema")

    # The raised exception is stored on ``excinfo.value``; ``ExceptionInfo``
    # itself has no ``http_code``.  The check must also sit after the
    # ``raises`` block: code following the raising call inside it never runs.
    assert excinfo.value.http_code == http_code
def test_json_compatibility_dataclasses_jsonschema(client, dataclass_json_schema, dataclass_json_schema_advance):
    """The advanced model's JSON schema is compatible with the registered base one."""
    subject = "dataclasses-jsonschema-subject"

    base_schema = dataclass_json_schema.model_json_schema()
    client.register(subject, base_schema, schema_type=utils.JSON_SCHEMA_TYPE)

    advanced_schema = dataclass_json_schema_advance.model_json_schema()
    is_compatible = client.test_compatibility(
        subject,
        advanced_schema,
        schema_type=utils.JSON_SCHEMA_TYPE,
    )

    # Strict identity check: the result must be the boolean ``True``.
    assert is_compatible is True
""" assert client.update_compatibility("FULL") def test_json_update_compatibility_fail(client, response_klass, mocker): http_code = 404 mocker.patch.object(httpx.Client, "request", return_value=response_klass(http_code)) with pytest.raises(errors.ClientError) as excinfo: client.update_compatibility("FULL", "test-json-user-schema") assert excinfo.http_code == http_code def test_json_get_compatibility_for_subject(client): """Test latest compatibility for test-json-user-schema subject.""" assert client.get_compatibility("test-json-user-schema") == "FULL" def test_json_get_global_compatibility(client): """Test latest compatibility for test-json-user-schema subject.""" assert client.get_compatibility() is not None ================================================ FILE: tests/client/sync_client/test_schema_delete.py ================================================ from schema_registry.client import schema from tests import data_gen def test_avro_delete_subject(client, avro_user_schema_v3): subject = "avro-subject-to-delete" versions = [ schema.AvroSchema(data_gen.AVRO_USER_V1), schema.AvroSchema(data_gen.AVRO_USER_V2), ] for version in versions: client.register(subject, version) assert len(client.delete_subject(subject)) == len(versions) def test_json_delete_subject(client, json_user_schema_v3): subject = "json-subject-to-delete" versions = [schema.JsonSchema(data_gen.JSON_USER_V2), json_user_schema_v3] for version in versions: client.register(subject, version) assert len(client.delete_subject(subject)) == len(versions) def test_delete_subject_does_not_exist(client): assert not client.delete_subject("a-random-subject") ================================================ FILE: tests/client/sync_client/test_schema_getters.py ================================================ from schema_registry.client import schema as schema_loader from tests import data_gen def test_avro_getters(client): subject = "test-avro-basic-schema" parsed_basic = 
def test_json_getters(client):
    """Round-trip a JSON schema through ``get_by_id`` and ``get_schema``."""
    parsed_basic = schema_loader.JsonSchema(data_gen.JSON_BASIC_SCHEMA)

    # Look the schema up by the id the registry actually handed out rather
    # than assuming the first registered schema always gets id 1.
    first_id = client.register("test-json-basic-schema", parsed_basic)
    assert client.get_by_id(first_id) is not None

    # Before anything is registered under it, the subject has no latest schema.
    subject = "subject-does-not-exist"
    assert client.get_schema(subject) is None

    schema_id = client.register(subject, parsed_basic)

    # Once registered, the latest version must exist and point at the new id.
    latest = client.get_schema(subject)
    assert latest is not None
    assert latest.schema_id == schema_id

    assert client.get_by_id(schema_id) == parsed_basic
def test_avro_register(client):
    """Registering an Avro schema yields a positive id and caches one schema."""
    subject = "test-avro-basic-schema"
    avro_schema = schema.AvroSchema(data_gen.AVRO_BASIC_SCHEMA)

    registered_id = client.register(subject, avro_schema)

    assert registered_id > 0
    assert len(client.id_to_schema) == 1

    # The registry must report the subject the schema was registered under.
    first_entry = client.get_schema_subject_versions(registered_id)[0]
    assert first_entry.subject == "test-avro-basic-schema"
def test_avro_dupe_register(client):
    """Re-registering the same Avro schema under one subject adds no requests."""
    subject = "test-avro-basic-schema"
    lookup_suffix = "/subjects/%s" % subject
    versions_suffix = "/subjects/%s/versions" % subject
    latest_suffix = "/subjects/%s/versions/latest" % subject
    avro_schema = schema.AvroSchema(data_gen.AVRO_BASIC_SCHEMA)

    first_id = client.register(subject, avro_schema)

    # Request 0 is the check-version call, request 1 the registration itself.
    client.assert_url_suffix(0, lookup_suffix)
    client.assert_method(0, "POST")
    client.assert_url_suffix(1, versions_suffix)
    client.assert_method(1, "POST")
    assert len(client.request_calls) == 2
    assert first_id > 0

    # Fetching the latest schema is the third logged request.
    first_latest = client.get_schema(subject)
    client.assert_url_suffix(2, latest_suffix)
    client.assert_method(2, "GET")
    assert len(client.request_calls) == 3

    # A second registration under the same subject returns the same id ...
    second_id = client.register(subject, avro_schema)
    assert first_id == second_id
    # ... and the request log does not grow.
    assert len(client.request_calls) == 3

    second_latest = client.get_schema(subject)
    assert first_latest == second_latest
def test_json_multi_subject_register(client):
    """One JSON schema registered under two subjects keeps a single id."""
    json_schema = schema.JsonSchema(data_gen.JSON_BASIC_SCHEMA)

    primary_id = client.register("test-json-basic-schema", json_schema)
    assert primary_id > 0

    # Same schema, different subject: the id must be reused.
    backup_id = client.register("test-json-basic-schema-backup", json_schema)
    assert primary_id == backup_id

    cached_schemas = client.id_to_schema
    assert len(cached_schemas) == 1
def test_avro_get_versions(client, avro_country_schema):
    """A subject with a registered Avro schema reports a non-empty version list."""
    subject = "test-avro-schema-version"
    client.register(subject, avro_country_schema)

    assert client.get_versions(subject)
def test_avro_delete_version(client, avro_country_schema):
    """Deleting the newest version of a subject returns that version."""
    subject = "test-avro-schema-version"
    client.register(subject, avro_country_schema)

    # The last entry of the version list is treated as the latest version.
    newest = client.get_versions(subject)[-1]
    deleted = client.delete_version(subject, newest)

    assert newest == deleted
@pytest.mark.parametrize("base_url", BASE_URLS)
def test_url_with_path(base_url):
    """URLs built by ``UrlManager`` keep the base URL, including any path prefix."""
    # Named ``local_paths`` so it does not shadow the module-level ``paths``
    # imported from ``schema_registry.client.paths``.
    local_paths = [("get_cars", "cars/{car_id}", "GET"), ("create_car", "cars", "POST")]
    url_manager = urls.UrlManager(base_url, local_paths)

    url, method = url_manager.url_for("get_cars")

    assert base_url in url
    # The HTTP method declared for the endpoint is returned alongside the URL.
    assert method == "GET"
@pytest.mark.parametrize("base_url", BASE_URLS)
def test_client_paths(base_url):
    """Every endpoint declared in ``paths`` resolves to a URL under the base URL."""
    manager = urls.UrlManager(base_url, paths)

    for endpoint_name, _template, _verb in paths:
        # The same substitution values are offered to every endpoint.
        substitutions = {"subject": "my-subject", "version": 1}
        resolved = manager.url_for(endpoint_name, **substitutions)
        url, method = resolved
        assert base_url in url
class Response:
    """Minimal stand-in for an HTTP response, used to stub client requests.

    It carries a status code and a payload, and exposes the payload through
    ``json()``.
    """

    def __init__(self, status_code, content=None):
        self.status_code = status_code
        # Only ``None`` triggers the default; a fresh dict is built per instance.
        self.content = {} if content is None else content

    def json(self):
        """Return the stored payload unchanged."""
        return self.content
def request(
    self,
    url: str,
    method: str = "GET",
    body: typing.Optional[dict] = None,
    params: typing.Optional[dict] = None,
    headers: typing.Optional[dict] = None,
    timeout: typing.Union[TimeoutTypes, UseClientDefault] = USE_CLIENT_DEFAULT,
) -> tuple:
    """Record the call in ``self.request_calls`` and delegate to the parent ``request``.

    Each call is stored as a ``RequestArgs`` tuple so tests can later inspect
    which URL and HTTP method were used, and how many requests were issued.
    The return value is whatever the parent implementation returns.
    """
    # RequestArgs fields are (url, method, body, headers, params, timeout):
    # headers come before params there, unlike in this signature.
    self.request_calls.append(RequestArgs(url, method, body, headers, params, timeout))
    return super().request(url, method, body, headers=headers, params=params, timeout=timeout)
class AsyncMock:
    """Context manager that temporarily replaces an attribute with an async stub.

    While the context is active, ``getattr(module, func)`` is the coroutine
    function ``self.mock``, which records its arguments and returns
    ``returned_value``. The original attribute is restored on exit.

    Args:
        module: Object (module, class, instance) owning the attribute.
        func: Name of the attribute to replace.
        returned_value: Value returned when the stub is awaited.
    """

    def __init__(self, module, func, returned_value=None):
        self.module = module
        self.func = func
        self.returned_value = returned_value
        # Captured up front so ``__exit__`` can put it back.
        self.original_object = getattr(module, func)
        # Both stay ``None`` until the stub is awaited at least once.
        self.args_called_with = None
        self.kwargs_called_with = None

    def __enter__(self):
        setattr(self.module, self.func, self.mock)
        # Returning self makes ``with AsyncMock(...) as patched:`` usable;
        # callers that ignore the value are unaffected.
        return self

    def __exit__(self, *args):
        setattr(self.module, self.func, self.original_object)

    def assert_called_with(self, **kwargs):
        """Assert that the last call received the given keyword arguments.

        Only the keywords passed here are compared. A stub that was never
        awaited, or a keyword missing from the last call, is reported as an
        assertion failure rather than a ``TypeError``/``KeyError``.
        """
        recorded = self.kwargs_called_with
        assert recorded is not None, f"{self.func!r} mock was never awaited"
        for key, value in kwargs.items():
            assert key in recorded, f"{self.func!r} mock was not called with keyword {key!r}"
            assert recorded[key] == value

    async def mock(self, *args, **kwargs):
        """Record the call arguments and return the configured value."""
        self.args_called_with = args
        self.kwargs_called_with = kwargs
        return self.returned_value
@pytest.fixture
def dataclass_avro_schema():
    """Provide a small ``AvroModel`` dataclass for schema tests.

    Returns:
        The ``UserAdvance`` class itself, not an instance.
    """

    # Declared inside the fixture, so every test gets a freshly built class.
    # NOTE(review): no docstring is added to the class on purpose; it may end
    # up in the generated schema - confirm before adding one.
    @dataclasses.dataclass
    class UserAdvance(AvroModel):
        name: str
        age: int
        # default_factory gives each instance its own list / dict.
        pets: typing.List[str] = dataclasses.field(default_factory=lambda: ["dog", "cat"])
        accounts: typing.Dict[str, int] = dataclasses.field(default_factory=lambda: {"key": 1})
        has_car: bool = False

    return UserAdvance
# Schema sources (raw file contents) and sample records shared by the tests.
#
# The ``*_ITEMS`` constants are lists, not ``map`` objects. A ``map`` is a
# one-shot iterator, so once one test had looped over it, every later test
# iterating the same module-level constant received no records and checked
# nothing.
AVRO_BASIC_SCHEMA = load_schema_file(os.path.join(AVRO_SCHEMAS_DIR, "basic_schema.avsc"))
AVRO_ADVANCED_SCHEMA = load_schema_file(os.path.join(AVRO_SCHEMAS_DIR, "adv_schema.avsc"))
AVRO_BASIC_ITEMS = [create_basic_item(i) for i in range(1, 20)]
AVRO_USER_V1 = load_schema_file(os.path.join(AVRO_SCHEMAS_DIR, "user_v1.avsc"))
AVRO_USER_V2 = load_schema_file(os.path.join(AVRO_SCHEMAS_DIR, "user_v2.avsc"))
AVRO_LOGICAL_TYPES_SCHEMA = load_schema_file(os.path.join(AVRO_SCHEMAS_DIR, "logical_types_schema.avsc"))
AVRO_ADVANCED_ITEMS = [create_adv_item(i) for i in range(1, 20)]
AVRO_NESTED_SCHEMA = load_schema_file(os.path.join(AVRO_SCHEMAS_DIR, "nested_schema.avsc"))
AVRO_ORDER_SCHEMA = load_schema_file(os.path.join(AVRO_SCHEMAS_DIR, "order_schema.avsc"))

JSON_BASIC_SCHEMA = load_schema_file(os.path.join(JSON_SCHEMAS_DIR, "basic_schema.json"))
JSON_ADVANCED_SCHEMA = load_schema_file(os.path.join(JSON_SCHEMAS_DIR, "adv_schema.json"))
JSON_BASIC_ITEMS = [create_basic_item(i) for i in range(1, 20)]
JSON_USER_V1 = load_schema_file(os.path.join(JSON_SCHEMAS_DIR, "user_v1.json"))
JSON_USER_V2 = load_schema_file(os.path.join(JSON_SCHEMAS_DIR, "user_v2.json"))
JSON_ADVANCED_ITEMS = [create_adv_item(i) for i in range(1, 20)]
JSON_NESTED_SCHEMA = load_schema_file(os.path.join(JSON_SCHEMAS_DIR, "nested_schema.json"))
JSON_ORDER_SCHEMA = load_schema_file(os.path.join(JSON_SCHEMAS_DIR, "order_schema.json"))
"oneOf" : [ { "type" : "string" } ] } } } }, "$ref" : "#/definitions/record.python.test.advanced.advanced" } ================================================ FILE: tests/json_schemas/basic_schema.json ================================================ { "definitions" : { "record:python.test.basic.basic" : { "description" : "basic schema for tests", "type" : "object", "required" : [ "number", "name" ], "properties" : { "number" : { "oneOf" : [ { "type" : "integer" }, { "type" : "null" } ] }, "name" : { "oneOf" : [ { "type" : "string" } ] } } } }, "$ref" : "#/definitions/record:python.test.basic.basic" } ================================================ FILE: tests/json_schemas/invalid_schema.json ================================================ { "type": "object", "properties": { "firstName": { "type": "invalidType" }, "lastName": { "type": "string" }, "email": { "type": "string", "format": "email" }, "gender": { "type": "string", "enum": ["Male", "Female"] }, "active": { "type": "boolean" }, "weight": { "type": "number" }, "height": { "type": "integer" }, "dateOfBirth": { "type": "string", "format": "date-time" } }, "required": [ "firstName", "email" ], "additionalProperties": false } ================================================ FILE: tests/json_schemas/nested_schema.json ================================================ { "definitions" : { "record:com.questanalytics.core.Customer" : { "type" : "object", "required" : [ "uid", "order", "name" ], "properties" : { "uid" : { "default" : "NONE", "type" : "integer" }, "order" : { "default" : "NONE", "$ref" : "#/definitions/record:com.questanalytics.core.OrderRecord" }, "name" : { "default" : "NONE", "type" : "string" } } }, "record:com.questanalytics.core.OrderRecord" : { "type" : "object", "required" : [ "uid" ], "properties" : { "uid" : { "default" : "NONE", "type" : "integer" } } } }, "$ref" : "#/definitions/record:com.questanalytics.core.Customer" } ================================================ FILE: 
async def assertAvroMessageIsSame(message, expected, schema_id, async_avro_message_serializer):
    """Check the framing of an encoded message and that it decodes to ``expected``.

    The first five bytes are unpacked as a signed byte followed by a
    big-endian unsigned 32-bit integer; they must be ``0`` and ``schema_id``.
    """
    assert message
    assert len(message) > 5

    header = message[0:5]
    magic_byte, embedded_id = struct.unpack(">bI", header)
    assert magic_byte == 0
    assert embedded_id == schema_id

    # Round-trip through the serializer and compare with the source record.
    round_tripped = await async_avro_message_serializer.decode_message(message)
    assert round_tripped
    assert round_tripped == expected
1, "port": 8080, } message_encoded = await async_avro_message_serializer.encode_record_with_schema( "avro-deployment", avro_deployment_schema, deployment_record ) assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) # now decode the message message_decoded = await async_avro_message_serializer.decode_message(message_encoded) assert message_decoded == deployment_record # async def test_encode_with_schema_string(async_avro_message_serializer): # deployment_record = {"image": "registry.gitlab.com/my-project:1.0.0", "replicas": 1, "port": 8080} # schema = """{ # "type": "record", # "namespace": "com.kubertenes.v2", # "name": "AvroDeploymentV2", # "fields": [ # {"name": "image", "type": "string"}, # {"name": "replicas", "type": "int"}, # {"name": "host", "type": "string", "default": "localhost"}, # {"name": "port", "type": "int"} # ] # }""" # message_encoded = async_avro_message_serializer.encode_record_with_schema( # "avro-deployment", schema, deployment_record # ) # assert message_encoded # assert len(message_encoded) > 5 # assert isinstance(message_encoded, bytes) # # now decode the message # message_decoded = async_avro_message_serializer.decode_message(message_encoded) # assert message_decoded == deployment_record async def test_avro_fail_encode_with_schema(async_avro_message_serializer, avro_deployment_schema): """A record with string ``replicas`` and ``port`` values is expected to raise TypeError on encode.""" bad_record = { "image": "registry.gitlab.com/my-project:1.0.0", "replicas": "1", "port": "8080", } with pytest.raises(TypeError): await async_avro_message_serializer.encode_record_with_schema( "avro-deployment", avro_deployment_schema, bad_record ) async def test_avro_encode_record_with_schema(async_client, async_avro_message_serializer): """Messages encoded from the schema object must carry the id returned by registering that same schema.""" topic = "test" basic = schema.AvroSchema(data_gen.AVRO_BASIC_SCHEMA) subject = "test-avro-value" schema_id = await async_client.register(subject, basic) records = data_gen.AVRO_BASIC_ITEMS for record in records: message = await
async_avro_message_serializer.encode_record_with_schema(topic, basic, record) await assertAvroMessageIsSame(message, record, schema_id, async_avro_message_serializer) async def test_avro_decode_none(async_avro_message_serializer): """null/None messages should decode to None.""" assert await async_avro_message_serializer.decode_message(None) is None async def assertJsonMessageIsSame(message, expected, schema_id, async_json_message_serializer): """Check the first 5 bytes (magic byte 0, then the schema id) and that the decoded message equals ``expected``.""" assert message assert len(message) > 5 magic, sid = struct.unpack(">bI", message[0:5]) assert magic == 0 assert sid == schema_id decoded = await async_json_message_serializer.decode_message(message) assert decoded assert decoded == expected async def test_json_encode_with_schema_id(async_client, async_json_message_serializer): """Round-trip the basic and the advanced JSON items through encode_record_with_schema_id, each with its own registered schema id.""" basic = schema.JsonSchema(data_gen.JSON_BASIC_SCHEMA) subject = "test-json-basic-schema" schema_id = await async_client.register(subject, basic) records = data_gen.JSON_BASIC_ITEMS for record in records: message = await async_json_message_serializer.encode_record_with_schema_id(schema_id, record) await assertJsonMessageIsSame(message, record, schema_id, async_json_message_serializer) adv = schema.JsonSchema(data_gen.JSON_ADVANCED_SCHEMA) subject = "test-json-advance-schema" adv_schema_id = await async_client.register(subject, adv) assert adv_schema_id != schema_id records = data_gen.JSON_ADVANCED_ITEMS for record in records: message = await async_json_message_serializer.encode_record_with_schema_id(adv_schema_id, record) await assertJsonMessageIsSame(message, record, adv_schema_id, async_json_message_serializer) async def test_json_encode_decode_with_schema_from_json(async_json_message_serializer, json_deployment_schema): """Encode a deployment record with the ``json_deployment_schema`` fixture and decode it back to the same dict.""" deployment_record = { "image": "registry.gitlab.com/my-project:1.0.0", "replicas": 1, "port": 8080, } message_encoded = await async_json_message_serializer.encode_record_with_schema( "json-deployment", json_deployment_schema, deployment_record ) assert message_encoded assert
len(message_encoded) > 5 assert isinstance(message_encoded, bytes) # now decode the message message_decoded = await async_json_message_serializer.decode_message(message_encoded) assert message_decoded == deployment_record async def test_json_fail_encode_with_schema(async_json_message_serializer, json_deployment_schema): """A record with string ``replicas`` and ``port`` values is expected to raise jsonschema's ValidationError on encode.""" bad_record = { "image": "registry.gitlab.com/my-project:1.0.0", "replicas": "1", "port": "8080", } with pytest.raises(jsonschema.exceptions.ValidationError): await async_json_message_serializer.encode_record_with_schema( "json-deployment", json_deployment_schema, bad_record ) async def test_json_encode_record_with_schema(async_client, async_json_message_serializer): """Messages encoded from the schema object must carry the id returned by registering that same schema.""" topic = "test" basic = schema.JsonSchema(data_gen.JSON_BASIC_SCHEMA) subject = "test-json-value" schema_id = await async_client.register(subject, basic) records = data_gen.JSON_BASIC_ITEMS for record in records: message = await async_json_message_serializer.encode_record_with_schema(topic, basic, record) await assertJsonMessageIsSame(message, record, schema_id, async_json_message_serializer) async def test_json_decode_none(async_json_message_serializer): """null/None messages should decode to None.""" assert await async_json_message_serializer.decode_message(None) is None ================================================ FILE: tests/serializer/test_faust_serializer.py ================================================ import typing import faust import pydantic from dataclasses_avroschema.faust import AvroRecord from schema_registry.client import schema from schema_registry.serializers import AvroMessageSerializer, JsonMessageSerializer from schema_registry.serializers import faust as serializer from tests import data_gen def test_create_avro_faust_serializer(client, avro_country_schema): """FaustSerializer must wrap an AvroMessageSerializer and keep the subject, schema and client it was built with.""" schema_subject = "test-avro-country" faust_serializer = serializer.FaustSerializer(client, schema_subject, avro_country_schema) assert isinstance(faust_serializer.message_serializer, AvroMessageSerializer)
assert faust_serializer.schema_subject == schema_subject assert faust_serializer.schema == avro_country_schema assert faust_serializer.message_serializer.schemaregistry_client == client def test_avro_dumps_load_message(client, avro_country_schema): """_dumps must return more than 5 bytes and _loads must give back the original record.""" faust_serializer = serializer.FaustSerializer(client, "test-avro-country", avro_country_schema) record = {"country": "Argentina"} message_encoded = faust_serializer._dumps(record) assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) message_decoded = faust_serializer._loads(message_encoded) assert message_decoded == record def test_avro_nested_schema(client): """Round-trip the record built by data_gen.create_nested_schema() through _dumps and _loads.""" nested_schema = schema.AvroSchema(data_gen.AVRO_NESTED_SCHEMA) faust_serializer = serializer.FaustSerializer(client, "test-avro-nested-schema", nested_schema) record = data_gen.create_nested_schema() message_encoded = faust_serializer._dumps(record) assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) message_decoded = faust_serializer._loads(message_encoded) assert message_decoded == record def test_avro_dumps_load_with_register_codec(client, avro_country_schema): """Register the serializer as a faust codec and round-trip a faust.Record through dumps() and loads().""" payload = {"country": "Argentina"} country_serializer = serializer.FaustSerializer(client, "test-avro-country", avro_country_schema) faust.serializers.codecs.register("country_avro_serializer", country_serializer) class CountryRecord(faust.Record, serializer="country_avro_serializer"): country: str country_record = CountryRecord(**payload) message_encoded = country_record.dumps() assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) message_decoded = CountryRecord.loads(message_encoded) assert message_decoded == country_record def test_avro_nested_schema_with_register_codec(client): """Round-trip a Customer record holding a nested Order, each record class bound to its own registered codec.""" nested_schema = schema.AvroSchema(data_gen.AVRO_NESTED_SCHEMA) order_schema = schema.AvroSchema(data_gen.AVRO_ORDER_SCHEMA) customer_serializer = serializer.FaustSerializer(client,
"test-avro-nested-schema", nested_schema) order_serializer = serializer.FaustSerializer(client, "test-avro-order-schema", order_schema) faust.serializers.codecs.register("customer_avro_serializer", customer_serializer) faust.serializers.codecs.register("order_avro_serializer", order_serializer) class Order(faust.Record, serializer="order_avro_serializer"): uid: int class Customer(faust.Record, serializer="customer_avro_serializer"): name: str uid: int order: Order payload = data_gen.create_nested_schema() customer = Customer(**payload) message_encoded = customer.dumps() assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) message_decoded = Customer.loads(message_encoded) assert message_decoded == customer def test_avro_dumps_load_message_dataclasses_avro_schema(client): """Round-trip a record using the schema produced by AvroRecord.avro_schema().""" class AdvanceUserModel(AvroRecord): first_name: str last_name: str age: int faust_serializer = serializer.FaustSerializer(client, "test-dataclasses-avroschema", AdvanceUserModel.avro_schema()) record = { "first_name": "Juan", "last_name": "Perez", "age": 20, } message_encoded = faust_serializer._dumps(record) assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) message_decoded = faust_serializer._loads(message_encoded) assert message_decoded == record def test_avro_dumps_load_message_union_avro_schema(client): """Round-trip a union field with return_record_name=True; the value is a (record name, payload) tuple.""" class FirstMemberRecord(AvroRecord): name: str = "" class SecondMemberRecord(AvroRecord): name: str = "" class UnionFieldAvroModel(AvroRecord): a_name: typing.Union[FirstMemberRecord, SecondMemberRecord, None] avro_name = "test-union-field-avroschema" avro_schema = UnionFieldAvroModel.avro_schema() faust_serializer = serializer.FaustSerializer(client, avro_name, avro_schema, return_record_name=True) record = {"a_name": ("FirstMemberRecord", {"name": "jj"})} message_encoded = faust_serializer._dumps(record) assert message_encoded message_decoded = faust_serializer._loads(message_encoded) assert
message_decoded == record def test_create_json_faust_serializer(client, json_country_schema): """FaustJsonSerializer must wrap a JsonMessageSerializer and keep the subject, schema and client it was built with.""" schema_subject = "test-json-country" faust_serializer = serializer.FaustJsonSerializer(client, schema_subject, json_country_schema) assert isinstance(faust_serializer.message_serializer, JsonMessageSerializer) assert faust_serializer.schema_subject == schema_subject assert faust_serializer.schema == json_country_schema assert faust_serializer.message_serializer.schemaregistry_client == client def test_json_dumps_load_message(client, json_country_schema): """_dumps must return more than 5 bytes and _loads must give back the original record.""" faust_serializer = serializer.FaustJsonSerializer(client, "test-json-country", json_country_schema) record = {"country": "Argentina"} message_encoded = faust_serializer._dumps(record) assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) message_decoded = faust_serializer._loads(message_encoded) assert message_decoded == record def test_json_nested_schema(client): """Round-trip the record built by data_gen.create_nested_schema() through _dumps and _loads.""" nested_schema = schema.JsonSchema(data_gen.JSON_NESTED_SCHEMA) faust_serializer = serializer.FaustJsonSerializer(client, "test-json-nested-schema", nested_schema) record = data_gen.create_nested_schema() message_encoded = faust_serializer._dumps(record) assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) message_decoded = faust_serializer._loads(message_encoded) assert message_decoded == record def test_json_dumps_load_with_register_codec(client, json_country_schema): """Register the serializer as a faust codec and round-trip a faust.Record through dumps() and loads().""" payload = {"country": "Argentina"} country_serializer = serializer.FaustJsonSerializer(client, "test-json-country", json_country_schema) faust.serializers.codecs.register("country_json_serializer", country_serializer) class CountryRecord(faust.Record, serializer="country_json_serializer"): country: str country_record = CountryRecord(**payload) message_encoded = country_record.dumps() assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) message_decoded =
CountryRecord.loads(message_encoded) assert message_decoded == country_record def test_json_nested_schema_with_register_codec(client): """Round-trip a Customer record holding a nested Order, each record class bound to its own registered codec.""" nested_schema = schema.JsonSchema(data_gen.JSON_NESTED_SCHEMA) order_schema = schema.JsonSchema(data_gen.JSON_ORDER_SCHEMA) customer_serializer = serializer.FaustJsonSerializer(client, "test-json-nested-schema", nested_schema) order_serializer = serializer.FaustJsonSerializer(client, "test-json-order-schema", order_schema) faust.serializers.codecs.register("customer_json_serializer", customer_serializer) faust.serializers.codecs.register("order_json_serializer", order_serializer) class Order(AvroRecord, serializer="order_json_serializer"): uid: int class Customer(AvroRecord, serializer="customer_json_serializer"): name: str uid: int order: Order payload = data_gen.create_nested_schema() customer = Customer(**payload) message_encoded = customer.dumps() assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) message_decoded = Customer.loads(message_encoded) assert message_decoded == customer def test_json_dumps_load_message_dataclasses_json_schema(client): """Round-trip a record using the schema produced by model_json_schema().""" class AdvanceUserModel(AvroRecord, pydantic.BaseModel): first_name: str last_name: str age: int faust_serializer = serializer.FaustJsonSerializer( client, "test-dataclasses-jsonschema", AdvanceUserModel.model_json_schema() ) record = { "first_name": "Juan", "last_name": "Perez", "age": 20, } message_encoded = faust_serializer._dumps(record) assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) message_decoded = faust_serializer._loads(message_encoded) assert message_decoded == record def test_json_dumps_load_message_union_json_schema(client): """Round-trip a union field with return_record_name=True; the value here is a plain dict.""" class FirstMemberRecord(pydantic.BaseModel): name: str = "" class SecondMemberRecord(pydantic.BaseModel): name: str = "" class UnionFieldJsonModel(pydantic.BaseModel): a_name: typing.Union[FirstMemberRecord, SecondMemberRecord, None] json_name =
"test-union-field-jsonschema" json_schema = UnionFieldJsonModel.model_json_schema() faust_serializer = serializer.FaustJsonSerializer(client, json_name, json_schema, return_record_name=True) record = {"a_name": {"name": "jj"}} message_encoded = faust_serializer._dumps(record) assert message_encoded message_decoded = faust_serializer._loads(message_encoded) assert message_decoded == record ================================================ FILE: tests/serializer/test_faust_serializer_clean_payload.py ================================================ import typing from faust import Record from schema_registry.serializers import faust as serializer class DummyRecord(Record): """Minimal faust Record with a single ``typing.Any`` field, used as clean_payload input.""" item: typing.Any def test_avro_simple_record(client, avro_country_schema): """clean_payload on a record holding a plain value returns its dict form, including the ``__faust`` namespace entry.""" schema_subject = "test-avro-country" faust_serializer = serializer.FaustSerializer(client, schema_subject, avro_country_schema) result = { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", } dummy = DummyRecord("test") assert result == faust_serializer.clean_payload(dummy) def test_avro_nested_record(client, avro_country_schema): """A record nested inside a record is converted to its dict form as well.""" schema_subject = "test-avro-country" faust_serializer = serializer.FaustSerializer(client, schema_subject, avro_country_schema) result = { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", }, } dummy = DummyRecord(DummyRecord("test")) assert result == faust_serializer.clean_payload(dummy) def test_avro_list_of_records(client, avro_country_schema): """Records held in a list are each converted to their dict form.""" schema_subject = "test-avro-country" faust_serializer = serializer.FaustSerializer(client, schema_subject, avro_country_schema) result = { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": [ { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", }, { "__faust":
{"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", }, ], } dummy = DummyRecord([DummyRecord("test"), DummyRecord("test")]) assert result == faust_serializer.clean_payload(dummy) def test_avro_map_of_records(client, avro_country_schema): schema_subject = "test-avro-country" faust_serializer = serializer.FaustSerializer(client, schema_subject, avro_country_schema) result = { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": { "key1": { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", }, "key2": { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", }, }, } dummy = DummyRecord({"key1": DummyRecord("test"), "key2": DummyRecord("test")}) assert result == faust_serializer.clean_payload(dummy) def test_json_simple_record(client, json_country_schema): schema_subject = "test-avro-country" faust_serializer = serializer.FaustJsonSerializer(client, schema_subject, json_country_schema) result = { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", } dummy = DummyRecord("test") assert result == faust_serializer.clean_payload(dummy) def test_json_nested_record(client, json_country_schema): schema_subject = "test-avro-country" faust_serializer = serializer.FaustJsonSerializer(client, schema_subject, json_country_schema) result = { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", }, } dummy = DummyRecord(DummyRecord("test")) assert result == faust_serializer.clean_payload(dummy) def test_json_list_of_records(client, json_country_schema): schema_subject = "test-avro-country" faust_serializer = serializer.FaustJsonSerializer(client, schema_subject, json_country_schema) result = { "__faust": {"ns": 
"tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": [ { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", }, { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", }, ], } dummy = DummyRecord([DummyRecord("test"), DummyRecord("test")]) assert result == faust_serializer.clean_payload(dummy) def test_json_map_of_records(client, json_country_schema): schema_subject = "test-avro-country" faust_serializer = serializer.FaustJsonSerializer(client, schema_subject, json_country_schema) result = { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": { "key1": { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", }, "key2": { "__faust": {"ns": "tests.serializer.test_faust_serializer_clean_payload.DummyRecord"}, "item": "test", }, }, } dummy = DummyRecord({"key1": DummyRecord("test"), "key2": DummyRecord("test")}) assert result == faust_serializer.clean_payload(dummy) ================================================ FILE: tests/serializer/test_message_serializer.py ================================================ import math import struct import jsonschema import pytest from schema_registry.client import schema from tests import data_gen def assertAvroMessageIsSame(message, expected, schema_id, avro_message_serializer): assert message assert len(message) > 5 magic, sid = struct.unpack(">bI", message[0:5]) assert magic == 0 assert sid == schema_id decoded = avro_message_serializer.decode_message(message) assert decoded assert decoded == expected def test_avro_encode_with_schema_id(client, avro_message_serializer): basic = schema.AvroSchema(data_gen.AVRO_BASIC_SCHEMA) subject = "test-avro-basic-schema" schema_id = client.register(subject, basic) records = data_gen.AVRO_BASIC_ITEMS for record in records: message = 
avro_message_serializer.encode_record_with_schema_id(schema_id, record) assertAvroMessageIsSame(message, record, schema_id, avro_message_serializer) adv = schema.AvroSchema(data_gen.AVRO_ADVANCED_SCHEMA) subject = "test-avro-advance-schema" adv_schema_id = client.register(subject, adv) assert adv_schema_id != schema_id records = data_gen.AVRO_ADVANCED_ITEMS for record in records: message = avro_message_serializer.encode_record_with_schema_id(adv_schema_id, record) assertAvroMessageIsSame(message, record, adv_schema_id, avro_message_serializer) def test_avro_encode_logical_types(client, avro_message_serializer): """Encode and decode a logical-types record; the decoded timestamp (floored to seconds) must not be earlier than the original and the totals must match.""" logical_types_schema = schema.AvroSchema(data_gen.AVRO_LOGICAL_TYPES_SCHEMA) subject = "test-logical-types-schema" schema_id = client.register(subject, logical_types_schema) record = data_gen.create_logical_item() message = avro_message_serializer.encode_record_with_schema_id(schema_id, record) decoded = avro_message_serializer.decode_message(message) decoded_datetime = decoded["metadata"]["timestamp"] decoded_total = decoded["metadata"]["total"] record_datetime = record["metadata"]["timestamp"] record_total = record["metadata"]["total"] assert math.floor(record_datetime.timestamp()) <= math.floor(decoded_datetime.timestamp()) assert record_total == decoded_total def test_avro_encode_decode_with_schema_from_json(avro_message_serializer, avro_deployment_schema): """Encode a deployment record with the ``avro_deployment_schema`` fixture and decode it back to the same dict.""" deployment_record = { "image": "registry.gitlab.com/my-project:1.0.0", "replicas": 1, "port": 8080, } message_encoded = avro_message_serializer.encode_record_with_schema( "avro-deployment", avro_deployment_schema, deployment_record ) assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) # now decode the message message_decoded = avro_message_serializer.decode_message(message_encoded) assert message_decoded == deployment_record # def test_encode_with_schema_string(avro_message_serializer): # deployment_record = {"image": "registry.gitlab.com/my-project:1.0.0",
"replicas": 1, "port": 8080} # schema = """{ # "type": "record", # "namespace": "com.kubertenes.v2", # "name": "AvroDeploymentV2", # "fields": [ # {"name": "image", "type": "string"}, # {"name": "replicas", "type": "int"}, # {"name": "host", "type": "string", "default": "localhost"}, # {"name": "port", "type": "int"} # ] # }""" # message_encoded = avro_message_serializer.encode_record_with_schema( # "avro-deployment", schema, deployment_record # ) # assert message_encoded # assert len(message_encoded) > 5 # assert isinstance(message_encoded, bytes) # # now decode the message # message_decoded = avro_message_serializer.decode_message(message_encoded) # assert message_decoded == deployment_record def test_avro_fail_encode_with_schema(avro_message_serializer, avro_deployment_schema): """A record with string ``replicas`` and ``port`` values is expected to raise TypeError on encode.""" bad_record = { "image": "registry.gitlab.com/my-project:1.0.0", "replicas": "1", "port": "8080", } with pytest.raises(TypeError): avro_message_serializer.encode_record_with_schema("avro-deployment", avro_deployment_schema, bad_record) def test_avro_encode_record_with_schema(client, avro_message_serializer): """Messages encoded from the schema object must carry the id returned by registering that same schema.""" topic = "test" basic = schema.AvroSchema(data_gen.AVRO_BASIC_SCHEMA) subject = "test-avro-value" schema_id = client.register(subject, basic) records = data_gen.AVRO_BASIC_ITEMS for record in records: message = avro_message_serializer.encode_record_with_schema(topic, basic, record) assertAvroMessageIsSame(message, record, schema_id, avro_message_serializer) def test_avro_decode_none(avro_message_serializer): """null/None messages should decode to None.""" assert avro_message_serializer.decode_message(None) is None def assertJsonMessageIsSame(message, expected, schema_id, json_message_serializer): """Check the first 5 bytes (magic byte 0, then the schema id) and that the decoded message equals ``expected``.""" assert message assert len(message) > 5 magic, sid = struct.unpack(">bI", message[0:5]) assert magic == 0 assert sid == schema_id decoded = json_message_serializer.decode_message(message) assert decoded assert decoded == expected def test_json_encode_with_schema_id(client,
json_message_serializer): basic = schema.JsonSchema(data_gen.JSON_BASIC_SCHEMA) subject = "test-json-basic-schema" schema_id = client.register(subject, basic) records = data_gen.JSON_BASIC_ITEMS for record in records: message = json_message_serializer.encode_record_with_schema_id(schema_id, record) assertJsonMessageIsSame(message, record, schema_id, json_message_serializer) adv = schema.JsonSchema(data_gen.JSON_ADVANCED_SCHEMA) subject = "test-json-advance-schema" adv_schema_id = client.register(subject, adv) assert adv_schema_id != schema_id records = data_gen.JSON_ADVANCED_ITEMS for record in records: message = json_message_serializer.encode_record_with_schema_id(adv_schema_id, record) assertJsonMessageIsSame(message, record, adv_schema_id, json_message_serializer) def test_json_encode_decode_with_schema_from_json(json_message_serializer, json_deployment_schema): """Encode a deployment record with the ``json_deployment_schema`` fixture and decode it back to the same dict.""" deployment_record = { "image": "registry.gitlab.com/my-project:1.0.0", "replicas": 1, "port": 8080, } message_encoded = json_message_serializer.encode_record_with_schema( "json-deployment", json_deployment_schema, deployment_record ) assert message_encoded assert len(message_encoded) > 5 assert isinstance(message_encoded, bytes) # now decode the message message_decoded = json_message_serializer.decode_message(message_encoded) assert message_decoded == deployment_record def test_json_fail_encode_with_schema(json_message_serializer, json_deployment_schema): """A record with string ``replicas`` and ``port`` values is expected to raise jsonschema's ValidationError on encode.""" bad_record = { "image": "registry.gitlab.com/my-project:1.0.0", "replicas": "1", "port": "8080", } with pytest.raises(jsonschema.exceptions.ValidationError): json_message_serializer.encode_record_with_schema("json-deployment", json_deployment_schema, bad_record) def test_json_encode_record_with_schema(client, json_message_serializer): """Messages encoded from the schema object must carry the id returned by registering that same schema.""" topic = "test" basic = schema.JsonSchema(data_gen.JSON_BASIC_SCHEMA) subject = "test-json-value" schema_id = client.register(subject, basic) records = data_gen.JSON_BASIC_ITEMS for record in records: message =
json_message_serializer.encode_record_with_schema(topic, basic, record) assertJsonMessageIsSame(message, record, schema_id, json_message_serializer) def test_json_decode_none(json_message_serializer): """null/None messages should decode to None.""" assert json_message_serializer.decode_message(None) is None