Repository: mapping-commons/sssom Branch: master Commit: 8cfba1099da4 Files: 129 Total size: 1004.5 KB Directory structure: gitextract_mofzv687/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ └── new_sssom_element.md │ ├── pull_request_template.md │ └── workflows/ │ ├── build.yml │ ├── codespell.yml │ ├── deploy_documentation.yml │ ├── main.yaml │ └── pypi-publish.yaml ├── .gitignore ├── .prettierignore ├── CHANGELOG.md ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SSSOM.md ├── about.yaml ├── examples/ │ ├── README.md │ ├── embedded/ │ │ ├── foodie-inc-2022-05-01.sssom.tsv │ │ └── mp-hp-exact-0.0.1.sssom.tsv │ ├── external/ │ │ ├── example1.sssom.tsv │ │ ├── example1.sssom.yml │ │ ├── mp-hp-exact-0.0.1.sssom.tsv │ │ └── mp-hp-exact-0.0.1.sssom.yml │ └── schema/ │ ├── cardinality-scope-empty.sssom.tsv │ ├── cardinality-scope-predicate+object_source.sssom.tsv │ ├── cardinality-scope-predicate.sssom.tsv │ ├── cardinality-with-unmapped-entities.sssom.tsv │ ├── cardinality.sssom.tsv │ ├── composite-entities.sssom.tsv │ ├── curation_rule-propagated.sssom.tsv │ ├── curation_rule.sssom.tsv │ ├── curation_rule_text-propagated.sssom.tsv │ ├── curation_rule_text.sssom.tsv │ ├── curation_rule_text2.sssom.tsv │ ├── curie_map.sssom.tsv │ ├── cxsmiles_pipe.sssom.tsv │ ├── extension-slots.sssom.tsv │ ├── issue_tracker.sssom.tsv │ ├── issue_tracker_item.sssom.tsv │ ├── literals.sssom.tsv │ ├── mapping_set_confidence.sssom.tsv │ ├── mapping_tool_id.sssom.tsv │ ├── no_term_found.sssom.tsv │ ├── pipe-escaping.sssom.tsv │ ├── predicate-types.sssom.tsv │ ├── record-ids.sssom.tsv │ ├── review_date.sssom.tsv │ ├── reviewer_agreement.sssom.tsv │ ├── similarity_score.sssom.tsv │ └── version.sssom.tsv ├── mkdocs.yml ├── project/ │ ├── excel/ │ │ └── sssom_schema.xlsx │ ├── graphql/ │ │ └── sssom_schema.graphql │ ├── jsonld/ │ │ ├── sssom_schema.context.jsonld │ │ └── sssom_schema.jsonld │ ├── jsonschema/ │ │ └── sssom_schema.schema.json │ ├── 
prefixmap/ │ │ └── sssom_schema.yaml │ ├── protobuf/ │ │ └── sssom_schema.proto │ ├── shacl/ │ │ └── sssom_schema.shacl.ttl │ ├── shex/ │ │ └── sssom_schema.shex │ └── sqlschema/ │ └── sssom_schema.sql ├── project.Makefile ├── pyproject.toml ├── run.sh ├── scripts/ │ └── gh_table.pl ├── src/ │ ├── CONFIG.yaml │ ├── doc-templates/ │ │ ├── class.md.jinja2 │ │ ├── class_diagram.md.jinja2 │ │ ├── common_metadata.md.jinja2 │ │ ├── frontpage.md.jinja2 │ │ ├── index.md.jinja2 │ │ └── slot.md.jinja2 │ ├── docs/ │ │ ├── 5star-mappings.md │ │ ├── chaining-rules.md │ │ ├── confidence-model.md │ │ ├── contributing.md │ │ ├── create-mapping-commons.md │ │ ├── editors.md │ │ ├── events/ │ │ │ ├── ccb2022.md │ │ │ ├── mc2021.md │ │ │ ├── mc2023.md │ │ │ ├── oboacademy2022.md │ │ │ ├── ohdsi2022.md │ │ │ ├── ohdsi2023.md │ │ │ ├── om2022.md │ │ │ ├── pistoia2022.md │ │ │ └── wsbo2021.md │ │ ├── explanation/ │ │ │ └── mappings.md │ │ ├── faq.md │ │ ├── funding.md │ │ ├── getting-started.md │ │ ├── glossary.md │ │ ├── introduction.md │ │ ├── javascripts/ │ │ │ └── mathjax.js │ │ ├── mapping-commons.md │ │ ├── mapping-justifications.md │ │ ├── mapping-predicates.md │ │ ├── matching-tool-implementation-guide.md │ │ ├── presentations.md │ │ ├── record-identifiers.md │ │ ├── related-documentation.md │ │ ├── spec-formats-json.md │ │ ├── spec-formats-owl.md │ │ ├── spec-formats-rdf.md │ │ ├── spec-formats-tsv.md │ │ ├── spec-formats.md │ │ ├── spec-intro.md │ │ ├── spec-model.md │ │ ├── spec-support-hashing.md │ │ ├── spec-support.md │ │ ├── toolkit.md │ │ ├── training.md │ │ ├── tutorial.md │ │ ├── tutorials/ │ │ │ └── omop-mappings.md │ │ ├── usecases.md │ │ └── workshops.md │ └── sssom_schema/ │ ├── __init__.py │ ├── context/ │ │ ├── sssom_schema.context.jsonld │ │ └── sssom_schema.jsonld │ ├── datamodel/ │ │ ├── __init__.py │ │ └── sssom_schema.py │ └── schema/ │ └── sssom_schema.yaml ├── tests/ │ ├── __init__.py │ ├── input/ │ │ ├── CONFIG.yaml │ │ └── README.md │ └── 
test_added_in_annotations.py └── utils/ └── get-value.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/new_sssom_element.md ================================================ --- title: "[New metadata element]: " name: Add new SSSOM metadata element about: New metadata element suggestion for SSSOM assignees: matentzn labels: 'new metadata element request' --- **Element id (e.g. creator_id, mapping_tool_version):** (Must be lower case and contain only letters and underscores.) ``` element_id_example ``` **Value data type (e.g. URI, URL, text, xsd:boolean):** ``` xsd:string ``` **Description** (Provide a human-readable description that clarifies the intended use of the metadata element.) Example description. **Complete example of an SSSOM file with this element** (This example can be given as a markdown table or a linked SSSOM file; feel free to edit the markdown table below) ``` # curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # owl: http://www.w3.org/2002/07/owl# # rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# # rdfs: http://www.w3.org/2000/01/rdf-schema# # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # sssom: https://w3id.org/sssom/ # license: https://w3id.org/sssom/license/unspecified # mapping_set_id: https://w3id.org/sssom/mappings/ac9e1878-73f4-4767-8402-a6c40e1b0835 ``` | subject_id | predicate_id | object_id | mapping_justification | element_id_example | | ----------- | --------------- | ----------- | ----------------------- | ------------------- | | HP:0009124 | skos:exactMatch | MP:0000003 | semapv:LexicalMatching | YOUR EXAMPLE VALUE | | HP:0008551 | skos:exactMatch | MP:0000018 | semapv:LexicalMatching | YOUR EXAMPLE VALUE | ================================================ FILE: .github/pull_request_template.md
================================================ Resolves [#ISSUE, #ISSUE] - [ ] `docs/` have been added/updated if necessary - [ ] `make test` has been run locally - [ ] tests have been added/updated (if applicable) - [ ] [CHANGELOG.md](https://github.com/mapping-commons/sssom/blob/master/CHANGELOG.md) has been updated. If you are proposing a change to the SSSOM metadata model, you must - [ ] provide a full, working and valid example in `examples/` - [ ] provide a link to the related GitHub issue in the `see_also` field of the linkml model - [ ] provide a link to a valid example in the `see_also` field of the linkml model - [ ] update the "Model changes across versions" (in `src/docs/spec-model.md`) accordingly - [ ] run SSSOM-Py test suite against the updated model [Add a description, mentioning at least relevant #ISSUE and how it was addressed. A bulleted list of all changes performed by the PR is helpful.] ================================================ FILE: .github/workflows/build.yml ================================================ name: Build on: workflow_dispatch: #push: # branches: [ master ] # paths: # - 'src/linkml/sssom.yaml' jobs: build: runs-on: ubuntu-latest steps: - name: Check out repository uses: actions/checkout@v4.2.2 with: persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal token fetch-depth: 0 # otherwise, you will fail to push refs to dest repo - name: Install uv uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 - name: Set up Python run: uv python install 3.10 - name: Install dependencies run: uv sync --all-groups - name: Create local changes run: | make all - name: Commit files run: | git config --local user.email "action@github.com" git config --local user.name "GitHub Action" git commit -m "Rebuilding documentation and generated files with Github Action" -a - name: Push changes uses: ad-m/github-push-action@master with: github_token: ${{ secrets.GITHUB_TOKEN
}} branch: ${{ github.ref }} ================================================ FILE: .github/workflows/codespell.yml ================================================ # Codespell configuration is within pyproject.toml --- name: Codespell on: push: branches: [master] pull_request: branches: [master] permissions: contents: read jobs: codespell: name: Check for spelling errors runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v4.2.2 - name: Annotate locations with typos uses: codespell-project/codespell-problem-matcher@9ba2c57125d4908eade4308f32c4ff814c184633 - name: Codespell uses: codespell-project/actions-codespell@94259cd8be02ad2903ba34a22d9c13de21a74461 ================================================ FILE: .github/workflows/deploy_documentation.yml ================================================ name: Deploy Documentation # Controls when the action will run. Triggers the workflow on push on: workflow_dispatch: push: branches: - master paths: - "src/docs/*" - "src/sssom_schema/schema/sssom_schema.yaml" - "mkdocs.yml" # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: build-docs: # The type of runner that the job will run on runs-on: ubuntu-latest permissions: contents: write # Steps represent a sequence of tasks that will be executed as part of the job steps: #---------------------------------------------- # check-out repo and set-up python #---------------------------------------------- - name: Check out repository uses: actions/checkout@v4.2.2 with: # persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal token fetch-depth: 0 # otherwise, you will fail to push refs to dest repo - name: Install uv uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 - name: Set up Python run: uv python install 3.10 - name: Install dependencies run: uv sync --all-groups #---------------------------------------------- # generate markdown files 
#---------------------------------------------- - name: Create local docs run: | mkdir -p docs touch docs/.nojekyll make gendoc #---------------------------------------------- # deploy website to gh-pages #---------------------------------------------- - name: Deploy web-based documentation to GitHub Pages run: | git config user.name mike-bot git config user.email mike-bot@sssom.invalid make deploy-doc ================================================ FILE: .github/workflows/main.yaml ================================================ # Built from: # https://docs.github.com/en/actions/guides/building-and-testing-python name: Build and test sssom on: pull_request: branches: [master] types: [opened, synchronize, reopened] jobs: test: runs-on: ubuntu-latest strategy: matrix: python-version: ["3.10", "3.14"] steps: - name: Check out repository uses: actions/checkout@v4.2.2 with: fetch-depth: 0 - name: Install uv uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 - name: Set up Python ${{ matrix.python-version }} run: uv python install ${{ matrix.python-version }} - name: Install dependencies run: uv sync --all-groups - name: Run tests run: make test #---------------------------------------------- # YAML linting #---------------------------------------------- - name: Install Prettier run: npm install prettier - name: Lint YML run: npx prettier --check --prose-wrap always "**/*.yml" - name: Lint YAML run: npx prettier --check --prose-wrap always "**/*.yaml" ================================================ FILE: .github/workflows/pypi-publish.yaml ================================================ name: Publish Python Package # Upload to PyPI is triggered by creating & publishing a release in GitHub UI on: release: # Run when a release is published or a prerelease is published (not on draft creation) types: [published] jobs: build: name: Build Python 🐍 distributions 📦 for publishing to PyPI runs-on: ubuntu-latest steps: - uses: actions/checkout@v4.2.2 
with: # Checkout the code including tags required for dynamic versioning fetch-depth: 0 - name: Install uv uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 - name: Set up Python run: uv python install 3.10 - name: Build source and wheel archives run: uv build - name: Store built distribution uses: actions/upload-artifact@v4.6.2 with: name: distribution-files path: dist/ pypi-publish: name: Build and publish Python 🐍 package 📦 to PyPI needs: build runs-on: ubuntu-latest # Uses trusted publishing. https://docs.pypi.org/trusted-publishers/adding-a-publisher/ environment: name: pypi-release url: https://pypi.org/p/sssom-schema permissions: id-token: write # This permission is mandatory for trusted publishing. steps: - name: Download built distribution uses: actions/download-artifact@v4.3.0 with: name: distribution-files path: dist - name: Publish package 📦 to PyPI uses: pypa/gh-action-pypi-publish@v1.12.4 with: verbose: true skip-existing: true ================================================ FILE: .gitignore ================================================ /docs/ /project/docs/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ .DS_Store tmp/ /node_modules package-lock.json ================================================ FILE: .prettierignore ================================================ project/prefixmap/sssom_schema.yaml ================================================ FILE: CHANGELOG.md ================================================ # Changelog for SSSOM ## Next - Add `composed entity expression` as a new value in the `EntityType` enumeration ([issue](https://github.com/mapping-commons/sssom/issues/402)). - Add `predicate_type` slot (previously defined but unused) to the `Mapping` and `MappingSet` classes ([issue](https://github.com/mapping-commons/sssom/issues/404)). 
- Add `similarity_measure` slot to the `MappingSet` class ([issue](https://github.com/mapping-commons/sssom/issues/411)). - Add `sssom_version` slot to the `MappingSet` class ([issue](https://github.com/mapping-commons/sssom/issues/439)). - Change the type of the `see_also` slot to `xsd:anyURI` ([issue](https://github.com/mapping-commons/sssom/issues/422)). - Add `mapping_set_confidence` slot to the `MappingSet` class ([issue](https://github.com/mapping-commons/sssom/issues/438)). - Updated the RDF binding of sssom:publication_date from dcterms:created to dcterms:issued, which is the more appropriate property to use in this case. - Add `mapping_tool_id` slot to the `Mapping` and `MappingSet` classes ([issue](https://github.com/mapping-commons/sssom/issues/449)). - Add `record_id` slot to the `Mapping` class ([issue](https://github.com/mapping-commons/sssom/issues/359)). - Change all URI-typed slots to clarify that they expect _non-relative_ URIs as values ([issue](https://github.com/mapping-commons/sssom/issues/448)). - Add `curation_rule` and `curation_rule_text` to the `MappingSet` class and made propagatable ([issue](https://github.com/mapping-commons/sssom/issues/464)). - Add `cardinality_scope` slot ([issue](https://github.com/mapping-commons/sssom/issues/467)). - Add new value `0:0` to the `mapping_cardinality_enum` ([issue](https://github.com/mapping-commons/sssom/issues/477)). - Add specification for the RDF serialisation ([discussion](https://github.com/mapping-commons/sssom/discussions/454)). - Development snapshots of SSSOM schema can now be released using GitHub's pre-release feature ([issue](https://github.com/mapping-commons/sssom/issues/490)). 
- Add `review_date` slot ([issue](https://github.com/mapping-commons/sssom/issues/511)) - Add `reviewer_agreement` slot ([issue](https://github.com/mapping-commons/sssom/issues/510)) - Allow encoding pipe characters in multi-valued slots in SSSOM/TSV format ([issue](https://github.com/mapping-commons/sssom/issues/429)). - Specify a standard SSSOM hashing function ([issue](https://github.com/mapping-commons/sssom/issues/436)). ## SSSOM version 1.0.0 - Add the concept of "propagatable slots". - Add the `curie_map` to the model (instead of it being a specificity of the SSSOM/TSV format). - Add the concept of "extension slots". - Add the concept of "literal mappings". - Add the entity reference `sssom:NoTermFound` to express the concept of an "unmapped entity" ([issue](https://github.com/mapping-commons/sssom/issues/28)) - Replace `semantic_similarity_score` with `similarity_score` and `semantic_similarity_measure` with `similarity_measure` in the data model ([issue](https://github.com/mapping-commons/sssom/issues/385)) ## SSSOM version 0.15.1 - Add recommendation to sort the keys in the YAML metadata block. - Double-typed slots explicitly constrained to the [0.0,1.0] range, as per their description. ## SSSOM version 0.15.0 - Add issue_tracker_item and issue_tracker [model elements](https://github.com/mapping-commons/sssom/pull/259). 
## SSSOM version 0.13.0 - The necessity of the "canonical column ordering" was downgraded from MUST to SHOULD (https://github.com/mapping-commons/sssom/pull/285) - Documents clearly that built-in prefixes MUST NOT be redefined (https://github.com/mapping-commons/sssom/pull/285) ## SSSOM version 0.11.0 - see https://github.com/mapping-commons/sssom/releases/tag/0.11.0 ### Summary #### New elements: - `mapping_set_title` to capture a human readable title for a mapping set - `registry_title` and `registry_description` to capture the human readable title and description of an SSSOM mapping set registry - `curation_rule` to capture a (potentially) complex (set of) condition(s) executed by an agent (usually human) that led to the establishment of a mapping. #### Updated elements: - Adding mapping_source slot to Mapping by @matentzn in #230 - Improve documentation for `subject_category` and `object_category` elements #### Documentation - Compiled a list of all SSSOM talks: https://mapping-commons.github.io/sssom/presentations/ - Document chaining rules: https://mapping-commons.github.io/sssom/chaining_rules/ #### Quality control and Technical infrastructure - Make adding a concrete SSSOM example part of the new element request - Adding QC checks for example SSSOM files hosted in the repo ## SSSOM version 0.10.1 - see https://github.com/mapping-commons/sssom/releases/tag/0.10.1 ## SSSOM version 0.9.4 - see https://github.com/mapping-commons/sssom/releases/tag/0.9.4 ## SSSOM version 0.9.3 - see https://github.com/mapping-commons/sssom/releases/tag/0.9.3 - Major change: Changed `match_type` logic to `mapping_justification` ([issue](https://github.com/mapping-commons/sssom/issues/150)). 
## SSSOM version 0.9.2 - see https://github.com/mapping-commons/sssom/releases/tag/0.9.2 ## SSSOM version 0.9.1 - see https://github.com/mapping-commons/sssom/releases/tag/0.9.1 ## SSSOM version 0.9.0 - Initial release - see https://github.com/mapping-commons/sssom/releases/tag/0.9.0 ================================================ FILE: CITATION.cff ================================================ cff-version: '1.1.0' message: 'Please cite the following works when using this software.' abstract: "Abstract\n Despite progress in the development of standards for describing and exchanging scientific information, the lack of easy-to-use standards for mapping between different representations of the same or similar objects in different databases poses a major impediment to data integration and interoperability. Mappings often lack the metadata needed to be correctly interpreted and applied. For example, are two terms equivalent or merely related? Are they narrow or broad matches? Or are they associated in some other way? Such relationships between the mapped terms are often not documented, which leads to incorrect assumptions and makes them hard to use in scenarios that require a high degree of precision (such as diagnostics or risk prediction). Furthermore, the lack of descriptions of how mappings were done makes it hard to combine and reconcile mappings, particularly curated and automated ones. We have developed the Simple Standard for Sharing Ontological Mappings (SSSOM) which addresses these problems by: (i) Introducing a machine-readable and extensible vocabulary to describe metadata that makes imprecision, inaccuracy and incompleteness in mappings explicit. (ii) Defining an easy-to-use simple table-based format that can be integrated into existing data science pipelines without the need to parse or query ontologies, and that integrates seamlessly with Linked Data principles. 
(iii) Implementing open and community-driven collaborative workflows that are designed to evolve the standard continuously to address changing requirements and mapping practices. (iv) Providing reference tools and software libraries for working with the standard. In this paper, we present the SSSOM standard, describe several use cases in detail and survey some of the existing work on standardizing the exchange of mappings, with the goal of making mappings Findable, Accessible, Interoperable and Reusable (FAIR). The SSSOM specification can be found at http://w3id.org/sssom/spec.\n Database URL: http://w3id.org/sssom/spec" authors: - family-names: 'Matentzoglu' given-names: 'Nicolas' - family-names: 'Balhoff' given-names: 'James P' - family-names: 'Bello' given-names: 'Susan M' - family-names: 'Bizon' given-names: 'Chris' - family-names: 'Brush' given-names: 'Matthew' - family-names: 'Callahan' given-names: 'Tiffany J' - family-names: 'Chute' given-names: 'Christopher G' - family-names: 'Duncan' given-names: 'William D' - family-names: 'Evelo' given-names: 'Chris T' - family-names: 'Gabriel' given-names: 'Davera' - family-names: 'Graybeal' given-names: 'John' - family-names: 'Gray' given-names: 'Alasdair' - family-names: 'Gyori' given-names: 'Benjamin M' - family-names: 'Haendel' given-names: 'Melissa' - family-names: 'Harmse' given-names: 'Henriette' - family-names: 'Harris' given-names: 'Nomi L' - family-names: 'Harrow' given-names: 'Ian' - family-names: 'Hegde' given-names: 'Harshad B' - family-names: 'Hoyt' given-names: 'Amelia L' - family-names: 'Hoyt' given-names: 'Charles T' - family-names: 'Jiao' given-names: 'Dazhi' - family-names: 'Jiménez-Ruiz' given-names: 'Ernesto' - family-names: 'Jupp' given-names: 'Simon' - family-names: 'Kim' given-names: 'Hyeongsik' - family-names: 'Koehler' given-names: 'Sebastian' - family-names: 'Liener' given-names: 'Thomas' - family-names: 'Long' given-names: 'Qinqin' - family-names: 'Malone' given-names: 'James' - 
family-names: 'McLaughlin' given-names: 'James A' - family-names: 'McMurry' given-names: 'Julie A' - family-names: 'Moxon' given-names: 'Sierra' - family-names: 'Munoz-Torres' given-names: 'Monica C' - family-names: 'Osumi-Sutherland' given-names: 'David' - family-names: 'Overton' given-names: 'James A' - family-names: 'Peters' given-names: 'Bjoern' - family-names: 'Putman' given-names: 'Tim' - family-names: 'Queralt-Rosinach' given-names: 'Núria' - family-names: 'Shefchek' given-names: 'Kent' - family-names: 'Solbrig' given-names: 'Harold' - family-names: 'Thessen' given-names: 'Anne' - family-names: 'Tudorache' given-names: 'Tania' - family-names: 'Vasilevsky' given-names: 'Nicole' - family-names: 'Wagner' given-names: 'Alex H' - family-names: 'Mungall' given-names: 'Christopher J' doi: '10.1093/database/baac035' identifiers: - type: 'doi' value: '10.1093/database/baac035' - type: 'url' value: 'http://dx.doi.org/10.1093/database/baac035' - type: 'other' value: 'urn:issn:1758-0463' title: 'A Simple Standard for Sharing Ontological Mappings (SSSOM)' url: 'http://dx.doi.org/10.1093/database/baac035' ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by [contacting the project team](contact.md). 
All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This code of conduct has been derived from the excellent code of conduct of the [ATOM project](https://github.com/atom/atom/blob/master/CODE_OF_CONDUCT.md) which in turn is adapted from the [Contributor Covenant][homepage], version 1.4, available at [https://contributor-covenant.org/version/1/4][version] [homepage]: https://contributor-covenant.org [version]: https://contributor-covenant.org/version/1/4/ ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to SSSOM :+1: First of all: Thank you for taking the time to contribute! The following is a set of guidelines for contributing to SSSOM. They are derived from the excellent contribution guidelines for the [ATOM Editor](https://github.com/atom/atom/blob/master/CONTRIBUTING.md) and are mostly guidelines, not rules. Use your best judgment, and feel free to propose changes to this document in a pull request. 
#### Table Of Contents [Code of Conduct](#code-of-conduct) [I don't want to read this whole thing, I just have a question!!!](#i-dont-want-to-read-this-whole-thing-i-just-have-a-question) [What should I know before I get started?](#what-should-i-know-before-i-get-started) [How Can I Contribute?](#how-can-i-contribute) - [Reporting Bugs](#reporting-bugs) - [Your First Code Contribution](#your-first-code-contribution) - [Pull Requests](#pull-requests) - [Local Testing](#local-testing) - [Making a release](#making-a-release) [Style Guides](#styleguides) - [Git Commit Messages](#git-commit-messages) - [Documentation Styleguide](#documentation-styleguide) [Additional Notes](#additional-notes) - [Issue and Pull Request Labels](#issue-and-pull-request-labels) ## Code of Conduct This project and everyone participating in it is governed by the [SSSOM Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [a member of the SSSOM core team](https://mapping-commons.github.io/sssom/contact/). ## I don't want to read this whole thing I just have a question!!! We have an official message board, where the community chimes in with helpful advice if you have questions, and a detailed FAQ. - [GitHub Discussions](https://github.com/mapping-commons/sssom/discussions) - [SSSOM FAQ](https://mapping-commons.github.io/sssom/faq/) ## What should I know before I get started? - Read the [introduction](https://mapping-commons.github.io/sssom/introduction/) - Do the [SSSOM tutorial](https://mapping-commons.github.io/sssom/tutorial/) - Read about the [SSSOM toolkit](https://mapping-commons.github.io/sssom-py), which is managed [in a different repo](https://github.com/mapping-commons/sssom-py) ## How Can I Contribute? ### Reporting Bugs This section guides you through submitting a bug report for SSSOM.
Following these guidelines helps maintainers and the community understand your report :pencil:, reproduce the behavior :computer: :computer:, and find related reports :mag_right:. Before creating bug reports, please check [this list](#before-submitting-a-bug-report) as you might find out that you don't need to create one. When you are creating a bug report, please include as many details as possible. Wherever available, use [existing issue tracker templates](https://github.com/mapping-commons/sssom/issues/new/choose); the information they ask for helps us resolve issues faster. > **Note:** If you find a **Closed** issue that seems like it is the same thing > that you're experiencing, open a new issue and include a link to the original > issue in the body of your new one. #### Before Submitting A Bug Report - **Check the [discussions](https://github.com/mapping-commons/sssom/discussions)** for a list of common questions and problems. - **Decide whether the issue should be reported in the tracker for the [SSSOM data model](https://github.com/mapping-commons/sssom/issues) or the tracker for the [SSSOM toolkit](https://github.com/mapping-commons/sssom-py/issues)**. - **Perform a [cursory search](https://github.com/mapping-commons/sssom/issues)** to see if the problem has already been reported. If it has **and the issue is still open**, add a comment to the existing issue instead of opening a new one. #### How Do I Submit A (Good) Bug Report or Feature request? Bugs and feature requests are tracked as [GitHub issues](https://guides.github.com/features/issues/). After you've determined which repository your bug or feature is related to, create an issue on that repository providing the information required by [the appropriate template](https://github.com/mapping-commons/sssom/issues/new/choose).
Explain the problem and include additional details to help maintainers reproduce the problem: - **Use a clear and descriptive title** for the issue to identify the problem/requests. - **Describe the exact steps which reproduce the problem** in as many details as possible. For example, start by explaining how you started SSSOM, e.g. which command exactly you used in the terminal, or how you started SSSOM otherwise. When listing steps, **don't just say what you did, but explain how you did it**. For example, if you moved the cursor to the end of a line, explain if you used the mouse, or a keyboard shortcut or an SSSOM command, and if so which one? - **Provide specific examples to demonstrate the steps**. Include links to files or GitHub projects, or copy/pasteable snippets, which you use in those examples. If you're providing snippets in the issue, use [Markdown code blocks](https://help.github.com/articles/markdown-basics/#multiple-lines). - **Describe the behavior you observed after following the steps** and point out what exactly is the problem with that behavior. - **Explain which behavior you expected to see instead and why.** Include details about your configuration and environment: - **Which version of SSSOM toolkit/model are you using?** You can get the exact version by running `sssom --version` in your terminal - **What's the name and version of the OS you're using**? ### Your First Code Contribution Unsure where to begin contributing to SSSOM? You can start by looking through these `beginner` and `help-wanted` issues: - [Beginner issues][beginner] - issues which should only require a few lines of code, and a test or two. - [Help wanted issues][help-wanted] - issues which should be a bit more involved than `beginner` issues. 
### Considerations when proposing changes to the model Now that SSSOM 1.0 has been released, and until we start working on a hypothetical SSSOM 2.0, any proposed change to the SSSOM model must consider the issue of backwards compatibility. The key point is that _a set that is compliant with version 1.0 of the specification must be usable “as is” with an implementation compliant with any 1.x version_. This is automatically achieved if all the proposed changes do is _add_ new _optional_ slots, or _new_ enumeration values. For that reason, it is strongly recommended that evolution of the 1.x branch be limited to this type of change only, and that other changes be reserved for a hypothetical version 2.0. In addition, new slots must be marked with an `added_in` annotation indicating the version in which the slot will be introduced, as in the following example: ```yaml my_new_slot: instantiates: - sssom:Versionable annotations: added_in: "1.1" ``` ### Pull Requests The process described here has several goals: - Maintain SSSOM's quality - Fix problems that are important to users - Engage the community in working toward the best possible data model and toolkit - Enable a sustainable system for SSSOM's maintainers to review contributions Please follow these steps to have your contribution considered by the maintainers: 1. Follow all instructions in the pull request template (you will see them when you open a pull request). 2. Follow the [style guides](#styleguides) 3. After you submit your pull request, verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing
What if the status checks are failing? If a status check is failing, and you believe that the failure is unrelated to your change, please leave a comment on the pull request explaining why you believe the failure is unrelated. A maintainer will re-run the status check for you. If we conclude that the failure was a false positive, then we will open an issue to track that problem with our status check suite.
While the prerequisites above must be satisfied prior to having your pull request reviewed, the reviewer(s) may ask you to complete additional design work, tests, or other changes before your pull request can be ultimately accepted. ### Local testing Contributors are strongly advised to run the test suite locally even before submitting a pull request. Prepare the testing environment by running: ```console $ make install ``` This only needs to be run once after cloning the repository. After that, the test suite can be run anytime with ```console $ make test ``` If you are making a change to the documentation/specification, you should also check how your changes are rendered, by running ```console $ make serve ``` and opening `http://127.0.0.1:8000/sssom/` with your browser. Furthermore, any change to the LinkML model should also be tested against SSSOM-Py. To do so: 1. In the current `sssom` repository, build the Python files derived from the LinkML: ```console $ make all ``` 2. Clone the SSSOM-Py repository somewhere (outside your current checkout of `sssom`): ```console $ git clone https://github.com/mapping-commons/sssom-py.git ``` 3. Initialize environment inside the newly cloned repository: ```console $ uv sync ``` 4. Forcefully install your _local_ version of `sssom-schema` in the newly initialized environment: ```sh $ uv pip install /path/to/your/sssom/repository ``` You may get a warning about “incompatible sssom-schema versions”; this is due to the fact that your local copy of `sssom` has a version number set to `0.0.0` (the “real” version number is set at release time, when the package is published to PyPI) and can be safely ignored. 5. Run SSSOM-Py’s test suite: ```sh $ uv run --all-extras pytest ``` ### Making a release Before making a release, check that all the files that are derived from the LinkML schema are up-to-date. If they are not: - re-generate them by running `make all`; - commit all the files that were modified as a result of that command. 
Update any other file as needed (e.g. changelog, README, copyright notices, etc.), and commit the corresponding changes. Once the repository is ready for a release, tag the head commit of the main branch with a version number tag. If the release is intended to be published on the Python Package Index (which it normally should), the version tag MUST be of the form `vVERSION` (`v` prefix followed by the actual intended version number). Push all changes to the main GitHub repository (including the tag), and create the release from the new tag. Be careful that any new release that bumps either the major or the minor version number, and that is not a _pre_ release, will be interpreted not merely as a new version of the `sssom_schema` Python package, but as a new version of the SSSOM specification! ## Styleguides ### Git Commit Messages - Use the present tense ("Add feature" not "Added feature") - Use the imperative mood ("Move cursor to..." not "Moves cursor to...") - Limit the first line to 72 characters or less - Reference issues and pull requests liberally after the first line - Consider starting the commit message with an applicable emoji: - :art: `:art:` when improving the format/structure of the code - :racehorse: `:racehorse:` when improving performance - :non-potable_water: `:non-potable_water:` when plugging memory leaks - :memo: `:memo:` when writing docs - :penguin: `:penguin:` when fixing something on Linux - :apple: `:apple:` when fixing something on macOS - :checkered_flag: `:checkered_flag:` when fixing something on Windows - :bug: `:bug:` when fixing a bug - :fire: `:fire:` when removing code or files - :green_heart: `:green_heart:` when fixing the CI build - :white_check_mark: `:white_check_mark:` when adding tests - :lock: `:lock:` when dealing with security - :arrow_up: `:arrow_up:` when upgrading dependencies - :arrow_down: `:arrow_down:` when downgrading dependencies - :shirt: `:shirt:` when removing linter warnings ### Documentation Styleguide - 
Use [Markdown](https://daringfireball.net/projects/markdown). ## Additional Notes ### Issue and Pull Request Labels This section lists the labels we use to help us track and manage issues and pull requests. Most labels are used across all mapping commons repositories. #### Type of Issue and Issue State | Label name | `mapping-commons/sssom` :mag_right: | `sssom`‑org :mag_right: | Description | | ------------------------- | --------------------------------------------------------- | ------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------- | | `enhancement` | [search][search-sssom-repo-label-enhancement] | [search][search-mapping-commons-org-label-enhancement] | Feature requests. | | `bug` | [search][search-sssom-repo-label-bug] | [search][search-mapping-commons-org-label-bug] | Confirmed bugs or reports that are very likely to be bugs. | | `question` | [search][search-sssom-repo-label-question] | [search][search-mapping-commons-org-label-question] | Questions more than bug reports or feature requests (e.g. how do I do X). | | `feedback` | [search][search-sssom-repo-label-feedback] | [search][search-mapping-commons-org-label-feedback] | General feedback more than bug reports or feature requests. | | `help-wanted` | [search][search-sssom-repo-label-help-wanted] | [search][search-mapping-commons-org-label-help-wanted] | The SSSOM core team would appreciate help from the community in resolving these issues. | | `beginner` | [search][search-sssom-repo-label-beginner] | [search][search-mapping-commons-org-label-beginner] | Less complex issues which would be good first issues to work on for users who want to contribute to SSSOM. 
| | `more-information-needed` | [search][search-sssom-repo-label-more-information-needed] | [search][search-mapping-commons-org-label-more-information-needed] | More information needs to be collected about these problems or feature requests (e.g. steps to reproduce). | | `needs-reproduction` | [search][search-sssom-repo-label-needs-reproduction] | [search][search-mapping-commons-org-label-needs-reproduction] | Likely bugs, but haven't been reliably reproduced. | | `blocked` | [search][search-sssom-repo-label-blocked] | [search][search-mapping-commons-org-label-blocked] | Issues blocked on other issues. | | `duplicate` | [search][search-sssom-repo-label-duplicate] | [search][search-mapping-commons-org-label-duplicate] | Issues which are duplicates of other issues, i.e. they have been reported before. | | `wontfix` | [search][search-sssom-repo-label-wontfix] | [search][search-mapping-commons-org-label-wontfix] | The SSSOM core team has decided not to fix these issues for now, either because they're working as intended or for some other reason. 
| [search-sssom-repo-label-enhancement]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aenhancement [search-mapping-commons-org-label-enhancement]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aenhancement [search-sssom-repo-label-bug]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Abug [search-mapping-commons-org-label-bug]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Abug [search-sssom-repo-label-question]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aquestion [search-mapping-commons-org-label-question]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aquestion [search-sssom-repo-label-feedback]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Afeedback [search-mapping-commons-org-label-feedback]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Afeedback [search-sssom-repo-label-help-wanted]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Ahelp-wanted [search-mapping-commons-org-label-help-wanted]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Ahelp-wanted [search-sssom-repo-label-beginner]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Abeginner [search-mapping-commons-org-label-beginner]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Abeginner [search-sssom-repo-label-more-information-needed]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Amore-information-needed [search-mapping-commons-org-label-more-information-needed]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Amore-information-needed [search-sssom-repo-label-needs-reproduction]: 
https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aneeds-reproduction [search-mapping-commons-org-label-needs-reproduction]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aneeds-reproduction [search-sssom-repo-label-triage-help-needed]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Atriage-help-needed [search-mapping-commons-org-label-triage-help-needed]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Atriage-help-needed [search-sssom-repo-label-windows]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Awindows [search-mapping-commons-org-label-windows]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Awindows [search-sssom-repo-label-linux]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Alinux [search-mapping-commons-org-label-linux]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Alinux [search-sssom-repo-label-mac]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Amac [search-mapping-commons-org-label-mac]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Amac [search-sssom-repo-label-documentation]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Adocumentation [search-mapping-commons-org-label-documentation]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Adocumentation [search-sssom-repo-label-performance]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aperformance [search-mapping-commons-org-label-performance]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aperformance [search-sssom-repo-label-security]: 
https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Asecurity [search-mapping-commons-org-label-security]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Asecurity [search-sssom-repo-label-ui]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aui [search-mapping-commons-org-label-ui]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aui [search-sssom-repo-label-api]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aapi [search-mapping-commons-org-label-api]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aapi [search-sssom-repo-label-crash]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Acrash [search-mapping-commons-org-label-crash]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Acrash [search-sssom-repo-label-auto-indent]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aauto-indent [search-mapping-commons-org-label-auto-indent]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aauto-indent [search-sssom-repo-label-encoding]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aencoding [search-mapping-commons-org-label-encoding]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aencoding [search-sssom-repo-label-network]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Anetwork [search-mapping-commons-org-label-network]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Anetwork [search-sssom-repo-label-uncaught-exception]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Auncaught-exception [search-mapping-commons-org-label-uncaught-exception]: 
https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Auncaught-exception [search-sssom-repo-label-git]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Agit [search-mapping-commons-org-label-git]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Agit [search-sssom-repo-label-blocked]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Ablocked [search-mapping-commons-org-label-blocked]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Ablocked [search-sssom-repo-label-duplicate]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aduplicate [search-mapping-commons-org-label-duplicate]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aduplicate [search-sssom-repo-label-wontfix]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Awontfix [search-mapping-commons-org-label-wontfix]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Awontfix [search-sssom-repo-label-invalid]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Ainvalid [search-mapping-commons-org-label-invalid]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Ainvalid [search-sssom-repo-label-package-idea]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Apackage-idea [search-mapping-commons-org-label-package-idea]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Apackage-idea [search-sssom-repo-label-wrong-repo]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Awrong-repo [search-mapping-commons-org-label-wrong-repo]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Awrong-repo 
[search-sssom-repo-label-editor-rendering]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aeditor-rendering [search-mapping-commons-org-label-editor-rendering]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aeditor-rendering [search-sssom-repo-label-build-error]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Abuild-error [search-mapping-commons-org-label-build-error]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Abuild-error [search-sssom-repo-label-error-from-pathwatcher]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aerror-from-pathwatcher [search-mapping-commons-org-label-error-from-pathwatcher]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aerror-from-pathwatcher [search-sssom-repo-label-error-from-save]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aerror-from-save [search-mapping-commons-org-label-error-from-save]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aerror-from-save [search-sssom-repo-label-error-from-open]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aerror-from-open [search-mapping-commons-org-label-error-from-open]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aerror-from-open [search-sssom-repo-label-installer]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Ainstaller [search-mapping-commons-org-label-installer]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Ainstaller [search-sssom-repo-label-auto-updater]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Aauto-updater [search-mapping-commons-org-label-auto-updater]: 
https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aauto-updater [search-sssom-repo-label-deprecation-help]: https://github.com/search?q=is%3Aopen+is%3Aissue+repo%3Amapping-commons%2Fsssom+label%3Adeprecation-help [search-mapping-commons-org-label-deprecation-help]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Adeprecation-help [search-sssom-repo-label-electron]: https://github.com/search?q=is%3Aissue+repo%3Amapping-commons%2Fsssom+is%3Aopen+label%3Aelectron [search-mapping-commons-org-label-electron]: https://github.com/search?q=is%3Aopen+is%3Aissue+user%3Amapping-commons+label%3Aelectron [search-sssom-repo-label-work-in-progress]: https://github.com/search?q=is%3Aopen+is%3Apr+repo%3Amapping-commons%2Fsssom+label%3Awork-in-progress [search-mapping-commons-org-label-work-in-progress]: https://github.com/search?q=is%3Aopen+is%3Apr+user%3Amapping-commons+label%3Awork-in-progress [search-sssom-repo-label-needs-review]: https://github.com/search?q=is%3Aopen+is%3Apr+repo%3Amapping-commons%2Fsssom+label%3Aneeds-review [search-mapping-commons-org-label-needs-review]: https://github.com/search?q=is%3Aopen+is%3Apr+user%3Amapping-commons+label%3Aneeds-review [search-sssom-repo-label-under-review]: https://github.com/search?q=is%3Aopen+is%3Apr+repo%3Amapping-commons%2Fsssom+label%3Aunder-review [search-mapping-commons-org-label-under-review]: https://github.com/search?q=is%3Aopen+is%3Apr+user%3Amapping-commons+label%3Aunder-review [search-sssom-repo-label-requires-changes]: https://github.com/search?q=is%3Aopen+is%3Apr+repo%3Amapping-commons%2Fsssom+label%3Arequires-changes [search-mapping-commons-org-label-requires-changes]: https://github.com/search?q=is%3Aopen+is%3Apr+user%3Amapping-commons+label%3Arequires-changes [search-sssom-repo-label-needs-testing]: https://github.com/search?q=is%3Aopen+is%3Apr+repo%3Amapping-commons%2Fsssom+label%3Aneeds-testing [search-mapping-commons-org-label-needs-testing]: 
https://github.com/search?q=is%3Aopen+is%3Apr+user%3Amapping-commons+label%3Aneeds-testing [beginner]: https://github.com/search?utf8=%E2%9C%93&q=is%3Aopen+is%3Aissue+label%3Abeginner+label%3Ahelp-wanted+user%3Amapping-commons+sort%3Acomments-desc [help-wanted]: https://github.com/search?q=is%3Aopen+is%3Aissue+label%3Ahelp-wanted+user%3Amapping-commons+sort%3Acomments-desc+-label%3Abeginner [contributing-to-official-sssom-packages]: https://flight-manual.sssom.io/hacking-sssom/sections/contributing-to-official-sssom-packages/ [hacking-on-sssom-core]: https://flight-manual.sssom.io/hacking-sssom/sections/hacking-on-sssom-core/ ================================================ FILE: LICENSE ================================================ BSD 3-Clause License Copyright (c) 2022, Nico Matentzoglu All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: Makefile ================================================ MAKEFLAGS += --warn-undefined-variables SHELL := bash .SHELLFLAGS := -eu -o pipefail -c .DEFAULT_GOAL := help .DELETE_ON_ERROR: .SUFFIXES: .SECONDARY: RUN = uv run # get values from about.yaml file SCHEMA_NAME = sssom_schema SOURCE_SCHEMA_PATH = src/sssom_schema/schema/sssom_schema.yaml SRC = src DEST = project PYMODEL = $(SRC)/$(SCHEMA_NAME)/datamodel DOCDIR = docs TEMPLATE_DIR = $(SRC)/doc-templates # basename of a YAML file in model/ .PHONY: all clean help: status @echo "" @echo "make all -- makes site locally" @echo "make install -- install dependencies" @echo "make setup -- initial setup" @echo "make test -- runs tests" @echo "make testdoc -- builds docs and runs local test server" @echo "make deploy -- deploys site" @echo "make update -- updates linkml version" @echo "make help -- show this help" @echo "" status: check-config @echo "Project: $(SCHEMA_NAME)" @echo "Source: $(SOURCE_SCHEMA_PATH)" setup: install gen-project gendoc git-init-add install: uv sync .PHONY: install all: gen-project gendoc gen-excel get-context %.yaml: gen-project deploy: all mkd-gh-deploy # generates all project files gen-project: $(PYMODEL) $(RUN) gen-project \ --exclude owl \ -d $(DEST) $(SOURCE_SCHEMA_PATH) && mv $(DEST)/*.py $(PYMODEL) test: validate-schema $(RUN) gen-project \ --exclude owl \ -d tmp $(SOURCE_SCHEMA_PATH) $(RUN) pytest validate-schema: 
$(SOURCE_SCHEMA_PATH) $(RUN) linkml lint --validate --validate-only $< check-config: @(grep my-datamodel about.yaml > /dev/null && printf "\n**Project not configured**:\n\n - Remember to edit 'about.yaml'\n\n" || exit 0) convert-examples-to-%: $(patsubst %, $(RUN) linkml-convert % -s $(SOURCE_SCHEMA_PATH) -C Person, $(shell find src/data/examples -name "*.yaml")) get-context: mkdir -p $(SRC)/$(SCHEMA_NAME)/context cp $(DEST)/jsonld/* $(SRC)/$(SCHEMA_NAME)/context examples/%.yaml: src/data/examples/%.yaml $(RUN) linkml-convert -s $(SOURCE_SCHEMA_PATH) -C Person $< -o $@ examples/%.json: src/data/examples/%.yaml $(RUN) linkml-convert -s $(SOURCE_SCHEMA_PATH) -C Person $< -o $@ examples/%.ttl: src/data/examples/%.yaml $(RUN) linkml-convert -P EXAMPLE=http://example.org/ -s $(SOURCE_SCHEMA_PATH) -C Person $< -o $@ upgrade: uv add linkml --upgrade-package linkml # Test documentation locally serve: mkd-serve # Python datamodel $(PYMODEL): mkdir -p $@ $(DOCDIR): mkdir -p $@ gendoc: $(DOCDIR) cp -rf $(SRC)/docs/* $(DOCDIR) ; \ $(RUN) jinjanate $(SRC)/doc-templates/frontpage.md.jinja2 $(SOURCE_SCHEMA_PATH) -o $(DOCDIR)/index.md $(RUN) gen-doc -d $(DOCDIR) $(SOURCE_SCHEMA_PATH) --template-directory $(TEMPLATE_DIR) --index-name linkml-index testdoc: gendoc serve MKDOCS = $(RUN) --all-groups mkdocs mkd-%: $(MKDOCS) $* deploy-doc: $(RUN) mike deploy --push dev PROJECT_FOLDERS = sqlschema shex shacl protobuf prefixmap owl jsonschema jsonld graphql excel git-init-add: git-init git-add git-commit git-status git-init: git init git-add: git add .gitignore .github Makefile LICENSE *.md examples utils about.yaml mkdocs.yml uv.lock project.Makefile pyproject.toml src/linkml/*yaml src/*/datamodel/*py src/data git add $(patsubst %, project/%, $(PROJECT_FOLDERS)) git-commit: git commit -m 'Initial commit' -a git-status: git status clean: rm -rf $(DEST) rm -rf tmp yaml-lint-all: npx --yes prettier --check --prose-wrap always --write "**/*.yaml" npx --yes prettier --check --prose-wrap 
always --write "**/*.yml" include project.Makefile ================================================ FILE: README.md ================================================ # A Simple Standard for Sharing Ontological Mappings (SSSOM) SSSOM is a Simple Standard for Sharing Ontological Mappings, providing 1. a TSV-based representation for ontology term mappings 1. a comprehensive set of standard metadata elements to describe mappings and 1. a standard translation between the TSV and the Web Ontology Language (OWL). The SSSOM TSV format in particular is geared towards the needs of the wider bioinformatics community as a way to safely exchange mappings in an easily readable yet semantically well-specified manner. Consider this example of a simple mapping file: | subject_id | predicate_id | object_id | mapping_justification | subject_label | object_label | | --- | --- | --- | --- | --- | --- | | HP:0009124 | skos:exactMatch | MP:0000003 | semapv:LexicalMatching | Abnormal adipose tissue morphology | abnormal adipose tissue morphology | | HP:0008551 | skos:exactMatch | MP:0000018 | semapv:LexicalMatching | Microtia | small ears | | HP:0000411 | skos:exactMatch | MP:0000021 | semapv:LexicalMatching | Protruding ear | prominent ears | SSSOM specifies all its metadata elements: - subject_id - predicate_id - object_id - mapping_justification (*NOTE: Since June 2022* `match_type` is being replaced by `mapping_justification` see [here](https://github.com/mapping-commons/sssom/issues/150)) - subject_label - object_label including clear definitions, examples of use and controlled vocabulary where necessary, along with 30 other optional metadata elements to provide additional provenance. SSSOM further provides a standard way to - augment the TSV file with mapping-set-level metadata, such as creator_id, mapping_date or license and - translate SSSOM-compliant TSV files into _OWL reified axioms_. 
This will allow the easy loading, and merging of SSSOM mapping tables into existing ontologies using standard tools such as ROBOT (under development). Note that SSSOM is currently under development and subject to change. Please leave us a comment on the [issue tracker](https://github.com/OBOFoundry/SSSOM/issues) if you want to be involved. The full specification can be found [here](https://w3id.org/sssom/spec). ## Citation If you have found SSSOM to be helpful in your work, please consider citing: Nicolas Matentzoglu, James P Balhoff, Susan M Bello, Chris Bizon, Matthew Brush, Tiffany J Callahan, Christopher G Chute, William D Duncan, Chris T Evelo, Davera Gabriel, John Graybeal, Alasdair Gray, Benjamin M Gyori, Melissa Haendel, Henriette Harmse, Nomi L Harris, Ian Harrow, Harshad B Hegde, Amelia L Hoyt, Charles T Hoyt, Dazhi Jiao, Ernesto Jiménez-Ruiz, Simon Jupp, Hyeongsik Kim, Sebastian Koehler, Thomas Liener, Qinqin Long, James Malone, James A McLaughlin, Julie A McMurry, Sierra Moxon, Monica C Munoz-Torres, David Osumi-Sutherland, James A Overton, Bjoern Peters, Tim Putman, Núria Queralt-Rosinach, Kent Shefchek, Harold Solbrig, Anne Thessen, Tania Tudorache, Nicole Vasilevsky, Alex H Wagner, Christopher J Mungall, A Simple Standard for Sharing Ontological Mappings (SSSOM), Database, Volume 2022, 2022, baac035, https://doi.org/10.1093/database/baac035 ```bibtex @article{10.1093/database/baac035, author = {Matentzoglu, Nicolas and Balhoff, James P and Bello, Susan M and Bizon, Chris and Brush, Matthew and Callahan, Tiffany J and Chute, Christopher G and Duncan, William D and Evelo, Chris T and Gabriel, Davera and Graybeal, John and Gray, Alasdair and Gyori, Benjamin M and Haendel, Melissa and Harmse, Henriette and Harris, Nomi L and Harrow, Ian and Hegde, Harshad B and Hoyt, Amelia L and Hoyt, Charles T and Jiao, Dazhi and Jiménez-Ruiz, Ernesto and Jupp, Simon and Kim, Hyeongsik and Koehler, Sebastian and Liener, Thomas and Long, Qinqin and Malone, James and 
McLaughlin, James A and McMurry, Julie A and Moxon, Sierra and Munoz-Torres, Monica C and Osumi-Sutherland, David and Overton, James A and Peters, Bjoern and Putman, Tim and Queralt-Rosinach, Núria and Shefchek, Kent and Solbrig, Harold and Thessen, Anne and Tudorache, Tania and Vasilevsky, Nicole and Wagner, Alex H and Mungall, Christopher J}, title = "{A Simple Standard for Sharing Ontological Mappings (SSSOM)}", journal = {Database}, volume = {2022}, year = {2022}, month = {05}, abstract = "{Despite progress in the development of standards for describing and exchanging scientific information, the lack of easy-to-use standards for mapping between different representations of the same or similar objects in different databases poses a major impediment to data integration and interoperability. Mappings often lack the metadata needed to be correctly interpreted and applied. For example, are two terms equivalent or merely related? Are they narrow or broad matches? Or are they associated in some other way? Such relationships between the mapped terms are often not documented, which leads to incorrect assumptions and makes them hard to use in scenarios that require a high degree of precision (such as diagnostics or risk prediction). Furthermore, the lack of descriptions of how mappings were done makes it hard to combine and reconcile mappings, particularly curated and automated ones. We have developed the Simple Standard for Sharing Ontological Mappings (SSSOM) which addresses these problems by: (i) Introducing a machine-readable and extensible vocabulary to describe metadata that makes imprecision, inaccuracy and incompleteness in mappings explicit. (ii) Defining an easy-to-use simple table-based format that can be integrated into existing data science pipelines without the need to parse or query ontologies, and that integrates seamlessly with Linked Data principles. 
(iii) Implementing open and community-driven collaborative workflows that are designed to evolve the standard continuously to address changing requirements and mapping practices. (iv) Providing reference tools and software libraries for working with the standard. In this paper, we present the SSSOM standard, describe several use cases in detail and survey some of the existing work on standardizing the exchange of mappings, with the goal of making mappings Findable, Accessible, Interoperable and Reusable (FAIR). The SSSOM specification can be found at http://w3id.org/sssom/spec.Database URL: http://w3id.org/sssom/spec}", issn = {1758-0463}, doi = {10.1093/database/baac035}, url = {https://doi.org/10.1093/database/baac035}, note = {baac035}, eprint = {https://academic.oup.com/database/article-pdf/doi/10.1093/database/baac035/43832024/baac035.pdf}, } ``` A [second report with updates since the primary SSSOM publication](https://ceur-ws.org/Vol-3324/om2022_LTpaper6.pdf) above was published as part of the proceedings of the Ontology Matching Workshop 2022. ## Copying SSSOM is distributed under the terms of the 3-clause BSD license, as included in the [LICENSE](LICENSE) file of the source distribution. By exception, the following files are _not_ covered by the 3-clause BSD license: * [sssom-banner.png](src/docs/images/sssom-banner.png): That file may only be used by members of the internal Monarch team and collaborators on Monarch flagship products. 
================================================ FILE: SSSOM.md ================================================ This page has moved here: https://mapping-commons.github.io/sssom/spec/ ================================================ FILE: about.yaml ================================================ name: sssom_schema description: A Simple Standard for Sharing Ontology Mappings (SSSOM) source_schema_path: src/sssom_schema/schema/sssom_schema.yaml ================================================ FILE: examples/README.md ================================================ # Examples of use of sssom This folder contains example data conforming to sssom The source for these is in [src/data](../src/data/examples) ================================================ FILE: examples/embedded/foodie-inc-2022-05-01.sssom.tsv ================================================ # comment: We could map to FOODON:00004187 instead which more specifically refers to # 'raw' Pink apples. Decided against to be consistent with other mapping choices. # curie_map: # FOODON: http://purl.obolibrary.org/obo/FOODON_ # KF_FOOD: https://kewl-foodie.inc/food/ # orcid: https://orcid.org/ # owl: http://www.w3.org/2002/07/owl# # rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# # rdfs: http://www.w3.org/2000/01/rdf-schema# # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # sssom: https://w3id.org/sssom/ # wikidata: https://www.wikidata.org/wiki/ # license: https://creativecommons.org/licenses/by/4.0/ # mapping_date: '2022-05-02' # mapping_set_description: Manually curated alignment of KEWL FOODIE INC internal food # and nutrition database with Food Ontology (FOODON). Intended to be used for ontological # analysis and grouping of KEWL FOODIE INC related data. 
# mapping_set_id: https://w3id.org/sssom/tutorial/example1.sssom.tsv # mapping_set_version: '2022-05-01' # object_source: wikidata:Q55118395 # object_source_version: http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl # subject_source: KF_FOOD:DB subject_id subject_label predicate_id object_id object_label mapping_justification author_id object_source_version mapping_date confidence comment KF_FOOD:F001 apple skos:exactMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 0.95 "We could map to FOODON:03310788 instead to cover sliced apples, but only ""whole"" apple types exist." KF_FOOD:F002 gala skos:exactMatch FOODON:00003348 Gala apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 KF_FOOD:F003 pink skos:exactMatch FOODON:00004187 Pink apple (whole, raw) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 0.9 "We could map to FOODON:00004187 instead which more specifically refers to ""raw"" Pink apples. Decided against to be consistent with other mapping choices." 
KF_FOOD:F004 braeburn skos:exactMatch sssom:NoMapping semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 KF_FOOD:F004 braeburn skos:broadMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 ================================================ FILE: examples/embedded/mp-hp-exact-0.0.1.sssom.tsv ================================================ # curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # owl: http://www.w3.org/2002/07/owl# # rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# # rdfs: http://www.w3.org/2000/01/rdf-schema# # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # sssom: https://w3id.org/sssom/ # license: https://creativecommons.org/publicdomain/zero/1.0/ # mapping_provider: http://purl.obolibrary.org/obo/upheno.owl # mapping_set_id: https://w3id.org/sssom/mappings/27f85fe9-8a72-4e76-909b-7ba4244d9ede subject_id subject_label predicate_id object_id object_label mapping_justification HP:0000175 Cleft palate skos:exactMatch MP:0000111 cleft palate semapv:LexicalMatching HP:0000252 Microcephaly skos:exactMatch MP:0000433 microcephaly semapv:LexicalMatching HP:0000260 Wide anterior fontanel skos:exactMatch MP:0000085 large anterior fontanelle semapv:LexicalMatching HP:0000375 Abnormal cochlea morphology skos:exactMatch MP:0000031 abnormal cochlea morphology semapv:LexicalMatching HP:0000411 Protruding ear skos:exactMatch MP:0000021 prominent ears semapv:LexicalMatching HP:0000822 Hypertension skos:exactMatch MP:0000231 hypertension semapv:LexicalMatching HP:0001336 Myoclonus skos:exactMatch MP:0000243 myoclonus semapv:LexicalMatching HP:0001363 Craniosynostosis skos:exactMatch MP:0000081 premature cranial suture closure semapv:LexicalMatching HP:0001596 Alopecia skos:exactMatch 
MP:0000414 alopecia semapv:LexicalMatching HP:0001627 Abnormal heart morphology skos:exactMatch MP:0000266 abnormal heart morphology semapv:LexicalMatching HP:0001633 Abnormal mitral valve morphology skos:exactMatch MP:0000286 abnormal mitral valve morphology semapv:LexicalMatching HP:0001667 Right ventricular hypertrophy skos:exactMatch MP:0000276 heart right ventricle hypertrophy semapv:LexicalMatching HP:0001679 Abnormal aortic morphology skos:exactMatch MP:0000272 abnormal aorta morphology semapv:LexicalMatching HP:0001719 Double outlet right ventricle skos:exactMatch MP:0000284 double outlet right ventricle semapv:LexicalMatching HP:0001882 Leukopenia skos:exactMatch MP:0000221 decreased leukocyte cell number semapv:LexicalMatching HP:0001913 Granulocytopenia skos:exactMatch MP:0000334 decreased granulocyte number semapv:LexicalMatching HP:0001974 Leukocytosis skos:exactMatch MP:0000218 increased leukocyte cell number semapv:LexicalMatching HP:0001978 Extramedullary hematopoiesis skos:exactMatch MP:0000240 extramedullary hematopoiesis semapv:LexicalMatching HP:0001981 Schistocytosis skos:exactMatch MP:0000314 schistocytosis semapv:LexicalMatching HP:0002212 Curly hair skos:exactMatch MP:0000410 waved hair semapv:LexicalMatching HP:0002659 Increased susceptibility to fractures skos:exactMatch MP:0000061 fragile skeleton semapv:LexicalMatching HP:0002763 Abnormal cartilage morphology skos:exactMatch MP:0000163 abnormal cartilage morphology semapv:LexicalMatching HP:0003307 Hyperlordosis skos:exactMatch MP:0000162 lordosis semapv:LexicalMatching HP:0004349 Reduced bone mineral density skos:exactMatch MP:0000063 decreased bone mineral density semapv:LexicalMatching HP:0006288 Advanced eruption of teeth skos:exactMatch MP:0000122 premature tooth eruption semapv:LexicalMatching HP:0008551 Microtia skos:exactMatch MP:0000018 small ears semapv:LexicalMatching HP:0009124 Abnormal adipose tissue morphology skos:exactMatch MP:0000003 abnormal adipose tissue morphology 
semapv:LexicalMatching HP:0009910 Aplasia of the middle ear ossicles skos:exactMatch MP:0000040 absent middle ear ossicles semapv:LexicalMatching HP:0009939 Mandibular aplasia skos:exactMatch MP:0000087 absent mandible semapv:LexicalMatching HP:0011002 Osteopetrosis skos:exactMatch MP:0000067 osteopetrosis semapv:LexicalMatching HP:0011457 Loss of eyelashes skos:exactMatch MP:0000425 loss of eyelid cilia semapv:LexicalMatching HP:0011897 Neutrophilia skos:exactMatch MP:0000219 increased neutrophil cell number semapv:LexicalMatching HP:0012234 Agranulocytosis skos:exactMatch MP:0000334 decreased granulocyte number semapv:LexicalMatching HP:0012543 Hemosiderinuria skos:exactMatch MP:0000327 hemosiderinuria semapv:LexicalMatching HP:0025065 Abnormal mean corpuscular volume skos:exactMatch MP:0000226 abnormal mean corpuscular volume semapv:LexicalMatching HP:0025084 Folliculitis skos:exactMatch MP:0000376 folliculitis semapv:LexicalMatching HP:0031377 Abnormal cell proliferation skos:exactMatch MP:0000350 abnormal cell proliferation semapv:LexicalMatching HP:0031851 Reduced hematocrit skos:exactMatch MP:0000208 decreased hematocrit semapv:LexicalMatching HP:0032310 Granulocytosis skos:exactMatch MP:0000322 increased granulocyte number semapv:LexicalMatching HP:0100629 Midline facial cleft skos:exactMatch MP:0000108 midline facial cleft semapv:LexicalMatching HP:0100671 Abnormal trabecular bone morphology skos:exactMatch MP:0000130 abnormal trabecular bone morphology semapv:LexicalMatching HP:0400001 Chin with vertical crease skos:exactMatch MP:0000114 cleft chin semapv:LexicalMatching ================================================ FILE: examples/external/example1.sssom.tsv ================================================ subject_id subject_label predicate_id object_id object_label mapping_justification author_id object_source_version mapping_date confidence comment KF_FOOD:F001 apple skos:exactMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration 
orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 0.95 "We could map to FOODON:03310788 instead to cover sliced apples, but only ""whole"" apple types exist." KF_FOOD:F002 gala skos:exactMatch FOODON:00003348 Gala apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 KF_FOOD:F003 pink skos:exactMatch FOODON:00004187 Pink apple (whole, raw) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 0.9 "We could map to FOODON:00004187 instead which more specifically refers to ""raw"" Pink apples. Decided against to be consistent with other mapping choices." KF_FOOD:F004 braeburn skos:exactMatch sssom:NoMapping semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 KF_FOOD:F004 braeburn skos:broadMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 ================================================ FILE: examples/external/example1.sssom.yml ================================================ comment: We could map to FOODON:00004187 instead which more specifically refers to 'raw' Pink apples. Decided against to be consistent with other mapping choices. 
curie_map: FOODON: http://purl.obolibrary.org/obo/FOODON_ KF_FOOD: https://kewl-foodie.inc/food/ orcid: https://orcid.org/ owl: http://www.w3.org/2002/07/owl# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# rdfs: http://www.w3.org/2000/01/rdf-schema# semapv: https://w3id.org/semapv/vocab/ skos: http://www.w3.org/2004/02/skos/core# sssom: https://w3id.org/sssom/ wikidata: https://www.wikidata.org/wiki/ license: https://creativecommons.org/licenses/by/4.0/ mapping_date: "2022-05-02" mapping_set_description: Manually curated alignment of KEWL FOODIE INC internal food and nutrition database with Food Ontology (FOODON). Intended to be used for ontological analysis and grouping of KEWL FOODIE INC related data. mapping_set_id: https://w3id.org/sssom/tutorial/example1.sssom.tsv mapping_set_version: "2022-05-01" object_source: wikidata:Q55118395 object_source_version: http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl subject_source: KF_FOOD:DB ================================================ FILE: examples/external/mp-hp-exact-0.0.1.sssom.tsv ================================================ subject_id subject_label predicate_id object_id object_label mapping_justification HP:0000175 Cleft palate skos:exactMatch MP:0000111 cleft palate semapv:LexicalMatching HP:0000252 Microcephaly skos:exactMatch MP:0000433 microcephaly semapv:LexicalMatching HP:0000260 Wide anterior fontanel skos:exactMatch MP:0000085 large anterior fontanelle semapv:LexicalMatching HP:0000375 Abnormal cochlea morphology skos:exactMatch MP:0000031 abnormal cochlea morphology semapv:LexicalMatching HP:0000411 Protruding ear skos:exactMatch MP:0000021 prominent ears semapv:LexicalMatching HP:0000822 Hypertension skos:exactMatch MP:0000231 hypertension semapv:LexicalMatching HP:0001336 Myoclonus skos:exactMatch MP:0000243 myoclonus semapv:LexicalMatching HP:0001363 Craniosynostosis skos:exactMatch MP:0000081 premature cranial suture closure semapv:LexicalMatching HP:0001596 Alopecia 
skos:exactMatch MP:0000414 alopecia semapv:LexicalMatching HP:0001627 Abnormal heart morphology skos:exactMatch MP:0000266 abnormal heart morphology semapv:LexicalMatching HP:0001633 Abnormal mitral valve morphology skos:exactMatch MP:0000286 abnormal mitral valve morphology semapv:LexicalMatching HP:0001667 Right ventricular hypertrophy skos:exactMatch MP:0000276 heart right ventricle hypertrophy semapv:LexicalMatching HP:0001679 Abnormal aortic morphology skos:exactMatch MP:0000272 abnormal aorta morphology semapv:LexicalMatching HP:0001719 Double outlet right ventricle skos:exactMatch MP:0000284 double outlet right ventricle semapv:LexicalMatching HP:0001882 Leukopenia skos:exactMatch MP:0000221 decreased leukocyte cell number semapv:LexicalMatching HP:0001913 Granulocytopenia skos:exactMatch MP:0000334 decreased granulocyte number semapv:LexicalMatching HP:0001974 Leukocytosis skos:exactMatch MP:0000218 increased leukocyte cell number semapv:LexicalMatching HP:0001978 Extramedullary hematopoiesis skos:exactMatch MP:0000240 extramedullary hematopoiesis semapv:LexicalMatching HP:0001981 Schistocytosis skos:exactMatch MP:0000314 schistocytosis semapv:LexicalMatching HP:0002212 Curly hair skos:exactMatch MP:0000410 waved hair semapv:LexicalMatching HP:0002659 Increased susceptibility to fractures skos:exactMatch MP:0000061 fragile skeleton semapv:LexicalMatching HP:0002763 Abnormal cartilage morphology skos:exactMatch MP:0000163 abnormal cartilage morphology semapv:LexicalMatching HP:0003307 Hyperlordosis skos:exactMatch MP:0000162 lordosis semapv:LexicalMatching HP:0004349 Reduced bone mineral density skos:exactMatch MP:0000063 decreased bone mineral density semapv:LexicalMatching HP:0006288 Advanced eruption of teeth skos:exactMatch MP:0000122 premature tooth eruption semapv:LexicalMatching HP:0008551 Microtia skos:exactMatch MP:0000018 small ears semapv:LexicalMatching HP:0009124 Abnormal adipose tissue morphology skos:exactMatch MP:0000003 abnormal adipose 
tissue morphology semapv:LexicalMatching HP:0009910 Aplasia of the middle ear ossicles skos:exactMatch MP:0000040 absent middle ear ossicles semapv:LexicalMatching HP:0009939 Mandibular aplasia skos:exactMatch MP:0000087 absent mandible semapv:LexicalMatching HP:0011002 Osteopetrosis skos:exactMatch MP:0000067 osteopetrosis semapv:LexicalMatching HP:0011457 Loss of eyelashes skos:exactMatch MP:0000425 loss of eyelid cilia semapv:LexicalMatching HP:0011897 Neutrophilia skos:exactMatch MP:0000219 increased neutrophil cell number semapv:LexicalMatching HP:0012234 Agranulocytosis skos:exactMatch MP:0000334 decreased granulocyte number semapv:LexicalMatching HP:0012543 Hemosiderinuria skos:exactMatch MP:0000327 hemosiderinuria semapv:LexicalMatching HP:0025065 Abnormal mean corpuscular volume skos:exactMatch MP:0000226 abnormal mean corpuscular volume semapv:LexicalMatching HP:0025084 Folliculitis skos:exactMatch MP:0000376 folliculitis semapv:LexicalMatching HP:0031377 Abnormal cell proliferation skos:exactMatch MP:0000350 abnormal cell proliferation semapv:LexicalMatching HP:0031851 Reduced hematocrit skos:exactMatch MP:0000208 decreased hematocrit semapv:LexicalMatching HP:0032310 Granulocytosis skos:exactMatch MP:0000322 increased granulocyte number semapv:LexicalMatching HP:0100629 Midline facial cleft skos:exactMatch MP:0000108 midline facial cleft semapv:LexicalMatching HP:0100671 Abnormal trabecular bone morphology skos:exactMatch MP:0000130 abnormal trabecular bone morphology semapv:LexicalMatching HP:0400001 Chin with vertical crease skos:exactMatch MP:0000114 cleft chin semapv:LexicalMatching ================================================ FILE: examples/external/mp-hp-exact-0.0.1.sssom.yml ================================================ curie_map: HP: http://purl.obolibrary.org/obo/HP_ MP: http://purl.obolibrary.org/obo/MP_ owl: http://www.w3.org/2002/07/owl# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# rdfs: http://www.w3.org/2000/01/rdf-schema# 
semapv: https://w3id.org/semapv/vocab/ skos: http://www.w3.org/2004/02/skos/core# sssom: https://w3id.org/sssom/ license: https://creativecommons.org/publicdomain/zero/1.0/ mapping_provider: http://purl.obolibrary.org/obo/upheno.owl mapping_set_id: https://w3id.org/sssom/mappings/27f85fe9-8a72-4e76-909b-7ba4244d9ede ================================================ FILE: examples/schema/cardinality-scope-empty.sssom.tsv ================================================ #curie_map: # COMENT: https://example.com/entities/ # NETENT: https://example.net/entities/ # ORGENT: https://example.org/entities/ # SRC: https://example.org/sources/ #mapping_set_id: https://example.org/sets/cardinality-scope-empty #license: https://creativecommons.org/licenses/by/4.0/ subject_id subject_label predicate_id object_id object_label mapping_justification object_source mapping_cardinality ORGENT:0001 alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration SRC:com 1:n ORGENT:0001 alice skos:closeMatch NETENT:0111 alpha semapv:ManualMappingCuration SRC:net 1:n ORGENT:0002 bob skos:closeMatch COMENT:0012 beta semapv:ManualMappingCuration SRC:com 1:n ORGENT:0002 bob skos:closeMatch NETENT:0112 bravo semapv:ManualMappingCuration SRC:net 1:n ORGENT:0007 gavin skos:closeMatch NETENT:0117 golf semapv:ManualMappingCuration SRC:net 1:n ORGENT:0007 gavin skos:exactMatch COMENT:0013 gamma semapv:ManualMappingCuration SRC:com 1:n ================================================ FILE: examples/schema/cardinality-scope-predicate+object_source.sssom.tsv ================================================ #curie_map: # COMENT: https://example.com/entities/ # NETENT: https://example.net/entities/ # ORGENT: https://example.org/entities/ # SRC: https://example.org/sources/ #mapping_set_id: https://example.org/sets/cardinality-scope-predicate+object_source #license: https://creativecommons.org/licenses/by/4.0/ #cardinality_scope: # - predicate_id # - object_source subject_id subject_label 
predicate_id object_id object_label mapping_justification object_source mapping_cardinality ORGENT:0001 alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration SRC:com 1:1 ORGENT:0001 alice skos:closeMatch NETENT:0111 alpha semapv:ManualMappingCuration SRC:net 1:1 ORGENT:0002 bob skos:closeMatch COMENT:0012 beta semapv:ManualMappingCuration SRC:com 1:1 ORGENT:0002 bob skos:closeMatch NETENT:0112 bravo semapv:ManualMappingCuration SRC:net 1:1 ORGENT:0007 gavin skos:closeMatch NETENT:0117 golf semapv:ManualMappingCuration SRC:net 1:1 ORGENT:0007 gavin skos:exactMatch COMENT:0013 gamma semapv:ManualMappingCuration SRC:com 1:1 ================================================ FILE: examples/schema/cardinality-scope-predicate.sssom.tsv ================================================ #curie_map: # COMENT: https://example.com/entities/ # NETENT: https://example.net/entities/ # ORGENT: https://example.org/entities/ # SRC: https://example.org/sources/ #mapping_set_id: https://example.org/sets/cardinality-scope-predicate #license: https://creativecommons.org/licenses/by/4.0/ #cardinality_scope: # - predicate_id subject_id subject_label predicate_id object_id object_label mapping_justification object_source mapping_cardinality ORGENT:0001 alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration SRC:com 1:n ORGENT:0001 alice skos:closeMatch NETENT:0111 alpha semapv:ManualMappingCuration SRC:net 1:n ORGENT:0002 bob skos:closeMatch COMENT:0012 beta semapv:ManualMappingCuration SRC:com 1:n ORGENT:0002 bob skos:closeMatch NETENT:0112 bravo semapv:ManualMappingCuration SRC:net 1:n ORGENT:0007 gavin skos:closeMatch NETENT:0117 golf semapv:ManualMappingCuration SRC:net 1:1 ORGENT:0007 gavin skos:exactMatch COMENT:0013 gamma semapv:ManualMappingCuration SRC:com 1:1 ================================================ FILE: examples/schema/cardinality-with-unmapped-entities.sssom.tsv ================================================ #curie_map: # OBJ: 
https://example.org/object/ # SRC: https://example.org/sources/ # SUBJ: https://example.org/subject/ #mapping_set_id: https://example.org/sets/cardinality-with-unmapped-entities #license: https://creativecommons.org/licenses/by/4.0/ subject_id predicate_id object_id mapping_justification subject_source object_source mapping_cardinality comment SUBJ:0001 skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration SRC:A SRC:B 1:0 S1 in vocabulary A has no exact match in vocabulary B SUBJ:0001 skos:closeMatch OBJ:0001 semapv:ManualMappingCuration SRC:A SRC:B 1:1 S1 mapped only to O1, O1 mapped only to S1 -- the record involving sssom:NoTermFound does not count, as it is an absence of match rather than an actual mapping sssom:NoTermFound skos:exactMatch OBJ:0002 semapv:ManualMappingCuration SRC:C SRC:D 0:1 O2 in vocabulary D has no exact match in vocabulary C sssom:NoTermFound skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration SRC:E SRC:F 0:0 No exact match between any term from vocabulary E and any term for vocabulary F (in other words, the two vocabularies are completely disjoint, at least as far as exact matches are considered) ================================================ FILE: examples/schema/cardinality.sssom.tsv ================================================ #curie_map: # OBJ: https://example.org/object/ # SUBJ: https://example.org/subject/ #mapping_set_id: https://example.org/sets/cardinality #license: https://creativecommons.org/licenses/by/4.0/ subject_id predicate_id object_id mapping_justification mapping_cardinality comment SUBJ:0001 skos:exactMatch OBJ:0001 semapv:LexicalMatching 1:1 S1 and O1 only mapped to each other SUBJ:0001 skos:exactMatch OBJ:0001 semapv:MappingReview 1:1 S1 and O1 only mapped to each other SUBJ:0002 skos:exactMatch OBJ:0002 semapv:LexicalMatching 1:n S2 mapped to both O2 and O3, O2 mapped only to S2 SUBJ:0002 skos:exactMatch OBJ:0003 semapv:LexicalMatching 1:n S2 mapped to both O2 and O3, O3 mapped only to S2 
SUBJ:0003 skos:exactMatch OBJ:0004 semapv:LexicalMatching n:1 S3 and S4 both mapped to only O4 SUBJ:0004 skos:exactMatch OBJ:0004 semapv:LexicalMatching n:1 S3 and S4 both mapped to only O4 SUBJ:0005 skos:exactMatch OBJ:0005 semapv:LexicalMatching n:n S5 mapped to O5 and O6, O5 mapped to S5 and S6 SUBJ:0005 skos:exactMatch OBJ:0006 semapv:LexicalMatching 1:n S5 mapped to O5 and O6, O6 mapped only to S5 SUBJ:0006 skos:exactMatch OBJ:0005 semapv:LexicalMatching n:1 S6 mapped only to O5, O5 mapped to both S5 and S6 ================================================ FILE: examples/schema/composite-entities.sssom.tsv ================================================ #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MONDO: http://purl.obolibrary.org/obo/MONDO_ # MP: http://purl.obolibrary.org/obo/MP_ # SCHEMA: http://example.org/schema #mapping_set_id: https://w3id.org/sssom/commons/examples/composite-entities.sssom.tsv #license: https://creativecommons.org/publicdomain/zero/1.0/ #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. 
subject_id predicate_id object_id mapping_justification subject_type SCHEMA:0001/(disease:'MONDO:0005148',phenotype:'HP:0009124') skos:exactMatch MP:0000003 semapv:ManualMappingCuration composed entity expression SCHEMA:0001/(disease:'MONDO:0005149',phenotype:'HP:0008551') skos:exactMatch MP:0000018 semapv:ManualMappingCuration composed entity expression SCHEMA:0001/(disease:'MONDO:0005150',phenotype:'HP:0000411') skos:exactMatch MP:0000018 semapv:ManualMappingCuration composed entity expression ================================================ FILE: examples/schema/curation_rule-propagated.sssom.tsv ================================================ #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # orcid: https://orcid.org/ # DISEASE_MAPPING_COMMONS_RULES: https://w3id.org/sssom/commons/disease/curation-rules/ #mapping_set_id: https://w3id.org/sssom/commons/examples/curation_rule.sssom.tsv #mapping_set_description: "This example illustrates how to express that all mappings in a mapping set have been curated according to a specific curation rule" #license: "https://creativecommons.org/publicdomain/zero/1.0/" #creator_id: # - orcid:0000-0002-7356-1779 #mapping_provider: "https://w3id.org/sssom/core_team" #curation_rule: # - DISEASE_MAPPING_COMMONS_RULES:MPR2 #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. 
subject_id predicate_id object_id mapping_justification HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration ================================================ FILE: examples/schema/curation_rule.sssom.tsv ================================================ #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # orcid: https://orcid.org/ # DISEASE_MAPPING_COMMONS_RULES: https://w3id.org/sssom/commons/disease/curation-rules/ #mapping_set_id: https://w3id.org/sssom/commons/examples/curation_rule.sssom.tsv #license: "https://creativecommons.org/publicdomain/zero/1.0/" #creator_id: # - orcid:0000-0002-7356-1779 #mapping_provider: "https://w3id.org/sssom/core_team" #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. subject_id predicate_id object_id mapping_justification curation_rule see_also HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration DISEASE_MAPPING_COMMONS_RULES:MPR2 https://github.com/mapping-commons/disease-mappings/issues/16 HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration DISEASE_MAPPING_COMMONS_RULES:MPR3 https://github.com/mapping-commons/disease-mappings/issues/16 HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration DISEASE_MAPPING_COMMONS_RULES:MPR3 https://github.com/mapping-commons/disease-mappings/issues/16 ================================================ FILE: examples/schema/curation_rule_text-propagated.sssom.tsv ================================================ #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # orcid: https://orcid.org/ #mapping_set_id: https://w3id.org/sssom/commons/examples/curation_rule.sssom.tsv #mapping_set_description: "This example illustrates how to express that all mappings in a mapping set 
have been curated according to a specific curation rule" #license: "https://creativecommons.org/publicdomain/zero/1.0/" #creator_id: # - orcid:0000-0002-7356-1779 #mapping_provider: "https://w3id.org/sssom/core_team" #curation_rule_text: # - Human and mouse phenotypes that inhere in homologous structures and exhibit the same phenotypic quality are considered exact matches. #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. subject_id predicate_id object_id mapping_justification HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration ================================================ FILE: examples/schema/curation_rule_text.sssom.tsv ================================================ #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # orcid: https://orcid.org/ # DISEASE_MAPPING_COMMONS_RULES: https://w3id.org/sssom/commons/disease/curation-rules/ #mapping_set_id: https://w3id.org/sssom/commons/examples/curation_rule_text.sssom.tsv #license: "https://creativecommons.org/publicdomain/zero/1.0/" #creator_id: # - orcid:0000-0002-7356-1779 #mapping_provider: "https://w3id.org/sssom/core_team" #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. 
subject_id predicate_id object_id mapping_justification curation_rule_text see_also HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration The two phenotypes inhere in homologous structures and exhibit the same phenotypic quality https://github.com/mapping-commons/disease-mappings/issues/16 HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration The two phenotypes inhere in homologous structures and exhibit the same phenotypic quality https://github.com/mapping-commons/disease-mappings/issues/16 HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration The two phenotypes are associated with the exact same set of diseases https://github.com/mapping-commons/disease-mappings/issues/16 ================================================ FILE: examples/schema/curation_rule_text2.sssom.tsv ================================================ #curie_map: # WTO: http://purl.obolibrary.org/obo/WTO_ # CO321: "http://www.cropontology.org/rdf/CO_321:" # ror: https://ror.org/ #mapping_set_id: https://w3id.org/sssom/commons/examples/curation_rule_text2.sssom.tsv #license: "https://www.etalab.gouv.fr/licence-ouverte-open-licence/" #comment: This is an example file for the SSSOM for illustration only. This example was extracted from a real mapping set where the subject source (WTO) is an ontology used to annotate text (e.g. scientific literature) and the object source (CO321) is an ontology used to annotate the traits evaluated from observational data. The objective of the alignment is to allow information retrieval from both textual and experimental phenotypic dataset. #creator_id: ror:02kvxyf05 #creator_label: "INRAE" subject_id subject_label predicate_id object_id object_label mapping_justification curation_rule_text comment WTO:0000304 cold resistance skos:closeMatch CO321:0000080 Cold tolerance semapv:ManualMappingCuration Rule 4: We consider that "tolerance" and "resistance" are almost equivalent when applied to abiotic environmental conditions. 
WTO:0000450 aluminium toxicity skos:closeMatch CO321:0000079 Aluminum tolerance semapv:ManualMappingCuration Rule 3: We consider that the user of the information retrieval function interested in plant traits related to metal toxicity (WTO) also wants to retrieve observational data measuring the plant tolerance to the same metal (CO_321). The rule metal + toxicity (WTO) <-> metal + tolerance (CO321) is valid for any kind of metal. WTO:0000065 anther extrusion skos:exactMatch CO321:0000982 Anther extrusion semapv:ManualMappingCuration WTO:0000296 aphid resistance skos:closeMatch CO321:0000085 Aphid damage semapv:ManualMappingCuration Rule 2: We consider that the user of the information retrieval function interested in plant traits related to damages caused by some animal, insect, nematode, etc. also wants to retrieve observational data mentioning resistance to the same living organism. WTO:0000281 Armyworm resistance skos:closeMatch CO321:0000086 Armyworm damage semapv:ManualMappingCuration Rule 2: We consider that the user of the information retrieval function interested in plant traits related to damages caused by some animal, insect, nematode, etc. also wants to retrieve observational data mentioning resistance to the same living organism. WTO:0000125 awn color skos:exactMatch CO321:0000960 Awn color semapv:ManualMappingCuration WTO:0000126 awn length skos:exactMatch CO321:0000026 Awn length semapv:ManualMappingCuration WTO:0000452 bacterial leaf blight resistance skos:closeMatch CO321:0000932 Bacterial leaf blight severity semapv:ManualMappingCuration Rule 1.3: We consider that the user of the information retrieval function, given a pathogen or a disease, would like to retrieve all data, independently of the way the affection is observed. In observational data, a severity score is represented by two digits representing the vertical disease progress and an estimate of severity. 
The capacity of resistance to a disease would be deduced from the severity of this one on the plant. ================================================ FILE: examples/schema/curie_map.sssom.tsv ================================================ #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # orcid: https://orcid.org/ #mapping_set_id: https://w3id.org/sssom/commons/examples/curie_map.sssom.tsv #license: "https://creativecommons.org/publicdomain/zero/1.0/" #creator_id: # - orcid:0000-0002-7356-1779 #mapping_provider: "https://w3id.org/sssom/core_team" #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. subject_id predicate_id object_id mapping_justification HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration ================================================ FILE: examples/schema/cxsmiles_pipe.sssom.tsv ================================================ #curie_map: # CHEBI: http://purl.obolibrary.org/obo/CHEBI_ # PUBCHEM.COMPOUND: http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID # wikidata: http://www.wikidata.org/entity/ # orcid: https://orcid.org/ #mapping_set_id: https://w3id.org/sssom/commons/examples/cxsmiles_pipe.sssom.tsv #mapping_set_description: "Mappings between ChEBI and PubChem Compound established via structural matching. The ferrocene entry uses a CXSMILES match_string whose pipe-delimited extension block must be escaped to avoid being incorrectly split by a parser treating | as a multivalued separator." #license: https://creativecommons.org/publicdomain/zero/1.0/ #creator_id: # - orcid:0000-0002-7356-1779 #mapping_provider: https://w3id.org/sssom/core_team #comment: This is an example file for SSSOM for illustration only. 
subject_id predicate_id object_id mapping_justification match_string subject_match_field object_match_field CHEBI:30672 skos:exactMatch PUBCHEM.COMPOUND:7611 semapv:LexicalMatching c12c3c4c5c1[Fe]23451234c5c1c2c3c45 \|C:4.5,0.6,1.7,2.8,3.9,7.12,6.10,9.16,10.18,8.14\| wikidata:P10718 wikidata:P10718 ================================================ FILE: examples/schema/extension-slots.sssom.tsv ================================================ #curie_map: # COMENT: https://example.com/entities/ # EXPROP: https://example.org/properties/ # ORGENT: https://example.org/entities/ #mapping_set_id: https://example.org/sets/exo2c-with-extensions #mapping_set_title: Sample set EXO2C with extension slots #license: https://creativecommons.org/licenses/by/4.0/ #extension_definitions: # - slot_name: ext_bar # property: EXPROP:barProperty # type_hint: xsd:integer # - slot_name: ext_baz # property: EXPROP:bazProperty # type_hint: linkml:Uriorcurie # - slot_name: ext_foo # property: EXPROP:fooProperty #ext_foo: Foo A #ext_undeclared_foo: Foo B subject_id subject_label predicate_id object_id object_label mapping_justification ext_bar ext_baz ext_undeclared_baz ORGENT:0001 alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration 111 ORGENT:BAZ_0001 BAZ A ORGENT:0002 bob skos:closeMatch COMENT:0012 beta semapv:ManualMappingCuration 112 ORGENT:BAZ_0002 ORGENT:0004 daphne skos:closeMatch COMENT:0014 delta semapv:ManualMappingCuration 114 Baz C ORGENT:0005 eve skos:closeMatch COMENT:0015 epsilon semapv:ManualMappingCuration 115 ORGENT:BAZ_0005 Baz E ================================================ FILE: examples/schema/issue_tracker.sssom.tsv ================================================ #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # orcid: https://orcid.org/ # MH_MAPPINGS_GITHUB_ISSUES: https://github.com/mapping-commons/mh_mapping_initiative/issues/ #mapping_set_id: 
https://w3id.org/sssom/commons/examples/issue_tracker.sssom.tsv #license: "https://creativecommons.org/publicdomain/zero/1.0/" #creator_id: # - orcid:0000-0002-7356-1779 #mapping_provider: "https://w3id.org/sssom/core_team" #issue_tracker: "https://github.com/mapping-commons/mh_mapping_initiative/issues" #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. subject_id predicate_id object_id mapping_justification issue_tracker_item HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration MH_MAPPINGS_GITHUB_ISSUES:9123 HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration MH_MAPPINGS_GITHUB_ISSUES:9125 ================================================ FILE: examples/schema/issue_tracker_item.sssom.tsv ================================================ #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # orcid: https://orcid.org/ # MH_MAPPINGS_GITHUB_ISSUES: https://github.com/mapping-commons/mh_mapping_initiative/issues/ #mapping_set_id: https://w3id.org/sssom/commons/examples/issue_tracker_item.sssom.tsv #license: "https://creativecommons.org/publicdomain/zero/1.0/" #creator_id: # - orcid:0000-0002-7356-1779 #mapping_provider: "https://w3id.org/sssom/core_team" #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. 
subject_id predicate_id object_id mapping_justification issue_tracker_item HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration MH_MAPPINGS_GITHUB_ISSUES:9123 HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration HP:0000411 skos:exactMatch MP:0000021 semapv:ManualMappingCuration MH_MAPPINGS_GITHUB_ISSUES:9125 ================================================ FILE: examples/schema/literals.sssom.tsv ================================================ #curie_map: # COMENT: https://example.com/entities/ #mapping_set_id: https://example.org/sets/literal-mappings #license: https://creativecommons.org/licenses/by/4.0/ subject_label predicate_id object_id object_label mapping_justification subject_type alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration rdfs literal bob skos:closeMatch COMENT:0012 beta semapv:ManualMappingCuration rdfs literal daphne skos:closeMatch COMENT:0014 delta semapv:ManualMappingCuration rdfs literal eve skos:closeMatch COMENT:0015 epsilon semapv:ManualMappingCuration rdfs literal ================================================ FILE: examples/schema/mapping_set_confidence.sssom.tsv ================================================ #sssom_version: "1.1" #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # orcid: https://orcid.org/ #mapping_set_id: https://w3id.org/sssom/commons/examples/mapping_set_confidence.sssom.tsv #mapping_set_confidence: 0.8 #license: "https://creativecommons.org/publicdomain/zero/1.0/" #creator_id: # - orcid:0000-0002-7356-1779 #mapping_provider: "https://w3id.org/sssom/core_team" #mapping_tool: AgreementMakerLight #comment: This is an example file for the SSSOM for illustration only. The mapping_set_confidence value expresses the confidence of the creator of the mapping in the agent that produced the mappings, i.e. AgreementMakerLight.
subject_id predicate_id object_id mapping_justification HP:0009124 skos:exactMatch MP:0000003 semapv:LexicalMatching HP:0008551 skos:exactMatch MP:0000018 semapv:LexicalMatching ================================================ FILE: examples/schema/mapping_tool_id.sssom.tsv ================================================ #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # orcid: https://orcid.org/ # wikidata: https://www.wikidata.org/wiki/ #mapping_set_id: https://w3id.org/sssom/commons/examples/mapping_tool_id.sssom.tsv #license: "https://creativecommons.org/publicdomain/zero/1.0/" #creator_id: # - orcid:0000-0002-7356-1779 #mapping_provider: "https://w3id.org/sssom/core_team" #issue_tracker: "https://github.com/mapping-commons/mh_mapping_initiative/issues" #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. subject_id predicate_id object_id mapping_justification mapping_tool_id HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration wikidata:Q58057366 HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration wikidata:Q58057366 ================================================ FILE: examples/schema/no_term_found.sssom.tsv ================================================ #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # obo: http://purl.obolibrary.org/obo/ # orcid: https://orcid.org/ #mapping_set_id: https://w3id.org/sssom/commons/examples/no_term_found.sssom.tsv #creator_id: # - orcid:0000-0002-7356-1779 #subject_source: obo:hp #object_source: obo:mp #license: "https://creativecommons.org/publicdomain/zero/1.0/" #mapping_provider: "https://w3id.org/sssom/core_team" #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. 
subject_id predicate_id object_id mapping_justification HP:0009124 skos:exactMatch MP:0000003 semapv:ManualMappingCuration HP:0000411 skos:exactMatch sssom:NoTermFound semapv:ManualMappingCuration ================================================ FILE: examples/schema/pipe-escaping.sssom.tsv ================================================ #curie_map: # COMENT: https://example.com/entities/ # ORGENT: https://example.org/entities/ #mapping_set_id: https://example.org/sets/test-escaping-pipe #license: https://creativecommons.org/licenses/by/4.0/ #comment: >- # This mapping set is intended to demonstrate the backslash-based # escaping mechanism for multi-valued slots. # In this set, the author_label slot of the first record has two values: # "Alice|Bob" and "Charlie" (NOT "Alice\", "Bob", and "Charlie"). # The author_label slot of the second record has three values: # "Alice\Bob", "Charlie\", and "David\|Eve\". subject_id subject_label predicate_id object_id object_label mapping_justification author_label ORGENT:0001 alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration Alice\|Bob|Charlie ORGENT:0002 alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration Alice\\Bob|Charlie\\|David\\\|Eve\\ ================================================ FILE: examples/schema/predicate-types.sssom.tsv ================================================ #curie_map: # COMENT: https://example.com/entities/ # ORGENT: https://example.org/entities/ # ORGPRO: https://example.org/properties/ #mapping_set_id: https://w3id.org/sssom/commons/examples/predicate-types.sssom.tsv #license: https://creativecommons.org/licenses/by/4.0/ subject_id subject_label predicate_id object_id object_label mapping_justification predicate_type ORGENT:0001 alice ORGPRO:example_annot_property COMENT:0011 alpha semapv:ManualMappingCuration owl annotation property ORGENT:0002 bob ORGPRO:example_object_property COMENT:0012 beta semapv:ManualMappingCuration owl object property ORGENT:0004 
daphne ORGPRO:example_data_property COMENT:0014 delta semapv:ManualMappingCuration owl data property ORGENT:0005 eve ORGPRO:example_rdf_property COMENT:0015 epsilon semapv:ManualMappingCuration rdf property ================================================ FILE: examples/schema/record-ids.sssom.tsv ================================================ # sssom_version: "1.1" # curie_map: # HP: http://purl.obolibrary.org/obo/FBbt_ # MP: http://purl.obolibrary.org/obo/UBERON_ # RI: https://example.org/sets/record-id# # mapping_set_id: https://example.org/sets/record-id # license: https://creativecommons.org/publicdomain/zero/1.0/ record_id subject_id predicate_id object_id mapping_justification RI:0000001 HP:0009124 skos:exactMatch MP:0000003 semapv:LexicalMatching RI:0000002 HP:0008551 skos:exactMatch MP:0000018 semapv:LexicalMatching RI:0000003 HP:0008551 skos:exactMatch MP:0000018 semapv:ManualMappingCuration ================================================ FILE: examples/schema/review_date.sssom.tsv ================================================ # curie_map: # CHEBI: http://purl.obolibrary.org/obo/CHEBI_ # mesh: http://id.nlm.nih.gov/mesh/ # orcid: https://orcid.org/ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # license: https://creativecommons.org/publicdomain/zero/1.0/ # mapping_set_id: https://github.com/mapping-commons/sssom/blob/master/examples/schema/review_date.sssom.tsv # creator_id: # - orcid:0000-0003-4423-4370 # see_also: # - https://github.com/mapping-commons/sssom/issues/511 # - https://github.com/mapping-commons/sssom/pull/514 # - https://mapping-commons.github.io/sssom/confidence-model subject_id subject_label predicate_id object_id object_label mapping_justification author_id reviewer_id review_date CHEBI:10001 Visnadin skos:exactMatch mesh:C067604 visnadin semapv:ManualMappingCuration orcid:0000-0001-9439-5346 orcid:0000-0003-4423-4370 2026-03-26 ================================================ FILE: 
examples/schema/reviewer_agreement.sssom.tsv ================================================ # curie_map: # CHEBI: http://purl.obolibrary.org/obo/CHEBI_ # mesh: http://id.nlm.nih.gov/mesh/ # orcid: https://orcid.org/ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # license: https://creativecommons.org/publicdomain/zero/1.0/ # mapping_set_id: https://github.com/mapping-commons/sssom/blob/master/examples/schema/reviewer_agreement.sssom.tsv # creator_id: # - orcid:0000-0003-4423-4370 # see_also: # - https://github.com/mapping-commons/sssom/issues/510 # - https://github.com/mapping-commons/sssom/pull/519 # - https://mapping-commons.github.io/sssom/confidence-model subject_id subject_label predicate_id object_id object_label mapping_justification author_id reviewer_id reviewer_agreement comment CHEBI:10001 Visnadin skos:exactMatch mesh:C067604 visnadin semapv:ManualMappingCuration orcid:0000-0001-9439-5346 orcid:0000-0003-4423-4370 0.99 the reviewer confidently agrees with the accuracy of the mapping CHEBI:127105 tribromosalicylanilide skos:exactMatch mesh:C004361 tribromsalan semapv:LexicalMatching orcid:0000-0003-4423-4370 0.0 the reviewer is not sure whether the mapping is correct or incorrect CHEBI:10057 9H-xanthene skos:exactMatch mesh:C002563 xanthan gum semapv:ManualMappingCuration orcid:0000-0003-4423-4370 -0.99 the reviewer confidently disagrees with the accuracy of the mapping ================================================ FILE: examples/schema/similarity_score.sssom.tsv ================================================ #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # orcid: https://orcid.org/ # wikidata: https://www.wikidata.org/entity/ #mapping_set_id: https://w3id.org/sssom/commons/examples/similarity_score.sssom.tsv #license: "https://creativecommons.org/publicdomain/zero/1.0/" #creator_id: # - orcid:0000-0002-7356-1779 #mapping_provider: 
"https://w3id.org/sssom/core_team" #comment: This is an example file for the SSSOM for illustration only. Its contents are entirely fabricated. subject_id predicate_id object_id mapping_justification similarity_score similarity_measure HP:0009124 skos:exactMatch MP:0000003 semapv:LexicalSimilarityThresholdMatching 0.8 wikidata:Q865360 HP:0008551 skos:exactMatch MP:0000018 semapv:LexicalSimilarityThresholdMatching 0.4 wikidata:Q865360 HP:0000411 skos:exactMatch MP:0000021 semapv:SemanticSimilarityThresholdMatching 1.0 wikidata:Q1784941 ================================================ FILE: examples/schema/version.sssom.tsv ================================================ # sssom_version: "1.1" # curie_map: # HP: http://purl.obolibrary.org/obo/FBbt_ # MP: http://purl.obolibrary.org/obo/UBERON_ # license: https://w3id.org/sssom/license/unspecified subject_id predicate_id object_id mapping_justification HP:0009124 skos:exactMatch MP:0000003 semapv:LexicalMatching HP:0008551 skos:exactMatch MP:0000018 semapv:LexicalMatching ================================================ FILE: mkdocs.yml ================================================ site_name: "A Simple Standard for Sharing Ontology Mappings (SSSOM)" site_author: "The SSSOM Community" theme: name: material analytics: gtag: G-2SYBSJVZ23 # palette: # scheme: slate # primary: cyan features: - content.tabs.link extra: version: provider: mike plugins: - search - mermaid2 markdown_extensions: - pymdownx.highlight: use_pygments: true - pymdownx.inlinehilite - pymdownx.snippets - pymdownx.tabbed: - pymdownx.critic - pymdownx.caret - pymdownx.keys - pymdownx.mark - pymdownx.tilde - pymdownx.arithmatex: generic: true - admonition #- pymdownx.emoji: # emoji_index: !!python/name:material.extensions.emoji.twemoji # emoji_generator: !!python/name:material.extensions.emoji.to_svg extra_javascript: - javascripts/mathjax.js - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js nav: - Home: index.md - Introduction: introduction.md - 
Getting started: getting-started.md - Specification: - Introduction: spec-intro.md - Data model: - Introduction: spec-model.md - LinkML documentation: linkml-index.md - Serialisations: - Introduction: spec-formats.md - SSSOM/TSV serialisation: spec-formats-tsv.md - SSSOM/JSON serialisation: spec-formats-json.md - SSSOM/RDF serialisation: spec-formats-rdf.md - OWL/RDF serialisation: spec-formats-owl.md - Support functions: - Introduction: spec-support.md - Hashing mapping records: spec-support-hashing.md - Applying Chaining Rules: chaining-rules.md - Resources for contributors: contributing.md - Resources for users: - FAQ: faq.md - Community: - Use Cases: usecases.md - Workshops: workshops.md - Presentations: presentations.md - Training materials: - Overview: training.md - Basic Tutorial: tutorial.md - How to: - Mapping Justifications: mapping-justifications.md - How to use mapping predicates: mapping-predicates.md - Set up a mapping registry/commons: mapping-commons.md - A basic guide for the SSSOM toolkit: toolkit.md - 5-Star Entity Mappings - Cheatsheet: 5star-mappings.md - Matching tool implementation guide: matching-tool-implementation-guide.md - How to gradually enrich OMOP mappings with SSSOM: tutorials/omop-mappings.md - How to assess mapping confidence: confidence-model.md - Identifying mapping records: record-identifiers.md - Reference: - Glossary: glossary.md - Related documentation: related-documentation.md - Funding: funding.md site_url: https://mapping-commons.github.io/sssom/ repo_url: https://github.com/mapping-commons/sssom/ edit_uri: "edit/master/src/docs/" ================================================ FILE: project/graphql/sssom_schema.graphql ================================================ # metamodel_version: 1.7.0 enum EntityTypeEnum { owl_class owl_object_property owl_data_property owl_annotation_property owl_named_individual skos_concept rdfs_resource rdfs_class rdfs_literal rdfs_datatype rdf_property composed_entity_expression } enum 
MappingCardinalityEnum { __1__COLON__1 __1__COLON__n n__COLON__1 n__COLON__n __1__COLON__0 __0__COLON__1 __0__COLON__0 } enum PredicateModifierEnum { Not } enum SssomVersionEnum { __1__FULL_STOP__0 __1__FULL_STOP__1 } type ExtensionDefinition { slotName: String! property: String typeHint: String } type Mapping { recordId: EntityReference subjectId: EntityReference subjectLabel: String subjectCategory: String predicateId: EntityReference! predicateLabel: String predicateModifier: PredicateModifierEnum objectId: EntityReference objectLabel: String objectCategory: String mappingJustification: EntityReference! authorId: [EntityReference] authorLabel: [String] reviewerId: [EntityReference] reviewerLabel: [String] creatorId: [EntityReference] creatorLabel: [String] license: NonRelativeURI subjectType: EntityTypeEnum subjectSource: EntityReference subjectSourceVersion: String objectType: EntityTypeEnum objectSource: EntityReference objectSourceVersion: String predicateType: EntityTypeEnum mappingProvider: NonRelativeURI mappingSource: EntityReference mappingCardinality: MappingCardinalityEnum cardinalityScope: [String] mappingTool: String mappingToolId: EntityReference mappingToolVersion: String mappingDate: String publicationDate: String reviewDate: String confidence: Float reviewerAgreement: Float curationRule: [EntityReference] curationRuleText: [String] subjectMatchField: [EntityReference] objectMatchField: [EntityReference] matchString: [String] subjectPreprocessing: [EntityReference] objectPreprocessing: [EntityReference] similarityScore: Float similarityMeasure: String seeAlso: [NonRelativeURI] issueTrackerItem: EntityReference other: String comment: String } type MappingRegistry { mappingRegistryId: EntityReference! 
mappingRegistryTitle: String mappingRegistryDescription: String imports: [NonRelativeURI] mappingSetReferences: [MappingSetReference] documentation: NonRelativeURI homepage: NonRelativeURI issueTracker: NonRelativeURI } type MappingSet { sssomVersion: SssomVersionEnum curieMap: [Prefix] mappings: [Mapping] mappingSetId: NonRelativeURI! mappingSetVersion: String mappingSetSource: [NonRelativeURI] mappingSetTitle: String mappingSetDescription: String mappingSetConfidence: Float creatorId: [EntityReference] creatorLabel: [String] license: NonRelativeURI! subjectType: EntityTypeEnum subjectSource: EntityReference subjectSourceVersion: String objectType: EntityTypeEnum objectSource: EntityReference objectSourceVersion: String predicateType: EntityTypeEnum mappingProvider: NonRelativeURI cardinalityScope: [String] mappingTool: String mappingToolId: EntityReference mappingToolVersion: String mappingDate: String publicationDate: String subjectMatchField: [EntityReference] objectMatchField: [EntityReference] subjectPreprocessing: [EntityReference] objectPreprocessing: [EntityReference] similarityMeasure: String curationRule: [EntityReference] curationRuleText: [String] seeAlso: [NonRelativeURI] issueTracker: NonRelativeURI other: String comment: String extensionDefinitions: [ExtensionDefinition] } type MappingSetReference { mappingSetId: NonRelativeURI! mirrorFrom: NonRelativeURI registryConfidence: Float mappingSetGroup: String lastUpdated: String localName: String } type NoTermFound { } type Prefix { prefixName: String! 
prefixUrl: String } type Propagatable { propagated: Boolean } type Versionable { addedIn: SssomVersionEnum } ================================================ FILE: project/jsonld/sssom_schema.context.jsonld ================================================ { "@context": { "xsd": "http://www.w3.org/2001/XMLSchema#", "dcterms": "http://purl.org/dc/terms/", "linkml": "https://w3id.org/linkml/", "oboInOwl": "http://www.geneontology.org/formats/oboInOwl#", "owl": "http://www.w3.org/2002/07/owl#", "pav": "http://purl.org/pav/", "prov": "http://www.w3.org/ns/prov#", "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdfs": "http://www.w3.org/2000/01/rdf-schema#", "semapv": "https://w3id.org/semapv/vocab/", "skos": "http://www.w3.org/2004/02/skos/core#", "sssom": "https://w3id.org/sssom/", "@vocab": "https://w3id.org/sssom/", "author_id": { "@type": "rdfs:Resource", "@id": "pav:authoredBy" }, "author_label": { "@id": "author_label" }, "cardinality_scope": { "@id": "cardinality_scope" }, "comment": { "@id": "rdfs:comment" }, "confidence": { "@type": "xsd:double", "@id": "confidence" }, "creator_id": { "@type": "rdfs:Resource", "@id": "dcterms:creator" }, "creator_label": { "@id": "creator_label" }, "curation_rule": { "@type": "rdfs:Resource", "@id": "curation_rule" }, "curation_rule_text": { "@id": "curation_rule_text" }, "curie_map": { "@type": "@id", "@id": "curie_map" }, "documentation": { "@type": "xsd:anyURI", "@id": "documentation" }, "extension_definitions": { "@type": "@id", "@id": "extension_definitions" }, "property": { "@type": "xsd:anyURI", "@id": "property" }, "slot_name": { "@id": "slot_name" }, "type_hint": { "@type": "xsd:anyURI", "@id": "type_hint" }, "homepage": { "@type": "xsd:anyURI", "@id": "homepage" }, "imports": { "@type": "xsd:anyURI", "@id": "imports" }, "issue_tracker": { "@type": "xsd:anyURI", "@id": "issue_tracker" }, "issue_tracker_item": { "@type": "rdfs:Resource", "@id": "issue_tracker_item" }, "last_updated": { "@type": "xsd:date", "@id": 
"last_updated" }, "license": { "@type": "xsd:anyURI", "@id": "dcterms:license" }, "local_name": { "@id": "local_name" }, "mapping_cardinality": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "mapping_cardinality" }, "mapping_date": { "@type": "xsd:date", "@id": "dcterms:created" }, "mapping_justification": { "@type": "rdfs:Resource", "@id": "mapping_justification" }, "mapping_provider": { "@type": "xsd:anyURI", "@id": "mapping_provider" }, "mapping_registry_description": { "@id": "mapping_registry_description" }, "mapping_registry_id": { "@type": "rdfs:Resource", "@id": "mapping_registry_id" }, "mapping_registry_title": { "@id": "mapping_registry_title" }, "mapping_set_confidence": { "@type": "xsd:double", "@id": "mapping_set_confidence" }, "mapping_set_description": { "@id": "dcterms:description" }, "mapping_set_group": { "@id": "mapping_set_group" }, "mapping_set_id": { "@type": "xsd:anyURI", "@id": "mapping_set_id" }, "mapping_set_references": { "@type": "@id", "@id": "mapping_set_references" }, "mapping_set_source": { "@type": "xsd:anyURI", "@id": "prov:wasDerivedFrom" }, "mapping_set_title": { "@id": "dcterms:title" }, "mapping_set_version": { "@id": "owl:versionInfo" }, "mapping_source": { "@type": "rdfs:Resource", "@id": "mapping_source" }, "mapping_tool": { "@id": "mapping_tool" }, "mapping_tool_id": { "@type": "rdfs:Resource", "@id": "mapping_tool_id" }, "mapping_tool_version": { "@id": "mapping_tool_version" }, "mappings": { "@type": "@id", "@id": "mappings" }, "match_string": { "@id": "match_string" }, "mirror_from": { "@type": "xsd:anyURI", "@id": "mirror_from" }, "object_category": { "@id": "object_category" }, "object_id": { "@type": "rdfs:Resource", "@id": "owl:annotatedTarget" }, "object_label": { "@id": "object_label" }, "object_match_field": { "@type": "rdfs:Resource", "@id": "object_match_field" }, "object_preprocessing": { "@type": "rdfs:Resource", "@id": "object_preprocessing" }, 
"object_source": { "@type": "rdfs:Resource", "@id": "object_source" }, "object_source_version": { "@id": "object_source_version" }, "object_type": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "object_type" }, "other": { "@id": "other" }, "predicate_id": { "@type": "rdfs:Resource", "@id": "owl:annotatedProperty" }, "predicate_label": { "@id": "predicate_label" }, "predicate_modifier": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "predicate_modifier" }, "predicate_type": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "predicate_type" }, "prefix_name": { "@id": "prefix_name" }, "prefix_url": { "@type": "xsd:anyURI", "@id": "prefix_url" }, "propagated": { "@type": "xsd:boolean", "@id": "propagated" }, "publication_date": { "@type": "xsd:date", "@id": "dcterms:issued" }, "record_id": { "@type": "rdfs:Resource", "@id": "record_id" }, "registry_confidence": { "@type": "xsd:double", "@id": "registry_confidence" }, "review_date": { "@type": "xsd:date", "@id": "review_date" }, "reviewer_agreement": { "@type": "xsd:double", "@id": "reviewer_agreement" }, "reviewer_id": { "@type": "rdfs:Resource", "@id": "reviewer_id" }, "reviewer_label": { "@id": "reviewer_label" }, "see_also": { "@type": "xsd:anyURI", "@id": "rdfs:seeAlso" }, "similarity_measure": { "@id": "similarity_measure" }, "similarity_score": { "@type": "xsd:double", "@id": "similarity_score" }, "sssom_version": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "sssom_version" }, "subject_category": { "@id": "subject_category" }, "subject_id": { "@type": "rdfs:Resource", "@id": "owl:annotatedSource" }, "subject_label": { "@id": "subject_label" }, "subject_match_field": { "@type": "rdfs:Resource", "@id": "subject_match_field" }, "subject_preprocessing": { "@type": "rdfs:Resource", "@id": 
"subject_preprocessing" }, "subject_source": { "@type": "rdfs:Resource", "@id": "subject_source" }, "subject_source_version": { "@id": "subject_source_version" }, "subject_type": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "subject_type" }, "added_in": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "added_in" }, "ExtensionDefinition": { "@id": "ExtensionDefinition" }, "Mapping": { "@id": "owl:Axiom" }, "MappingRegistry": { "@id": "MappingRegistry" }, "MappingSet": { "@id": "MappingSet" }, "MappingSetReference": { "@id": "MappingSetReference" }, "NoTermFound": { "@id": "NoTermFound" }, "Prefix": { "@id": "Prefix" }, "Propagatable": { "@id": "Propagatable" }, "Versionable": { "@id": "Versionable" } } } ================================================ FILE: project/jsonld/sssom_schema.jsonld ================================================ { "name": "sssom", "description": "Datamodel for Simple Standard for Sharing Ontological Mappings (SSSOM)", "see_also": [ "https://github.com/mapping-commons/sssom", "https://mapping-commons.github.io/sssom/home/" ], "id": "https://w3id.org/sssom/schema/", "imports": [ "linkml:types" ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", "prefixes": [ { "prefix_prefix": "dcterms", "prefix_reference": "http://purl.org/dc/terms/" }, { "prefix_prefix": "linkml", "prefix_reference": "https://w3id.org/linkml/" }, { "prefix_prefix": "sssom", "prefix_reference": "https://w3id.org/sssom/" }, { "prefix_prefix": "rdfs", "prefix_reference": "http://www.w3.org/2000/01/rdf-schema#" }, { "prefix_prefix": "rdf", "prefix_reference": "http://www.w3.org/1999/02/22-rdf-syntax-ns#" }, { "prefix_prefix": "oboInOwl", "prefix_reference": "http://www.geneontology.org/formats/oboInOwl#" }, { "prefix_prefix": "pav", "prefix_reference": "http://purl.org/pav/" }, { "prefix_prefix": "prov", "prefix_reference": "http://www.w3.org/ns/prov#" 
}, { "prefix_prefix": "skos", "prefix_reference": "http://www.w3.org/2004/02/skos/core#" }, { "prefix_prefix": "xsd", "prefix_reference": "http://www.w3.org/2001/XMLSchema#" }, { "prefix_prefix": "semapv", "prefix_reference": "https://w3id.org/semapv/vocab/" } ], "default_curi_maps": [ "semweb_context", "obo_context" ], "default_prefix": "sssom", "default_range": "string", "types": [ { "name": "EntityReference", "definition_uri": "https://w3id.org/sssom/EntityReference", "description": "A reference to an entity involved in the mapping.\n", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/spec/#tsv" ], "typeof": "uriorcurie", "base": "str", "uri": "http://www.w3.org/2000/01/rdf-schema#Resource", "repr": "str", "@type": "TypeDefinition" }, { "name": "NonRelativeURI", "definition_uri": "https://w3id.org/sssom/NonRelativeURI", "description": "A URI as per RFC 3986, that is a string that matches the production of the \"URI\" rule defined in Appendix A of that RFC. Contrary to the underlying LinkML type, this specifically excludes _relative URI references_, which do not start with a scheme component. 
Relative URI references are forbidden because SSSOM has no built-in mechanism to provide the base URI that would be needed to resolve relative URI references into non-relative ones.", "examples": [ { "value": "https://example.org/path/to/file.txt#L4", "description": "A URI that is a URL to an HTTP resource.", "@type": "Example" }, { "value": "urn:oasis:names:tc:entity:xmlns:xml:catalog", "description": "A URI that is the URN of the namespace for the OASIS XML Catalogs specification.", "@type": "Example" }, { "value": "ldap://example.org/cn=Alice,dc=example,dc=org?mail", "description": "A URI that is an LDAP query URL.", "@type": "Example" }, { "value": "mailto:alice@example.org", "description": "A URI that is an email address.", "@type": "Example" }, { "value": "file.txt", "description": "An _invalid_ example, as it is a relative URI (path only, no scheme).", "@type": "Example" }, { "value": "/path/to/file.txt", "description": "An _invalid_ example; though it appears to be an _absolute path_, it is a _relative URI_ because of the absence of a scheme.", "@type": "Example" }, { "value": "//example.org/path/to/file.txt", "description": "An _invalid_ example; though it includes an authority component (example.org), it has no scheme and is therefore a _relative URI_.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/448" ], "typeof": "uri", "base": "URI", "uri": "http://www.w3.org/2001/XMLSchema#anyURI", "repr": "str", "@type": "TypeDefinition" }, { "name": "string", "definition_uri": "https://w3id.org/linkml/String", "description": "A character string", "notes": [ "In RDF serializations, a slot with range of string is treated as a literal or type xsd:string. If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"string\"." 
], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Text" ], "base": "str", "uri": "http://www.w3.org/2001/XMLSchema#string", "@type": "TypeDefinition" }, { "name": "integer", "definition_uri": "https://w3id.org/linkml/Integer", "description": "An integer", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"integer\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Integer" ], "base": "int", "uri": "http://www.w3.org/2001/XMLSchema#integer", "@type": "TypeDefinition" }, { "name": "boolean", "definition_uri": "https://w3id.org/linkml/Boolean", "description": "A binary (true or false) value", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"boolean\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Boolean" ], "base": "Bool", "uri": "http://www.w3.org/2001/XMLSchema#boolean", "repr": "bool", "@type": "TypeDefinition" }, { "name": "float", "definition_uri": "https://w3id.org/linkml/Float", "description": "A real number that conforms to the xsd:float specification", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"float\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Float" ], "base": "float", "uri": "http://www.w3.org/2001/XMLSchema#float", "@type": "TypeDefinition" }, { "name": "double", "definition_uri": "https://w3id.org/linkml/Double", "description": "A real number that conforms to the xsd:double specification", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"double\"." 
], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "close_mappings": [ "schema:Float" ], "base": "float", "uri": "http://www.w3.org/2001/XMLSchema#double", "@type": "TypeDefinition" }, { "name": "decimal", "definition_uri": "https://w3id.org/linkml/Decimal", "description": "A real number with arbitrary precision that conforms to the xsd:decimal specification", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"decimal\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "broad_mappings": [ "schema:Number" ], "base": "Decimal", "uri": "http://www.w3.org/2001/XMLSchema#decimal", "@type": "TypeDefinition" }, { "name": "time", "definition_uri": "https://w3id.org/linkml/Time", "description": "A time object represents a (local) time of day, independent of any particular day", "notes": [ "URI is dateTime because OWL reasoners do not work with straight date or time", "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"time\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Time" ], "base": "XSDTime", "uri": "http://www.w3.org/2001/XMLSchema#time", "repr": "str", "@type": "TypeDefinition" }, { "name": "date", "definition_uri": "https://w3id.org/linkml/Date", "description": "a date (year, month and day) in an idealized calendar", "notes": [ "URI is dateTime because OWL reasoners don't work with straight date or time", "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"date\"." 
], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Date" ], "base": "XSDDate", "uri": "http://www.w3.org/2001/XMLSchema#date", "repr": "str", "@type": "TypeDefinition" }, { "name": "datetime", "definition_uri": "https://w3id.org/linkml/Datetime", "description": "The combination of a date and time", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"datetime\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:DateTime" ], "base": "XSDDateTime", "uri": "http://www.w3.org/2001/XMLSchema#dateTime", "repr": "str", "@type": "TypeDefinition" }, { "name": "date_or_datetime", "definition_uri": "https://w3id.org/linkml/DateOrDatetime", "description": "Either a date or a datetime", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"date_or_datetime\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "str", "uri": "https://w3id.org/linkml/DateOrDatetime", "repr": "str", "@type": "TypeDefinition" }, { "name": "uriorcurie", "definition_uri": "https://w3id.org/linkml/Uriorcurie", "description": "a URI or a CURIE", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"uriorcurie\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "URIorCURIE", "uri": "http://www.w3.org/2001/XMLSchema#anyURI", "repr": "str", "@type": "TypeDefinition" }, { "name": "curie", "definition_uri": "https://w3id.org/linkml/Curie", "conforms_to": "https://www.w3.org/TR/curie/", "description": "a compact URI", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"curie\"." 
], "comments": [ "in RDF serializations this MUST be expanded to a URI", "in non-RDF serializations MAY be serialized as the compact representation" ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "Curie", "uri": "http://www.w3.org/2001/XMLSchema#string", "repr": "str", "@type": "TypeDefinition" }, { "name": "uri", "definition_uri": "https://w3id.org/linkml/Uri", "conforms_to": "https://www.ietf.org/rfc/rfc3987.txt", "description": "a complete URI", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"uri\"." ], "comments": [ "in RDF serializations a slot with range of uri is treated as a literal or type xsd:anyURI unless it is an identifier or a reference to an identifier, in which case it is translated directly to a node" ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "close_mappings": [ "schema:URL" ], "base": "URI", "uri": "http://www.w3.org/2001/XMLSchema#anyURI", "repr": "str", "@type": "TypeDefinition" }, { "name": "ncname", "definition_uri": "https://w3id.org/linkml/Ncname", "description": "Prefix part of CURIE", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"ncname\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "NCName", "uri": "http://www.w3.org/2001/XMLSchema#string", "repr": "str", "@type": "TypeDefinition" }, { "name": "objectidentifier", "definition_uri": "https://w3id.org/linkml/Objectidentifier", "description": "A URI or CURIE that represents an object in the model.", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"objectidentifier\"." 
], "comments": [ "Used for inheritance and type checking" ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "ElementIdentifier", "uri": "http://www.w3.org/ns/shex#iri", "repr": "str", "@type": "TypeDefinition" }, { "name": "nodeidentifier", "definition_uri": "https://w3id.org/linkml/Nodeidentifier", "description": "A URI, CURIE or BNODE that represents a node in a model.", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"nodeidentifier\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "NodeIdentifier", "uri": "http://www.w3.org/ns/shex#nonLiteral", "repr": "str", "@type": "TypeDefinition" }, { "name": "jsonpointer", "definition_uri": "https://w3id.org/linkml/Jsonpointer", "conforms_to": "https://datatracker.ietf.org/doc/html/rfc6901", "description": "A string encoding a JSON Pointer. The value of the string MUST conform to JSON Pointer syntax and SHOULD dereference to a valid object within the current instance document when encoded in tree form.", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"jsonpointer\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "str", "uri": "http://www.w3.org/2001/XMLSchema#string", "repr": "str", "@type": "TypeDefinition" }, { "name": "jsonpath", "definition_uri": "https://w3id.org/linkml/Jsonpath", "conforms_to": "https://www.ietf.org/archive/id/draft-goessner-dispatch-jsonpath-00.html", "description": "A string encoding a JSON Path. The value of the string MUST conform to JSON Path syntax and SHOULD dereference to zero or more valid objects within the current instance document when encoded in tree form.", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"jsonpath\"." 
], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "str", "uri": "http://www.w3.org/2001/XMLSchema#string", "repr": "str", "@type": "TypeDefinition" }, { "name": "sparqlpath", "definition_uri": "https://w3id.org/linkml/Sparqlpath", "conforms_to": "https://www.w3.org/TR/sparql11-query/#propertypaths", "description": "A string encoding a SPARQL Property Path. The value of the string MUST conform to SPARQL syntax and SHOULD dereference to zero or more valid objects within the current instance document when encoded as RDF.", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"sparqlpath\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "str", "uri": "http://www.w3.org/2001/XMLSchema#string", "repr": "str", "@type": "TypeDefinition" } ], "enums": [ { "name": "sssom_version_enum", "definition_uri": "https://w3id.org/sssom/SssomVersionEnum", "from_schema": "https://w3id.org/sssom/schema/", "permissible_values": [ { "text": "1.0", "description": "SSSOM specification version 1.0", "meaning": "sssom:version1.0" }, { "text": "1.1", "description": "SSSOM specification version 1.1", "meaning": "sssom:version1.1" } ] }, { "name": "entity_type_enum", "definition_uri": "https://w3id.org/sssom/EntityTypeEnum", "from_schema": "https://w3id.org/sssom/schema/", "permissible_values": [ { "text": "owl class", "meaning": "owl:Class" }, { "text": "owl object property", "meaning": "owl:ObjectProperty" }, { "text": "owl data property", "meaning": "owl:DataProperty" }, { "text": "owl annotation property", "meaning": "owl:AnnotationProperty" }, { "text": "owl named individual", "meaning": "owl:NamedIndividual" }, { "text": "skos concept", "meaning": "skos:Concept" }, { "text": "rdfs resource", "meaning": "rdfs:Resource" }, { "text": "rdfs class", "meaning": "rdfs:Class" }, { "text": "rdfs literal", "description": "This value indicates that the entity being 
mapped is not a semantic entity with a distinct identifier, but is instead represented entirely by its literal label. This value MUST NOT be used in the predicate_type slot.", "meaning": "rdfs:Literal", "see_also": [ "https://mapping-commons.github.io/sssom/spec-model/#literal-mappings", "https://github.com/mapping-commons/sssom/issues/234", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/literals.sssom.tsv" ] }, { "text": "rdfs datatype", "meaning": "rdfs:Datatype" }, { "text": "rdf property", "meaning": "rdf:Property" }, { "text": "composed entity expression", "description": "This value indicates that the entity ID does not represent a single entity, but a composite involving several individual entities. This value MUST NOT be used in the predicate_type slot. This specification does not prescribe how an ID representing a composite entity should be interpreted; this is left at the discretion of applications.", "meaning": "sssom:ComposedEntityExpression", "see_also": [ "https://github.com/mapping-commons/sssom/issues/402", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/composite-entities.sssom.tsv" ] } ] }, { "name": "predicate_modifier_enum", "definition_uri": "https://w3id.org/sssom/PredicateModifierEnum", "from_schema": "https://w3id.org/sssom/schema/", "permissible_values": [ { "text": "Not", "description": "Negating the mapping predicate. The meaning of the triple becomes subject_id is not a predicate_id match to object_id.", "meaning": "sssom:NegatedPredicate" } ] }, { "name": "mapping_cardinality_enum", "definition_uri": "https://w3id.org/sssom/MappingCardinalityEnum", "from_schema": "https://w3id.org/sssom/schema/", "permissible_values": [ { "text": "1:1", "description": "Indicates the mapping record is about a one-to-one mapping, that is, the subject and the object are only mapped to each other, exclusive of any other subject or object." 
}, { "text": "1:n", "description": "Indicates the mapping record is about a one-to-many mapping, that is, the same subject is mapped to several different objects." }, { "text": "n:1", "description": "Indicates the mapping record is about a many-to-one mapping, that is, several different subjects are mapped to the same object." }, { "text": "n:n", "description": "Indicates the mapping record is about a many-to-many mapping, that is, the subject is mapped to several different objects and the object is mapped to several different subjects." }, { "text": "1:0", "description": "Indicates that the subject has no match in the object vocabulary. This value MUST only be used when the object_id is sssom:NoTermFound." }, { "text": "0:1", "description": "Indicates that the object has no match in the subject vocabulary. This value MUST only be used when the subject_id is sssom:NoTermFound." }, { "text": "0:0", "description": "Indicates that there is no match between the subject vocabulary and the object vocabulary. This value MUST only be used when both the subject_id and the object_id are sssom:NoTermFound." 
} ] } ], "slots": [ { "name": "prefix_name", "definition_uri": "https://w3id.org/sssom/prefix_name", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/prefix_name", "key": true, "owner": "Prefix", "domain_of": [ "Prefix" ], "range": "ncname", "required": true, "@type": "SlotDefinition" }, { "name": "prefix_url", "definition_uri": "https://w3id.org/sssom/prefix_url", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/prefix_url", "owner": "Prefix", "domain_of": [ "Prefix" ], "range": "uri", "@type": "SlotDefinition" }, { "name": "sssom_version", "definition_uri": "https://w3id.org/sssom/sssom_version", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "The version of the SSSOM specification a mapping set is compliant with.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/439", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/version.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/sssom_version", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "sssom_version_enum", "@type": "SlotDefinition" }, { "name": "curie_map", "definition_uri": "https://w3id.org/sssom/curie_map", "description": "A dictionary that contains prefixes as keys and their URI expansions as values.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/225", "https://github.com/mapping-commons/sssom/pull/349", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curie_map.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/curie_map", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "Prefix", "multivalued": true, "inlined": true, "@type": "SlotDefinition" }, { "name": "mirror_from", "definition_uri": "https://w3id.org/sssom/mirror_from", "description": "A URL 
location from which to obtain a resource, such as a mapping set.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mirror_from", "owner": "MappingSetReference", "domain_of": [ "MappingSetReference" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "registry_confidence", "definition_uri": "https://w3id.org/sssom/registry_confidence", "description": "This value is set by the creator/maintainer of the mapping registry and reflects the confidence the mapping registry has in the correctness (i.e., precision) of mappings in the mapping set.\nWhen not explicitly specified, confidence estimation algorithms should consider the registry confidence in a mapping set to be 1.0 by default.", "examples": [ { "value": "0.95", "description": "A confidence score of 0.95, indicating 95% confidence.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/confidence-model" ], "slot_uri": "https://w3id.org/sssom/registry_confidence", "owner": "MappingSetReference", "domain_of": [ "MappingSetReference" ], "range": "double", "minimum_value": 0.0, "maximum_value": 1.0, "@type": "SlotDefinition" }, { "name": "last_updated", "definition_uri": "https://w3id.org/sssom/last_updated", "description": "The date this reference was last updated.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/last_updated", "owner": "MappingSetReference", "domain_of": [ "MappingSetReference" ], "range": "date", "@type": "SlotDefinition" }, { "name": "local_name", "definition_uri": "https://w3id.org/sssom/local_name", "description": "The local name assigned to file that corresponds to the downloaded mapping set.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/local_name", "owner": "MappingSetReference", "domain_of": [ "MappingSetReference" ], "range": "string", "@type": "SlotDefinition" }, { "name": 
"mapping_set_references", "definition_uri": "https://w3id.org/sssom/mapping_set_references", "description": "A list of mapping set references.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_set_references", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "MappingSetReference", "recommended": true, "multivalued": true, "inlined": true, "inlined_as_list": true, "@type": "SlotDefinition" }, { "name": "mapping_registry_id", "definition_uri": "https://w3id.org/sssom/mapping_registry_id", "description": "The unique identifier of a mapping registry.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_registry_id", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "EntityReference", "required": true, "@type": "SlotDefinition" }, { "name": "mapping_registry_title", "definition_uri": "https://w3id.org/sssom/mapping_registry_title", "description": "The title of a mapping registry.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_registry_title", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_registry_description", "definition_uri": "https://w3id.org/sssom/mapping_registry_description", "description": "The description of a mapping registry.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_registry_description", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "string", "@type": "SlotDefinition" }, { "name": "imports", "definition_uri": "https://w3id.org/sssom/imports", "description": "A list of registries that should be imported into this one.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/imports", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "NonRelativeURI", "multivalued": 
true, "@type": "SlotDefinition" }, { "name": "documentation", "definition_uri": "https://w3id.org/sssom/documentation", "description": "A URL to the documentation of this mapping commons.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/documentation", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "homepage", "definition_uri": "https://w3id.org/sssom/homepage", "description": "A URL to a homepage of this mapping commons.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/homepage", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "mappings", "definition_uri": "https://w3id.org/sssom/mappings", "description": "Contains a list of mapping objects.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mappings", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "Mapping", "recommended": true, "multivalued": true, "inlined": true, "inlined_as_list": true, "@type": "SlotDefinition" }, { "name": "subject_id", "definition_uri": "https://w3id.org/sssom/subject_id", "description": "The ID of the subject of the mapping.", "examples": [ { "value": "HP:0009894", "description": "The CURIE denoting the Human Phenotype Ontology concept of 'Thickened ears'", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/2002/07/owl#annotatedSource", "http://www.w3.org/2002/07/owl#annotatedSource" ], "slot_uri": "http://www.w3.org/2002/07/owl#annotatedSource", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "subject_label", "definition_uri": "https://w3id.org/sssom/subject_label", "description": "The label of subject of the mapping.", "examples": [ { "value": "Thickened ears", "@type": "Example" 
} ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/subject_label", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "recommended": true, "@type": "SlotDefinition" }, { "name": "subject_category", "definition_uri": "https://w3id.org/sssom/subject_category", "description": "The conceptual category to which the subject belongs. This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases.", "examples": [ { "value": "UBERON:0001062", "description": "The CURIE of the Uberon term for \"anatomical entity\".", "@type": "Example" }, { "value": "anatomical entity", "description": "A string, rather than ID, describing the \"anatomical entity\" category. 
This is possible, but less preferred than using an ID.", "@type": "Example" }, { "value": "biolink:Gene", "description": "The CURIE of the biolink class for genes.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/13", "https://github.com/mapping-commons/sssom/issues/256" ], "slot_uri": "https://w3id.org/sssom/subject_category", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "subject_type", "definition_uri": "https://w3id.org/sssom/subject_type", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "The type of entity that is being mapped.", "examples": [ { "value": "owl:Class", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/subject_type", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "entity_type_enum", "@type": "SlotDefinition" }, { "name": "predicate_id", "definition_uri": "https://w3id.org/sssom/predicate_id", "description": "The ID of the predicate or relation that relates the subject and object of this match.", "examples": [ { "value": "owl:sameAs", "description": "The subject and the object are instances (owl individuals), and the two instances are the same.", "@type": "Example" }, { "value": "owl:equivalentClass", "description": "The subject and the object are classes (owl class), and the two classes are the same.", "@type": "Example" }, { "value": "owl:equivalentProperty", "description": "The subject and the object are properties (owl object, data, annotation properties), and the two properties are the same.", "@type": "Example" }, { "value": "rdfs:subClassOf", "description": "The subject and the object are classes (owl class), and the subject is a subclass of the object.", "@type": "Example" }, { "value": "rdfs:subPropertyOf", "description": 
"The subject and the object are properties (owl object, data, annotation properties), and the subject is a subproperty of the object.", "@type": "Example" }, { "value": "skos:relatedMatch", "description": "The subject and the object are associated in some unspecified way.", "@type": "Example" }, { "value": "skos:closeMatch", "description": "The subject and the object are sufficiently similar that they can be used interchangeably in some information retrieval applications.", "@type": "Example" }, { "value": "skos:exactMatch", "description": "The subject and the object can, with a high degree of confidence, be used interchangeably across a wide range of information retrieval applications.", "@type": "Example" }, { "value": "skos:narrowMatch", "description": "From the SKOS primer: A triple A skos:narrower B (and skos:narrowMatch) asserts that B, the object of the triple, is a narrower concept than A, the subject of the triple.", "@type": "Example" }, { "value": "skos:broadMatch", "description": "From the SKOS primer: A triple A skos:broader B (and skos:broadMatch) asserts that B, the object of the triple, is a broader concept than A, the subject of the triple.", "@type": "Example" }, { "value": "oboInOwl:hasDbXref", "description": "Two terms are related in some way. The meaning is frequently consistent across a single set of mappings. Note this property is often overloaded even where the terms are of a different nature (e.g. interpro2go).", "@type": "Example" }, { "value": "rdfs:seeAlso", "description": "The subject and the object are associated in some unspecified way. 
The object IRI often resolves to a resource on the web that provides additional information.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/2002/07/owl#annotatedProperty", "http://www.w3.org/2002/07/owl#annotatedProperty" ], "slot_uri": "http://www.w3.org/2002/07/owl#annotatedProperty", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "required": true, "@type": "SlotDefinition" }, { "name": "predicate_modifier", "definition_uri": "https://w3id.org/sssom/predicate_modifier", "description": "A modifier for negating the predicate. See https://github.com/mapping-commons/sssom/issues/40 for discussion", "examples": [ { "value": "Not", "description": "Negates the predicate, see documentation of predicate_modifier_enum", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/107" ], "slot_uri": "https://w3id.org/sssom/predicate_modifier", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "predicate_modifier_enum", "@type": "SlotDefinition" }, { "name": "predicate_label", "definition_uri": "https://w3id.org/sssom/predicate_label", "description": "The label of the predicate/relation of the mapping.", "examples": [ { "value": "has cross-reference", "description": "The label of the oboInOwl:hasDbXref property to represent cross-references.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/predicate_label", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "predicate_type", "definition_uri": "https://w3id.org/sssom/predicate_type", "instantiates": [ "sssom:Propagatable", "sssom:Versionable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" }, { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "The type of the predicate used to map 
the subject and object entities.", "examples": [ { "value": "owl:AnnotationProperty", "@type": "Example" }, { "value": "owl:ObjectProperty", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/143", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/predicate-types.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/predicate_type", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "entity_type_enum", "@type": "SlotDefinition" }, { "name": "object_id", "definition_uri": "https://w3id.org/sssom/object_id", "description": "The ID of the object of the mapping.", "examples": [ { "value": "HP:0009894", "description": "The CURIE denoting the Human Phenotype Ontology concept of 'Thickened ears'.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/2002/07/owl#annotatedTarget", "http://www.w3.org/2002/07/owl#annotatedTarget" ], "slot_uri": "http://www.w3.org/2002/07/owl#annotatedTarget", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "object_label", "definition_uri": "https://w3id.org/sssom/object_label", "description": "The label of the object of the mapping.", "examples": [ { "value": "Thickened ears", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/object_label", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "recommended": true, "@type": "SlotDefinition" }, { "name": "object_category", "definition_uri": "https://w3id.org/sssom/object_category", "description": "The conceptual category to which the object belongs. This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. 
process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases.", "examples": [ { "value": "UBERON:0001062", "description": "The CURIE of the Uberon term for \"anatomical entity\".", "@type": "Example" }, { "value": "anatomical entity", "description": "A string, rather than ID, describing the \"anatomical entity\" category. This is possible, but less preferred than using an ID.", "@type": "Example" }, { "value": "biolink:Gene", "description": "The CURIE of the biolink class for genes.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/13", "https://github.com/mapping-commons/sssom/issues/256" ], "slot_uri": "https://w3id.org/sssom/object_category", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_justification", "definition_uri": "https://w3id.org/sssom/mapping_justification", "description": "A mapping justification is an action (or the written representation of that action) of showing a mapping to be right or reasonable.", "examples": [ { "value": "semapv:LexicalMatching", "@type": "Example" }, { "value": "semapv:ManualMappingCuration", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/semantic-mapping-vocabulary/", "https://www.ebi.ac.uk/ols4/ontologies/semapv" ], "slot_uri": "https://w3id.org/sssom/mapping_justification", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "required": true, "pattern": 
"^semapv:(MappingReview|ManualMappingCuration|LogicalReasoning|LexicalMatching|CompositeMatching|UnspecifiedMatching|SemanticSimilarityThresholdMatching|LexicalSimilarityThresholdMatching|MappingChaining|MappingInversion|StructuralMatching|InstanceBasedMatching|BackgroundKnowledgeBasedMatching)$", "any_of": [ { "equals_string": "semapv:LexicalMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:LogicalReasoning", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:CompositeMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:UnspecifiedMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:SemanticSimilarityThresholdMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:LexicalSimilarityThresholdMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:MappingChaining", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:MappingReview", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:ManualMappingCuration", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:MappingInversion", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:StructuralMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:InstanceBasedMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:BackgroundKnowledgeBasedMatching", "@type": "AnonymousSlotExpression" } ], "@type": "SlotDefinition" }, { "name": "object_type", "definition_uri": "https://w3id.org/sssom/object_type", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "The type of entity that is being mapped.", "examples": [ { "value": "owl:Class", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/object_type", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": 
"entity_type_enum", "@type": "SlotDefinition" }, { "name": "mapping_set_id", "definition_uri": "https://w3id.org/sssom/mapping_set_id", "description": "A globally unique identifier for the mapping set (not each individual mapping). Should ideally be resolvable.", "examples": [ { "value": "http://purl.obolibrary.org/obo/mondo/mappings/mondo_exactmatch_ncit.sssom.tsv", "description": "A persistent URI pointing to the latest version of the Mondo - NCIT mapping in the Mondo namespace.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_set_id", "owner": "MappingSetReference", "domain_of": [ "MappingSet", "MappingSetReference" ], "range": "NonRelativeURI", "required": true, "@type": "SlotDefinition" }, { "name": "mapping_set_version", "definition_uri": "https://w3id.org/sssom/mapping_set_version", "description": "A version string for the mapping set.", "examples": [ { "value": "2020-01-01", "description": "A date-based version that indicates that the mapping set was published on the 1st of January 2020.", "@type": "Example" }, { "value": "1.2.1", "description": "(A semantic version tag that indicates that this is the 1st major, 2nd minor version, patch 1 (https://semver.org/).)", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/2002/07/owl#versionInfo" ], "slot_uri": "http://www.w3.org/2002/07/owl#versionInfo", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_set_group", "definition_uri": "https://w3id.org/sssom/mapping_set_group", "description": "Set by the owners of the mapping registry. 
A way to group related mapping sets for example for UI purposes.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_set_group", "owner": "MappingSetReference", "domain_of": [ "MappingSetReference" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_set_title", "definition_uri": "https://w3id.org/sssom/mapping_set_title", "description": "The display name of a mapping set.", "examples": [ { "value": "The Mondo-OMIM mappings by Monarch Initiative.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/title" ], "slot_uri": "http://purl.org/dc/terms/title", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_set_description", "definition_uri": "https://w3id.org/sssom/mapping_set_description", "description": "A description of the mapping set.", "examples": [ { "value": "This mapping set was produced to integrate human and mouse phenotype data at the IMPC. It is primarily used for making mouse phenotypes searchable by human synonyms at https://mousephenotype.org/.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/description" ], "slot_uri": "http://purl.org/dc/terms/description", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_set_confidence", "definition_uri": "https://w3id.org/sssom/mapping_set_confidence", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "Mapping-set level confidence is assigned by the creator of the mapping set to indicate their overall confidence in the correctness (i.e., precision) of mappings in the mapping set. 
Mapping set confidence is intended to be used in cases where the creator wants to express an overall confidence in the agent that curated the individual mappings, for example a lexical matching tool, or a group of students.\nWhen not explicitly specified, confidence estimation algorithms should consider the mapping set confidence to be 1.0 by default.", "examples": [ { "value": "0.95", "description": "A confidence score of 0.95, indicating 95% confidence that the mappings in the mapping set are correct.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/confidence-model", "https://github.com/mapping-commons/sssom/issues/438", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/mapping_set_confidence.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/mapping_set_confidence", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "double", "minimum_value": 0.0, "maximum_value": 1.0, "@type": "SlotDefinition" }, { "name": "creator_id", "definition_uri": "https://w3id.org/sssom/creator_id", "description": "Identifies the persons or groups responsible for the creation of the mapping. The creator is the agent that put the mapping in its published form, which may be different from the author, which is a person that was actively involved in the assertion of the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs.", "examples": [ { "value": "orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165", "description": "The ORCID of the (multiple) creators of the mapping. 
Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `creator_id` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "orcid:0000-0002-7356-1779", "description": "The ORCID of the creator of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/creator" ], "slot_uri": "http://purl.org/dc/terms/creator", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "creator_label", "definition_uri": "https://w3id.org/sssom/creator_label", "description": "A string representing the creator of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in creator_id) for that creator. It is not expected that there should be any link between creator_id and creator_label; in particular, creator_label is not intended to provide a human-friendly version of an identifier in creator_id.", "examples": [ { "value": "Nicolas Matentzoglu|Chris Mungall", "description": "The human-readable names of the (multiple) creators of the mapping. 
Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `creator_label` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "Nicolas Matentzoglu", "description": "The human-readable name of the creator of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/creator_label", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "author_id", "definition_uri": "https://w3id.org/sssom/author_id", "description": "Identifies the persons or groups responsible for asserting the mappings. Recommended to be a list of ORCIDs or otherwise identifying URIs.", "examples": [ { "value": "orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165", "description": "The ORCID of the (multiple) authors of the mapping. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `author_id` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "orcid:0000-0002-7356-1779", "description": "The ORCID of the author of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/pav/authoredBy" ], "slot_uri": "http://purl.org/pav/authoredBy", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "author_label", "definition_uri": "https://w3id.org/sssom/author_label", "description": "A string representing the author of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in author_id) for that author. 
It is not expected that there should be any link between author_id and author_label; in particular, author_label is not intended to provide a human-friendly version of an identifier in author_id.", "examples": [ { "value": "Nicolas Matentzoglu|Chris Mungall", "description": "The human-readable names of the (multiple) authors of the mapping. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `author_label` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "Nicolas Matentzoglu", "description": "The human-readable name of the author of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/author_label", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "reviewer_id", "definition_uri": "https://w3id.org/sssom/reviewer_id", "description": "Identifies the persons or groups that reviewed and confirmed the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs.", "examples": [ { "value": "orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165", "description": "The ORCID of the (multiple) reviewers of the mapping. 
Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `reviewer_id` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "orcid:0000-0002-7356-1779", "description": "The ORCID of the reviewer of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/reviewer_id", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "reviewer_label", "definition_uri": "https://w3id.org/sssom/reviewer_label", "description": "A string representing the reviewer of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in reviewer_id) for that reviewer. It is not expected that there should be any link between reviewer_id and reviewer_label; in particular, reviewer_label is not intended to provide a human-friendly version of an identifier in reviewer_id.", "examples": [ { "value": "Nicolas Matentzoglu|Chris Mungall", "description": "The human-readable names of the (multiple) reviewers of the mapping. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `reviewer_label` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "Nicolas Matentzoglu", "description": "The human-readable name of the reviewer of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/reviewer_label", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "license", "definition_uri": "https://w3id.org/sssom/license", "description": "A url to the license of the mapping. 
In absence of a license we assume no license.", "examples": [ { "value": "https://creativecommons.org/licenses/by/4.0/", "description": "The URI of the Creative Commons Attribution 4.0 International license.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/license" ], "slot_uri": "http://purl.org/dc/terms/license", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "subject_source", "definition_uri": "https://w3id.org/sssom/subject_source", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "URI of vocabulary or identifier source for the subject.", "examples": [ { "value": "obo:mondo.owl", "description": "A persistent OBO CURIE pointing to the latest version of the Mondo ontology.", "@type": "Example" }, { "value": "wikidata:Q7876491", "description": "A Wikidata identifier for the Uberon ontology resource.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/subject_source", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "subject_source_version", "definition_uri": "https://w3id.org/sssom/subject_source_version", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "Version IRI or version string of the source of the subject term.", "examples": [ { "value": "http://purl.obolibrary.org/obo/mondo/releases/2021-01-30/mondo.owl", "description": "A persistent Version IRI pointing to the Mondo version '2021-01-30'", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/subject_source_version", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": 
"string", "@type": "SlotDefinition" }, { "name": "object_source", "definition_uri": "https://w3id.org/sssom/object_source", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "URI of vocabulary or identifier source for the object.", "examples": [ { "value": "obo:mondo.owl", "description": "A persistent OBO CURIE pointing to the latest version of the Mondo ontology.", "@type": "Example" }, { "value": "wikidata:Q7876491", "description": "A Wikidata identifier for the Uberon ontology resource.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/object_source", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "object_source_version", "definition_uri": "https://w3id.org/sssom/object_source_version", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "Version IRI or version string of the source of the object term.", "examples": [ { "value": "http://purl.obolibrary.org/obo/mondo/releases/2021-01-30/mondo.owl", "description": "A persistent Version IRI pointing to the Mondo version '2021-01-30'", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/object_source_version", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_provider", "definition_uri": "https://w3id.org/sssom/mapping_provider", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "URL pointing to the source that provided the mapping, for example an ontology that already contains the mappings, or a database from which it was derived.", "examples": [ { "value": "https://www.ohdsi.org/", 
"description": "A URL pointing to the Observational Health Data Sciences and Informatics initiative.", "@type": "Example" }, { "value": "https://monarchinitiative.org/", "description": "A URL pointing to the Monarch Initiative Resource.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_provider", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "mapping_set_source", "definition_uri": "https://w3id.org/sssom/mapping_set_source", "description": "A mapping set or set of mapping sets that was used to derive the mapping set.", "examples": [ { "value": "http://purl.obolibrary.org/obo/mondo/mappings/2022-05-20/mondo_exactmatch_ncit.sssom.tsv", "description": "A persistent, ideally versioned, link to the mapping set from which the current mapping set is derived.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/ns/prov#wasDerivedFrom" ], "slot_uri": "http://www.w3.org/ns/prov#wasDerivedFrom", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "NonRelativeURI", "multivalued": true, "@type": "SlotDefinition" }, { "name": "mapping_source", "definition_uri": "https://w3id.org/sssom/mapping_source", "description": "The mapping set this mapping was originally defined in. 
mapping_source is used for example when merging multiple mapping sets or deriving one mapping set from another.", "examples": [ { "value": "MONDO_MAPPINGS:mondo_exactmatch_ncit.sssom.tsv", "description": "A reference to the mapping set that originally contained this mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_source", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "mapping_cardinality", "definition_uri": "https://w3id.org/sssom/mapping_cardinality", "description": "A value indicating whether the subject (respectively object) of this mapping record is present in other records involving a different object (respectively subject), within the subset of records defined by the cardinality_scope slot (or within the entire mapping set if cardinality_scope is undefined). Note that this is a convenience field, whose values can always be derived from the mapping set.", "examples": [ { "value": "1:1", "description": "A one-to-one mapping. There are no other records in which the same subject is mapped to a different object, and no other records in which the same object is mapped to a different subject.", "@type": "Example" }, { "value": "1:n", "description": "A one-to-many mapping. 
There are other records in which the same subject is mapped to at least one different object than the object present in this record; there are no other records in which the object is mapped to a different subject.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-with-unmapped-entities.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-scope-empty.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/mapping_cardinality", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "mapping_cardinality_enum", "@type": "SlotDefinition" }, { "name": "cardinality_scope", "definition_uri": "https://w3id.org/sssom/cardinality_scope", "instantiates": [ "sssom:Propagatable", "sssom:Versionable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" }, { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "A list of mapping slots that define the scope for the value found in the mapping_cardinality slot. Mappings are considered to belong to the same scope if they have the same value for all slots listed in the scope. If no scope is defined, the default scope is empty, meaning that all mappings belong to a single scope that is identical to the entire mapping set. The behaviour if a value in the list does not correspond to a valid slot name is undefined.", "examples": [ { "value": "predicate_id", "description": "Indicates that mapping_cardinality is computed relatively to all mappings that have the same predicate.", "@type": "Example" }, { "value": "predicate_id|object_source", "description": "Indicates that mapping_cardinality is computed relatively to all mappings that have the same predicate and the same object source. 
Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots like `cardinality_scope` are represented as a single string containing `|`-separated values.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/467", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-scope-predicate.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-scope-predicate+object_source.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/cardinality_scope", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "mapping_tool", "definition_uri": "https://w3id.org/sssom/mapping_tool", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "A reference to the tool or algorithm that was used to generate the mapping. Should be a URL pointing to more info about it, but can be free text. 
Consider using the mapping_tool_id slot for a more standardised reference.", "examples": [ { "value": "https://github.com/AgreementMakerLight/AML-Project", "description": "A URL pointing to the AgreementMakerLight project.", "@type": "Example" }, { "value": "AgreementMakerLight", "description": "A string (name) denoting the AgreementMakerLight project.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_tool", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_tool_id", "definition_uri": "https://w3id.org/sssom/mapping_tool_id", "instantiates": [ "sssom:Propagatable", "sssom:Versionable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" }, { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "The ID (entity reference) of the tool or algorithm that was used to generate the mapping.", "examples": [ { "value": "wikidata:Q58057366", "description": "A wikidata PURL identifying the AgreementMakerLight project.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/blob/master/examples/schema/mapping_tool_id.sssom.tsv", "https://github.com/mapping-commons/sssom/issues/449" ], "slot_uri": "https://w3id.org/sssom/mapping_tool_id", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "mapping_tool_version", "definition_uri": "https://w3id.org/sssom/mapping_tool_version", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "Version string that denotes the version of the mapping tool used.", "examples": [ { "value": "v3.2", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": 
"https://w3id.org/sssom/mapping_tool_version", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_date", "definition_uri": "https://w3id.org/sssom/mapping_date", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "The date the mapping was asserted. This is different from the date the mapping was published or compiled in a SSSOM file.", "examples": [ { "value": "2021-01-01", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/created" ], "slot_uri": "http://purl.org/dc/terms/created", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "date", "@type": "SlotDefinition" }, { "name": "publication_date", "definition_uri": "https://w3id.org/sssom/publication_date", "description": "The date the mapping was published. This is different from the date the mapping was asserted.", "examples": [ { "value": "2021-01-01", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/issued" ], "slot_uri": "http://purl.org/dc/terms/issued", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "date", "@type": "SlotDefinition" }, { "name": "review_date", "definition_uri": "https://w3id.org/sssom/review_date", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "The date the mapping was reviewed. This is different from the date the mapping was asserted and published. 
If this field is used in a mapping, reviewer_id and/or reviewer_label MUST also be set.", "examples": [ { "value": "2021-01-01", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/511", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/review_date.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/review_date", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "date", "@type": "SlotDefinition" }, { "name": "confidence", "definition_uri": "https://w3id.org/sssom/confidence", "description": "A value assigned by the creator of the mapping to denote the creator's confidence or estimated probability that the mapping record is correct. A value of 1.0 means the creator has full confidence in the correctness of the mapping record, while a value of 0.0 means the creator is fully unsure whether the mapping record is correct or not.\nWhen not explicitly specified, confidence estimation algorithms should consider the mapping confidence to be 1.0 by default.", "examples": [ { "value": "0.95", "description": "A confidence score of 0.95, indicating 95% confidence.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/confidence-model" ], "slot_uri": "https://w3id.org/sssom/confidence", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "double", "minimum_value": 0.0, "maximum_value": 1.0, "@type": "SlotDefinition" }, { "name": "reviewer_agreement", "definition_uri": "https://w3id.org/sssom/reviewer_agreement", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "A value assigned by the reviewer of the mapping to denote their confidence that the mapping record is correct. A value of 1.0 means the reviewer fully agrees with the mapping record. 
A value of -1.0 means the reviewer fully disagrees with the mapping record. A value of 0.0 means the reviewer is not sure whether the mapping record is correct or not.\nWhen not explicitly specified, confidence estimation algorithms should consider the reviewer agreement to be 1.0 by default.", "examples": [ { "value": "1.0", "description": "A reviewer agreement of 1.0 denotes that the reviewer considers the mapping record to be correct with full confidence", "@type": "Example" }, { "value": "-1.0", "description": "A reviewer agreement of -1.0 denotes that the reviewer considers the mapping record to be incorrect with full confidence", "@type": "Example" }, { "value": "0.0", "description": "A reviewer agreement of 0.0 denotes that the reviewer is not sure whether the mapping record is correct or not.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/confidence-model", "https://github.com/mapping-commons/sssom/issues/510", "https://github.com/mapping-commons/sssom/pull/519" ], "slot_uri": "https://w3id.org/sssom/reviewer_agreement", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "double", "minimum_value": -1.0, "maximum_value": 1.0, "@type": "SlotDefinition" }, { "name": "subject_match_field", "definition_uri": "https://w3id.org/sssom/subject_match_field", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "A list of properties, annotations or attributes related to the subject that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. 
For additional information see the 'See Also' section.", "examples": [ { "value": "rdfs:label", "description": "The RDFS label property (rdfs:label) was used to match the subject.", "@type": "Example" }, { "value": "skos:prefLabel", "description": "The SKOS preferred label property (skos:prefLabel) was used to match the subject.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/mapping-justifications/#lexical-matching", "https://github.com/mapping-commons/sssom/issues/413" ], "slot_uri": "https://w3id.org/sssom/subject_match_field", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "object_match_field", "definition_uri": "https://w3id.org/sssom/object_match_field", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "A list of properties, annotations or attributes related to the object that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. 
For additional information see the 'See Also' section.", "examples": [ { "value": "rdfs:label", "description": "The RDFS label property (rdfs:label) was used to match the object.", "@type": "Example" }, { "value": "skos:prefLabel", "description": "The SKOS preferred label property (skos:prefLabel) was used to match the object.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/mapping-justifications/#lexical-matching", "https://github.com/mapping-commons/sssom/issues/413" ], "slot_uri": "https://w3id.org/sssom/object_match_field", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "match_string", "definition_uri": "https://w3id.org/sssom/match_string", "description": "String that is shared by subj/obj. It is recommended to indicate the fields for the match using the object and subject_match_field slots.", "examples": [ { "value": "gala", "description": "The 'gala' string was matched for both subject and object.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/match_string", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "subject_preprocessing", "definition_uri": "https://w3id.org/sssom/subject_preprocessing", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "Method of preprocessing applied to the fields of the subject. 
If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows.", "examples": [ { "value": "semapv:Stemming", "@type": "Example" }, { "value": "semapv:StopWordRemoval", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/subject_preprocessing", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "object_preprocessing", "definition_uri": "https://w3id.org/sssom/object_preprocessing", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "Method of preprocessing applied to the fields of the object. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows.", "examples": [ { "value": "semapv:Stemming", "@type": "Example" }, { "value": "semapv:StopWordRemoval", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/object_preprocessing", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "curation_rule", "definition_uri": "https://w3id.org/sssom/curation_rule", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. 
The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule.", "examples": [ { "value": "DISEASE_MAPPING_COMMONS_RULES:MPR2", "description": "A reference to the Disease Mapping Commons rule with the ID MPR2.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/166", "https://github.com/mapping-commons/sssom/pull/258", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule-propagated.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/curation_rule", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "curation_rule_text", "definition_uri": "https://w3id.org/sssom/curation_rule_text", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. 
The textual representation of curation rule is intended to be used in cases where the creation of a resource is not practical from the perspective of the mapping_provider.", "examples": [ { "value": "The two phenotypes inhere in homologous structures and exhibit the same phenotypic quality.", "@type": "Example" }, { "value": "The two diseases are used synonymous in the medical literature.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/166", "https://github.com/mapping-commons/sssom/pull/258", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule_text.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule_text-propagated.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/curation_rule_text", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "similarity_score", "definition_uri": "https://w3id.org/sssom/similarity_score", "description": "A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. 
The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm.", "examples": [ { "value": "0.95", "description": "A similarity score of 0.95, indicating 95% similarity.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/385", "https://github.com/mapping-commons/sssom/pull/386", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/similarity_score.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/similarity_score", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "double", "minimum_value": 0.0, "maximum_value": 1.0, "@type": "SlotDefinition" }, { "name": "similarity_measure", "definition_uri": "https://w3id.org/sssom/similarity_measure", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. 
To make processing this field as unambiguous as possible, we recommend using Wikidata CURIEs, but the type of this field is deliberately unspecified.", "examples": [ { "value": "https://www.wikidata.org/entity/Q865360", "description": "The Wikidata IRI for the Jaccard index measure.", "@type": "Example" }, { "value": "wikidata:Q865360", "description": "The Wikidata CURIE for the Jaccard index measure.", "@type": "Example" }, { "value": "Levenshtein distance", "description": "A score to measure the distance between two character sequences.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/385", "https://github.com/mapping-commons/sssom/pull/386", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/similarity_score.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/similarity_measure", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "issue_tracker_item", "definition_uri": "https://w3id.org/sssom/issue_tracker_item", "description": "The issue tracker item discussing this mapping.", "examples": [ { "value": "SSSOM_GITHUB_ISSUE:166", "description": "A URL resolving to an issue discussing a new SSSOM element request", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/78", "https://github.com/mapping-commons/sssom/pull/259", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/issue_tracker_item.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/issue_tracker_item", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "issue_tracker", "definition_uri": "https://w3id.org/sssom/issue_tracker", "description": "A URL location of the issue tracker for this entity.", "examples": [ { "value": 
"https://github.com/mapping-commons/mh_mapping_initiative/issues", "description": "A URL resolving to the issue tracker of the Mouse-Human mapping initiative", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/78", "https://github.com/mapping-commons/sssom/pull/259", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/issue_tracker.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/issue_tracker", "owner": "MappingRegistry", "domain_of": [ "MappingSet", "MappingRegistry" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "see_also", "definition_uri": "https://w3id.org/sssom/see_also", "description": "A URL specific for the mapping instance. E.g. for kboom we have a per-mapping image that shows surrounding axioms that drive probability. Could also be a github issue URL that discussed a complicated alignment", "examples": [ { "value": "https://github.com/mapping-commons/mh_mapping_initiative/pull/41", "description": "A URL pointing to the pull request that introduced the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/422" ], "mappings": [ "http://www.w3.org/2000/01/rdf-schema#seeAlso" ], "slot_uri": "http://www.w3.org/2000/01/rdf-schema#seeAlso", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "NonRelativeURI", "multivalued": true, "@type": "SlotDefinition" }, { "name": "other", "definition_uri": "https://w3id.org/sssom/other", "description": "Pipe separated list of key value pairs for properties not part of the SSSOM spec. Can be used to encode additional provenance data. NOTE. This field is not recommended for general use, and should be used sparingly. 
See https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv for an alternative approach based on extension slots.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/other", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "comment", "definition_uri": "https://w3id.org/sssom/comment", "description": "Free text field containing either curator notes or text generated by a tool providing additional information.", "examples": [ { "value": "This mapping is weird in that the hierarchical position of the two terms is very different.", "description": "A comment explaining a mapping author's reservation about a mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/2000/01/rdf-schema#comment" ], "slot_uri": "http://www.w3.org/2000/01/rdf-schema#comment", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "extension_definitions", "definition_uri": "https://w3id.org/sssom/extension_definitions", "description": "A list that defines the extension slots used in the mapping set.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/328", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/extension_definitions", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "ExtensionDefinition", "multivalued": true, "inlined": true, "inlined_as_list": true, "@type": "SlotDefinition" }, { "name": "record_id", "definition_uri": "https://w3id.org/sssom/record_id", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "A unique identifier for a mapping record, that is for an instance 
of the Mapping class (in the SSSOM/TSV serialisation, this corresponds to an individual row after propagation is applied). This slot is intended to uniquely identify one such record within a mapping set and may for example act as the resource identifier for the record when it is serialised into RDF. This slot MUST NOT be used to \u201cgroup\u201d several records together to indicate that they pertain to a single mapping (for example, that they represent different versions of the same mapping), by assigning the same ID to several records. When it is used, every record within a set MUST have a unique, non-empty value. The identifier MUST be a URI; beyond that, its format is unconstrained and the identifier MUST be treated as an opaque string.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/359", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/record-ids.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/record_id", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "extensionDefinition__slot_name", "description": "The name of the extension slot.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/slot_name", "alias": "slot_name", "owner": "ExtensionDefinition", "domain_of": [ "ExtensionDefinition" ], "range": "ncname", "required": true, "@type": "SlotDefinition" }, { "name": "extensionDefinition__property", "description": "The property associated with the extension slot. 
It is intended to provide a non-ambiguous meaning to the slot (contrary to the slot_name, which for brevity reasons may be ambiguous).", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/property", "alias": "property", "owner": "ExtensionDefinition", "domain_of": [ "ExtensionDefinition" ], "range": "uriorcurie", "@type": "SlotDefinition" }, { "name": "extensionDefinition__type_hint", "description": "Expected type of the values of the extension slot.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/type_hint", "alias": "type_hint", "owner": "ExtensionDefinition", "domain_of": [ "ExtensionDefinition" ], "range": "uriorcurie", "@type": "SlotDefinition" }, { "name": "propagatable__propagated", "description": "Indicates whether a slot can be propagated from a mapping down to individual mappings.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/propagated", "alias": "propagated", "owner": "Propagatable", "domain_of": [ "Propagatable" ], "range": "boolean", "@type": "SlotDefinition" }, { "name": "versionable__added_in", "description": "The version of the specification in which the slot was added. If not specified, the slot must be assumed to have been added in version 1.0.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/added_in", "alias": "added_in", "owner": "Versionable", "domain_of": [ "Versionable" ], "range": "sssom_version_enum", "@type": "SlotDefinition" }, { "name": "mapping_set_license", "definition_uri": "https://w3id.org/sssom/license", "description": "A url to the license of the mapping. 
In absence of a license we assume no license.", "examples": [ { "value": "https://creativecommons.org/licenses/by/4.0/", "description": "The URI of the Creative Commons Attribution 4.0 International license.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/license" ], "is_a": "license", "domain": "MappingSet", "slot_uri": "http://purl.org/dc/terms/license", "alias": "license", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "is_usage_slot": true, "usage_slot_name": "license", "range": "NonRelativeURI", "required": true, "@type": "SlotDefinition" }, { "name": "mapping_set_similarity_measure", "definition_uri": "https://w3id.org/sssom/similarity_measure", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. 
To make processing this field as unambiguous as possible, we recommend using Wikidata CURIEs, but the type of this field is deliberately unspecified.", "examples": [ { "value": "https://www.wikidata.org/entity/Q865360", "description": "The Wikidata IRI for the Jaccard index measure.", "@type": "Example" }, { "value": "wikidata:Q865360", "description": "The Wikidata CURIE for the Jaccard index measure.", "@type": "Example" }, { "value": "Levenshtein distance", "description": "A score to measure the distance between two character sequences.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/385", "https://github.com/mapping-commons/sssom/pull/386", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/similarity_score.sssom.tsv" ], "is_a": "similarity_measure", "domain": "MappingSet", "slot_uri": "https://w3id.org/sssom/similarity_measure", "alias": "similarity_measure", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "is_usage_slot": true, "usage_slot_name": "similarity_measure", "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_set_curation_rule", "definition_uri": "https://w3id.org/sssom/curation_rule", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. 
The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule.", "examples": [ { "value": "DISEASE_MAPPING_COMMONS_RULES:MPR2", "description": "A reference to the Disease Mapping Commons rule with the ID MPR2.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/166", "https://github.com/mapping-commons/sssom/pull/258", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule-propagated.sssom.tsv" ], "is_a": "curation_rule", "domain": "MappingSet", "slot_uri": "https://w3id.org/sssom/curation_rule", "alias": "curation_rule", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "is_usage_slot": true, "usage_slot_name": "curation_rule", "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "mapping_set_curation_rule_text", "definition_uri": "https://w3id.org/sssom/curation_rule_text", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. 
The textual representation of curation rule is intended to be used in cases where the creation of a resource is not practical from the perspective of the mapping_provider.", "examples": [ { "value": "The two phenotypes inhere in homologous structures and exhibit the same phenotypic quality.", "@type": "Example" }, { "value": "The two diseases are used synonymous in the medical literature.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/166", "https://github.com/mapping-commons/sssom/pull/258", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule_text.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule_text-propagated.sssom.tsv" ], "is_a": "curation_rule_text", "domain": "MappingSet", "slot_uri": "https://w3id.org/sssom/curation_rule_text", "alias": "curation_rule_text", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "is_usage_slot": true, "usage_slot_name": "curation_rule_text", "range": "string", "multivalued": true, "@type": "SlotDefinition" } ], "classes": [ { "name": "MappingSet", "definition_uri": "https://w3id.org/sssom/MappingSet", "description": "Represents a set of mappings.", "from_schema": "https://w3id.org/sssom/schema/", "slots": [ "sssom_version", "curie_map", "mappings", "mapping_set_id", "mapping_set_version", "mapping_set_source", "mapping_set_title", "mapping_set_description", "mapping_set_confidence", "creator_id", "creator_label", "mapping_set_license", "subject_type", "subject_source", "subject_source_version", "object_type", "object_source", "object_source_version", "predicate_type", "mapping_provider", "cardinality_scope", "mapping_tool", "mapping_tool_id", "mapping_tool_version", "mapping_date", "publication_date", "subject_match_field", "object_match_field", "subject_preprocessing", "object_preprocessing", "mapping_set_similarity_measure", "mapping_set_curation_rule", 
"mapping_set_curation_rule_text", "see_also", "issue_tracker", "other", "comment", "extension_definitions" ], "slot_usage": {}, "class_uri": "https://w3id.org/sssom/MappingSet", "@type": "ClassDefinition" }, { "name": "Mapping", "definition_uri": "https://w3id.org/sssom/Mapping", "description": "Represents an individual mapping between a pair of entities.", "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "owl:Axiom" ], "slots": [ "record_id", "subject_id", "subject_label", "subject_category", "predicate_id", "predicate_label", "predicate_modifier", "object_id", "object_label", "object_category", "mapping_justification", "author_id", "author_label", "reviewer_id", "reviewer_label", "creator_id", "creator_label", "license", "subject_type", "subject_source", "subject_source_version", "object_type", "object_source", "object_source_version", "predicate_type", "mapping_provider", "mapping_source", "mapping_cardinality", "cardinality_scope", "mapping_tool", "mapping_tool_id", "mapping_tool_version", "mapping_date", "publication_date", "review_date", "confidence", "reviewer_agreement", "curation_rule", "curation_rule_text", "subject_match_field", "object_match_field", "match_string", "subject_preprocessing", "object_preprocessing", "similarity_score", "similarity_measure", "see_also", "issue_tracker_item", "other", "comment" ], "slot_usage": {}, "class_uri": "http://www.w3.org/2002/07/owl#Axiom", "unique_keys": [ { "unique_key_name": "record_identifier", "unique_key_slots": [ "record_id" ], "description": "Each mapping within a mapping set MAY be identified by a unique, opaque record identifier. This slot MUST be used consistently, in that either all mappings in the set have such a record identifier, or none of them have one. The behaviour when a set contains both mappings with a record identifier and mappings without a record identifier is unspecified. 
The behaviour when two mappings have the same record identifier is unspecified.", "@type": "UniqueKey" } ], "rules": [ { "preconditions": { "slot_conditions": [ { "name": "subject_type", "equals_string": "rdfs literal", "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "slot_conditions": [ { "name": "subject_label", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "@type": "ClassRule" }, { "preconditions": { "slot_conditions": [ { "name": "subject_type", "none_of": [ { "equals_string": "rdfs literal", "@type": "AnonymousSlotExpression" } ], "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "slot_conditions": [ { "name": "subject_id", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "@type": "ClassRule" }, { "preconditions": { "slot_conditions": [ { "name": "object_type", "equals_string": "rdfs literal", "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "slot_conditions": [ { "name": "object_label", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "@type": "ClassRule" }, { "preconditions": { "slot_conditions": [ { "name": "object_type", "none_of": [ { "equals_string": "rdfs literal", "@type": "AnonymousSlotExpression" } ], "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "slot_conditions": [ { "name": "object_id", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "@type": "ClassRule" }, { "preconditions": { "slot_conditions": [ { "name": "review_date", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "any_of": [ { "slot_conditions": [ { "name": "reviewer_id", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, { "slot_conditions": [ { "name": "reviewer_label", 
"required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" } ], "@type": "AnonymousClassExpression" }, "description": "If a review date is provided, then at least one of reviewer_id or reviewer_label must also be provided", "@type": "ClassRule" }, { "preconditions": { "slot_conditions": [ { "name": "reviewer_agreement", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "any_of": [ { "slot_conditions": [ { "name": "reviewer_id", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, { "slot_conditions": [ { "name": "reviewer_label", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" } ], "@type": "AnonymousClassExpression" }, "description": "If a reviewer agreement value is provided, then at least one of reviewer_id or reviewer_label must also be provided", "@type": "ClassRule" } ], "@type": "ClassDefinition" }, { "name": "MappingRegistry", "definition_uri": "https://w3id.org/sssom/MappingRegistry", "description": "A registry for managing mapping sets. It holds a set of mapping set references, and can import other registries.", "from_schema": "https://w3id.org/sssom/schema/", "slots": [ "mapping_registry_id", "mapping_registry_title", "mapping_registry_description", "imports", "mapping_set_references", "documentation", "homepage", "issue_tracker" ], "slot_usage": {}, "class_uri": "https://w3id.org/sssom/MappingRegistry", "@type": "ClassDefinition" }, { "name": "MappingSetReference", "definition_uri": "https://w3id.org/sssom/MappingSetReference", "description": "A reference to a mapping set. 
It allows to augment mapping set metadata from the perspective of the registry, for example, providing confidence, or a local filename or a grouping.", "from_schema": "https://w3id.org/sssom/schema/", "slots": [ "mapping_set_id", "mirror_from", "registry_confidence", "mapping_set_group", "last_updated", "local_name" ], "slot_usage": {}, "class_uri": "https://w3id.org/sssom/MappingSetReference", "@type": "ClassDefinition" }, { "name": "Prefix", "definition_uri": "https://w3id.org/sssom/Prefix", "from_schema": "https://w3id.org/sssom/schema/", "slots": [ "prefix_name", "prefix_url" ], "slot_usage": {}, "class_uri": "https://w3id.org/sssom/Prefix", "@type": "ClassDefinition" }, { "name": "ExtensionDefinition", "definition_uri": "https://w3id.org/sssom/ExtensionDefinition", "description": "A definition of an extension (non-standard) slot.", "from_schema": "https://w3id.org/sssom/schema/", "slots": [ "extensionDefinition__slot_name", "extensionDefinition__property", "extensionDefinition__type_hint" ], "slot_usage": {}, "attributes": [ { "name": "slot_name", "description": "The name of the extension slot.", "range": "ncname", "required": true, "@type": "SlotDefinition" }, { "name": "property", "description": "The property associated with the extension slot. 
It is intended to provide a non-ambiguous meaning to the slot (contrary to the slot_name, which for brevity reasons may be ambiguous).", "range": "uriorcurie", "@type": "SlotDefinition" }, { "name": "type_hint", "description": "Expected type of the values of the extension slot.", "range": "uriorcurie", "@type": "SlotDefinition" } ], "class_uri": "https://w3id.org/sssom/ExtensionDefinition", "@type": "ClassDefinition" }, { "name": "Propagatable", "definition_uri": "https://w3id.org/sssom/Propagatable", "description": "Metamodel extension class to describe slots whose value can be propagated down from the MappingSet class to the Mapping class.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/305" ], "mappings": [ "sssom:Propagatable" ], "slots": [ "propagatable__propagated" ], "slot_usage": {}, "attributes": [ { "name": "propagated", "description": "Indicates whether a slot can be propagated from a mapping down to individual mappings.", "range": "boolean", "@type": "SlotDefinition" } ], "class_uri": "https://w3id.org/sssom/Propagatable", "@type": "ClassDefinition" }, { "name": "Versionable", "definition_uri": "https://w3id.org/sssom/Versionable", "description": "Metamodel extension class to manage slots that may not exist in all versions of the model.", "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "sssom:Versionable" ], "slots": [ "versionable__added_in" ], "slot_usage": {}, "attributes": [ { "name": "added_in", "description": "The version of the specification in which the slot was added. 
If not specified, the slot must be assumed to have been added in version 1.0.", "range": "sssom_version_enum", "@type": "SlotDefinition" } ], "class_uri": "https://w3id.org/sssom/Versionable", "@type": "ClassDefinition" }, { "name": "NoTermFound", "definition_uri": "https://w3id.org/sssom/NoTermFound", "description": "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjunction with a corresponding subject_source or object_source to signify where the term was not found.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/28", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/no_term_found.sssom.tsv" ], "mappings": [ "sssom:NoTermFound" ], "slot_usage": {}, "class_uri": "https://w3id.org/sssom/NoTermFound", "@type": "ClassDefinition" } ], "metamodel_version": "1.7.0", "source_file": "sssom_schema.yaml", "source_file_date": "2026-04-04T08:06:51", "source_file_size": 58930, "generation_date": "2026-04-14T16:00:32", "@type": "SchemaDefinition", "@context": [ "project/jsonld/sssom_schema.context.jsonld", "https://w3id.org/linkml/types.context.jsonld", { "@base": "https://w3id.org/sssom/" } ] } ================================================ FILE: project/jsonschema/sssom_schema.schema.json ================================================ { "$defs": { "EntityTypeEnum": { "description": "", "enum": [ "owl class", "owl object property", "owl data property", "owl annotation property", "owl named individual", "skos concept", "rdfs resource", "rdfs class", "rdfs literal", "rdfs datatype", "rdf property", "composed entity expression" ], "title": "EntityTypeEnum", "type": "string" }, "ExtensionDefinition": { "additionalProperties": false, "description": "A definition of an extension (non-standard) slot.", "properties": { "property": { "description": "The property associated with the extension slot. 
It is intended to provide a non-ambiguous meaning to the slot (contrary to the slot_name, which for brevity reasons may be ambiguous).", "type": [ "string", "null" ] }, "slot_name": { "description": "The name of the extension slot.", "type": "string" }, "type_hint": { "description": "Expected type of the values of the extension slot.", "type": [ "string", "null" ] } }, "required": [ "slot_name" ], "title": "ExtensionDefinition", "type": "object" }, "Mapping": { "additionalProperties": false, "allOf": [ { "if": { "properties": { "subject_type": { "const": "rdfs literal" } }, "required": [ "subject_type" ] }, "then": { "properties": { "subject_label": {} }, "required": [ "subject_label" ] } }, { "if": { "properties": { "subject_type": { "not": { "anyOf": [ { "const": "rdfs literal" } ] } } }, "required": [ "subject_type" ] }, "then": { "properties": { "subject_id": {} }, "required": [ "subject_id" ] } }, { "if": { "properties": { "object_type": { "const": "rdfs literal" } }, "required": [ "object_type" ] }, "then": { "properties": { "object_label": {} }, "required": [ "object_label" ] } }, { "if": { "properties": { "object_type": { "not": { "anyOf": [ { "const": "rdfs literal" } ] } } }, "required": [ "object_type" ] }, "then": { "properties": { "object_id": {} }, "required": [ "object_id" ] } }, { "if": { "properties": { "review_date": {} }, "required": [ "review_date" ] }, "then": { "anyOf": [ { "properties": { "reviewer_id": {} }, "required": [ "reviewer_id" ] }, { "properties": { "reviewer_label": {} }, "required": [ "reviewer_label" ] } ] } }, { "if": { "properties": { "reviewer_agreement": {} }, "required": [ "reviewer_agreement" ] }, "then": { "anyOf": [ { "properties": { "reviewer_id": {} }, "required": [ "reviewer_id" ] }, { "properties": { "reviewer_label": {} }, "required": [ "reviewer_label" ] } ] } } ], "description": "Represents an individual mapping between a pair of entities.", "properties": { "author_id": { "description": "Identifies the persons or 
groups responsible for asserting the mappings. Recommended to be a list of ORCIDs or otherwise identifying URIs.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "author_label": { "description": "A string representing the author of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in author_id) for that author. It is not expected that there should be any link between author_id and author_label; in particular, author_label is not intended to provide a human-friendly version of an identifier in author_id.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "cardinality_scope": { "description": "A list of mapping slots that define the scope for the value found in the mapping_cardinality slot. Mappings are considered to belong to the same scope if they have the same value for all slots listed in the scope. If no scope is defined, the default scope is empty, meaning that all mappings belong to a single scope that is identical to the entire mapping set. The behaviour if a value in the list does not correspond to a valid slot name is undefined.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "comment": { "description": "Free text field containing either curator notes or text generated by tool providing additional informative information.", "type": [ "string", "null" ] }, "confidence": { "description": "A value assigned by the creator of the mapping to denote the creator's confidence or estimated probability that the mapping record is correct. 
A value of 1.0 means the creator has full confidence in the correctness of the mapping record, while a value of 0.0 means the creator is fully unsure whether the mapping record is correct or not.\nWhen not explicitly specified, confidence estimation algorithms should consider the mapping confidence to be 1.0 by default.", "maximum": 1.0, "minimum": 0.0, "type": [ "number", "null" ] }, "creator_id": { "description": "Identifies the persons or groups responsible for the creation of the mapping. The creator is the agent that put the mapping in its published form, which may be different from the author, which is a person that was actively involved in the assertion of the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "creator_label": { "description": "A string representing the creator of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in creator_id) for that creator. It is not expected that there should be any link between creator_id and creator_label; in particular, creator_label is not intended to provide a human-friendly version of an identifier in creator_id.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "curation_rule": { "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. 
The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "curation_rule_text": { "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. The textual representation of curation rule is intended to be used in cases where the creation of a resource is not practical from the perspective of the mapping_provider.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "issue_tracker_item": { "description": "The issue tracker item discussing this mapping.", "type": [ "string", "null" ] }, "license": { "description": "A url to the license of the mapping. In absence of a license we assume no license.", "type": [ "string", "null" ] }, "mapping_cardinality": { "$ref": "#/$defs/MappingCardinalityEnum", "description": "A value indicating whether the subject (respectively object) of this mapping record is present in other records involving a different object (respectively subject), within the subset of records defined by the cardinality_scope slot (or within the entire mapping set if cardinality_scope is undefined). Note that this is a convenience field, whose values can always be derived from the mapping set." }, "mapping_date": { "description": "The date the mapping was asserted. 
This is different from the date the mapping was published or compiled in a SSSOM file.", "format": "date", "type": [ "string", "null" ] }, "mapping_justification": { "anyOf": [ { "const": "semapv:LexicalMatching" }, { "const": "semapv:LogicalReasoning" }, { "const": "semapv:CompositeMatching" }, { "const": "semapv:UnspecifiedMatching" }, { "const": "semapv:SemanticSimilarityThresholdMatching" }, { "const": "semapv:LexicalSimilarityThresholdMatching" }, { "const": "semapv:MappingChaining" }, { "const": "semapv:MappingReview" }, { "const": "semapv:ManualMappingCuration" }, { "const": "semapv:MappingInversion" }, { "const": "semapv:StructuralMatching" }, { "const": "semapv:InstanceBasedMatching" }, { "const": "semapv:BackgroundKnowledgeBasedMatching" } ], "description": "A mapping justification is an action (or the written representation of that action) of showing a mapping to be right or reasonable.", "pattern": "^semapv:(MappingReview|ManualMappingCuration|LogicalReasoning|LexicalMatching|CompositeMatching|UnspecifiedMatching|SemanticSimilarityThresholdMatching|LexicalSimilarityThresholdMatching|MappingChaining|MappingInversion|StructuralMatching|InstanceBasedMatching|BackgroundKnowledgeBasedMatching)$", "type": "string" }, "mapping_provider": { "description": "URL pointing to the source that provided the mapping, for example an ontology that already contains the mappings, or a database from which it was derived.", "type": [ "string", "null" ] }, "mapping_source": { "description": "The mapping set this mapping was originally defined in. mapping_source is used for example when merging multiple mapping sets or deriving one mapping set from another.", "type": [ "string", "null" ] }, "mapping_tool": { "description": "A reference to the tool or algorithm that was used to generate the mapping. Should be a URL pointing to more info about it, but can be free text. 
Consider using the mapping_tool_id slot for a more standardised reference.", "type": [ "string", "null" ] }, "mapping_tool_id": { "description": "The ID (entity reference) of the tool or algorithm that was used to generate the mapping.", "type": [ "string", "null" ] }, "mapping_tool_version": { "description": "Version string that denotes the version of the mapping tool used.", "type": [ "string", "null" ] }, "match_string": { "description": "String that is shared by subj/obj. It is recommended to indicate the fields for the match using the object and subject_match_field slots.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "object_category": { "description": "The conceptual category to which the object belongs. This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases.", "type": [ "string", "null" ] }, "object_id": { "description": "The ID of the object of the mapping.", "type": [ "string", "null" ] }, "object_label": { "description": "The label of object of the mapping.", "type": [ "string", "null" ] }, "object_match_field": { "description": "A list of properties, annotations or attributes related to the object that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. 
For additional information see the 'See Also' section.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "object_preprocessing": { "description": "Method of preprocessing applied to the fields of the object. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "object_source": { "description": "URI of vocabulary or identifier source for the object.", "type": [ "string", "null" ] }, "object_source_version": { "description": "Version IRI or version string of the source of the object term.", "type": [ "string", "null" ] }, "object_type": { "$ref": "#/$defs/EntityTypeEnum", "description": "The type of entity that is being mapped." }, "other": { "description": "Pipe separated list of key value pairs for properties not part of the SSSOM spec. Can be used to encode additional provenance data. NOTE. This field is not recommended for general use, and should be used sparingly. See https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv for an alternative approach based on extension slots.", "type": [ "string", "null" ] }, "predicate_id": { "description": "The ID of the predicate or relation that relates the subject and object of this match.", "type": "string" }, "predicate_label": { "description": "The label of the predicate/relation of the mapping.", "type": [ "string", "null" ] }, "predicate_modifier": { "$ref": "#/$defs/PredicateModifierEnum", "description": "A modifier for negating the predicate. See https://github.com/mapping-commons/sssom/issues/40 for discussion" }, "predicate_type": { "$ref": "#/$defs/EntityTypeEnum", "description": "The type of the predicate used to map the subject and object entities." }, "publication_date": { "description": "The date the mapping was published. 
This is different from the date the mapping was asserted.", "format": "date", "type": [ "string", "null" ] }, "record_id": { "description": "A unique identifier for a mapping record, that is for an instance of the Mapping class (in the SSSOM/TSV serialisation, this corresponds to an individual row after propagation is applied). This slot is intended to uniquely identify one such record within a mapping set and may for example act as the resource identifier for the record when it is serialised into RDF. This slot MUST NOT be used to \u201cgroup\u201d several records together to indicate that they pertain to a single mapping (for example, that they represent different versions of the same mapping), by assigning the same ID to several records. When it is used, every record within a set MUST have a unique, non-empty value. The identifier MUST be a URI; beyond that, its format is unconstrained and the identifier MUST be treated as an opaque string.", "type": [ "string", "null" ] }, "review_date": { "description": "The date the mapping was reviewed. This is different from the date the mapping was asserted and published. If this field is used in a mapping, reviewer_id and/or reviewer_label MUST also be set.", "format": "date", "type": [ "string", "null" ] }, "reviewer_agreement": { "description": "A value assigned by the reviewer of the mapping to denote their confidence that the mapping record is correct. A value of 1.0 means the reviewer fully agrees with the mapping record. A value of -1.0 means the reviewer fully disagrees with the mapping record. A value of 0.0 means the reviewer is not sure whether the mapping record is correct or not.\nWhen not explicitly specified, confidence estimation algorithms should consider the reviewer agreement to be 1.0 by default.", "maximum": 1.0, "minimum": -1.0, "type": [ "number", "null" ] }, "reviewer_id": { "description": "Identifies the persons or groups that reviewed and confirmed the mapping. 
Recommended to be a list of ORCIDs or otherwise identifying URIs.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "reviewer_label": { "description": "A string representing the reviewer of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in reviewer_id) for that reviewer. It is not expected that there should be any link between reviewer_id and reviewer_label; in particular, reviewer_label is not intended to provide a human-friendly version of an identifier in reviewer_id.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "see_also": { "description": "A URL specific for the mapping instance. E.g. for kboom we have a per-mapping image that shows surrounding axioms that drive probability. Could also be a github issue URL that discussed a complicated alignment", "items": { "type": "string" }, "type": [ "array", "null" ] }, "similarity_measure": { "description": "The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. To make processing this field as unambiguous as possible, we recommend using wikidata CURIEs, but the type of this field is deliberately unspecified.", "type": [ "string", "null" ] }, "similarity_score": { "description": "A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm.", "maximum": 1.0, "minimum": 0.0, "type": [ "number", "null" ] }, "subject_category": { "description": "The conceptual category to which the subject belongs to. This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. 
Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases.", "type": [ "string", "null" ] }, "subject_id": { "description": "The ID of the subject of the mapping.", "type": [ "string", "null" ] }, "subject_label": { "description": "The label of subject of the mapping.", "type": [ "string", "null" ] }, "subject_match_field": { "description": "A list of properties, annotations or attributes related to the subject that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "subject_preprocessing": { "description": "Method of preprocessing applied to the fields of the subject. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "subject_source": { "description": "URI of vocabulary or identifier source for the subject.", "type": [ "string", "null" ] }, "subject_source_version": { "description": "Version IRI or version string of the source of the subject term.", "type": [ "string", "null" ] }, "subject_type": { "$ref": "#/$defs/EntityTypeEnum", "description": "The type of entity that is being mapped." 
} }, "required": [ "predicate_id", "mapping_justification" ], "title": "Mapping", "type": "object" }, "MappingCardinalityEnum": { "description": "", "enum": [ "1:1", "1:n", "n:1", "n:n", "1:0", "0:1", "0:0" ], "title": "MappingCardinalityEnum", "type": "string" }, "MappingRegistry": { "additionalProperties": false, "description": "A registry for managing mapping sets. It holds a set of mapping set references, and can import other registries.", "properties": { "documentation": { "description": "A URL to the documentation of this mapping commons.", "type": [ "string", "null" ] }, "homepage": { "description": "A URL to a homepage of this mapping commons.", "type": [ "string", "null" ] }, "imports": { "description": "A list of registries that should be imported into this one.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "issue_tracker": { "description": "A URL location of the issue tracker for this entity.", "type": [ "string", "null" ] }, "mapping_registry_description": { "description": "The description of a mapping registry.", "type": [ "string", "null" ] }, "mapping_registry_id": { "description": "The unique identifier of a mapping registry.", "type": "string" }, "mapping_registry_title": { "description": "The title of a mapping registry.", "type": [ "string", "null" ] }, "mapping_set_references": { "description": "A list of mapping set references.", "items": { "$ref": "#/$defs/MappingSetReference" }, "type": [ "array", "null" ] } }, "required": [ "mapping_registry_id" ], "title": "MappingRegistry", "type": "object" }, "MappingSet": { "additionalProperties": false, "description": "Represents a set of mappings.", "properties": { "cardinality_scope": { "description": "A list of mapping slots that define the scope for the value found in the mapping_cardinality slot. Mappings are considered to belong to the same scope if they have the same value for all slots listed in the scope. 
If no scope is defined, the default scope is empty, meaning that all mappings belong to a single scope that is identical to the entire mapping set. The behaviour if a value in the list does not correspond to a valid slot name is undefined.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "comment": { "description": "Free text field containing either curator notes or text generated by tool providing additional informative information.", "type": [ "string", "null" ] }, "creator_id": { "description": "Identifies the persons or groups responsible for the creation of the mapping. The creator is the agent that put the mapping in its published form, which may be different from the author, which is a person that was actively involved in the assertion of the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "creator_label": { "description": "A string representing the creator of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in creator_id) for that creator. It is not expected that there should be any link between creator_id and creator_label; in particular, creator_label is not intended to provide a human-friendly version of an identifier in creator_id.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "curation_rule": { "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. 
The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "curation_rule_text": { "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. The textual representation of curation rule is intended to be used in cases where the creation of a resource is not practical from the perspective of the mapping_provider.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "curie_map": { "additionalProperties": { "anyOf": [ { "$ref": "#/$defs/Prefix__identifier_optional" }, { "type": "string" }, { "type": "null" } ] }, "description": "A dictionary that contains prefixes as keys and their URI expansions as values.", "type": [ "object", "null" ] }, "extension_definitions": { "description": "A list that defines the extension slots used in the mapping set.", "items": { "$ref": "#/$defs/ExtensionDefinition" }, "type": [ "array", "null" ] }, "issue_tracker": { "description": "A URL location of the issue tracker for this entity.", "type": [ "string", "null" ] }, "license": { "description": "A url to the license of the mapping. In absence of a license we assume no license.", "type": "string" }, "mapping_date": { "description": "The date the mapping was asserted. 
This is different from the date the mapping was published or compiled in a SSSOM file.", "format": "date", "type": [ "string", "null" ] }, "mapping_provider": { "description": "URL pointing to the source that provided the mapping, for example an ontology that already contains the mappings, or a database from which it was derived.", "type": [ "string", "null" ] }, "mapping_set_confidence": { "description": "Mapping-set level confidence is assigned by the creator of the mapping set to indicate their overall confidence in the correctness (i.e., precision) of mappings in the mapping set. Mapping set confidence is intended to be used in cases where the creator wants to express an overall confidence in the agent that curated the individual mappings, for example a lexical matching tool, or a group of students.\nWhen not explicitly specified, confidence estimation algorithms should consider the mapping set confidence to be 1.0 by default.", "maximum": 1.0, "minimum": 0.0, "type": [ "number", "null" ] }, "mapping_set_description": { "description": "A description of the mapping set.", "type": [ "string", "null" ] }, "mapping_set_id": { "description": "A globally unique identifier for the mapping set (not each individual mapping). Should ideally be resolvable.", "type": "string" }, "mapping_set_source": { "description": "A mapping set or set of mapping sets that was used to derive the mapping set.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "mapping_set_title": { "description": "The display name of a mapping set.", "type": [ "string", "null" ] }, "mapping_set_version": { "description": "A version string for the mapping.", "type": [ "string", "null" ] }, "mapping_tool": { "description": "A reference to the tool or algorithm that was used to generate the mapping. Should be a URL pointing to more info about it, but can be free text. 
Consider using the mapping_tool_id slot for a more standardised reference.", "type": [ "string", "null" ] }, "mapping_tool_id": { "description": "The ID (entity reference) of the tool or algorithm that was used to generate the mapping.", "type": [ "string", "null" ] }, "mapping_tool_version": { "description": "Version string that denotes the version of the mapping tool used.", "type": [ "string", "null" ] }, "mappings": { "description": "Contains a list of mapping objects.", "items": { "$ref": "#/$defs/Mapping" }, "type": [ "array", "null" ] }, "object_match_field": { "description": "A list of properties, annotations or attributes related to the object that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "object_preprocessing": { "description": "Method of preprocessing applied to the fields of the object. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "object_source": { "description": "URI of vocabulary or identifier source for the object.", "type": [ "string", "null" ] }, "object_source_version": { "description": "Version IRI or version string of the source of the object term.", "type": [ "string", "null" ] }, "object_type": { "$ref": "#/$defs/EntityTypeEnum", "description": "The type of entity that is being mapped." }, "other": { "description": "Pipe separated list of key value pairs for properties not part of the SSSOM spec. Can be used to encode additional provenance data. NOTE. This field is not recommended for general use, and should be used sparingly. 
See https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv for an alternative approach based on extension slots.", "type": [ "string", "null" ] }, "predicate_type": { "$ref": "#/$defs/EntityTypeEnum", "description": "The type of the predicate used to map the subject and object entities." }, "publication_date": { "description": "The date the mapping was published. This is different from the date the mapping was asserted.", "format": "date", "type": [ "string", "null" ] }, "see_also": { "description": "A URL specific for the mapping instance. E.g. for kboom we have a per-mapping image that shows surrounding axioms that drive probability. Could also be a github issue URL that discussed a complicated alignment", "items": { "type": "string" }, "type": [ "array", "null" ] }, "similarity_measure": { "description": "The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. To make processing this field as unambiguous as possible, we recommend using wikidata CURIEs, but the type of this field is deliberately unspecified.", "type": [ "string", "null" ] }, "sssom_version": { "$ref": "#/$defs/SssomVersionEnum", "description": "The version of the SSSOM specification a mapping set is compliant with." }, "subject_match_field": { "description": "A list of properties, annotations or attributes related to the subject that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "subject_preprocessing": { "description": "Method of preprocessing applied to the fields of the subject. 
If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows.", "items": { "type": "string" }, "type": [ "array", "null" ] }, "subject_source": { "description": "URI of vocabulary or identifier source for the subject.", "type": [ "string", "null" ] }, "subject_source_version": { "description": "Version IRI or version string of the source of the subject term.", "type": [ "string", "null" ] }, "subject_type": { "$ref": "#/$defs/EntityTypeEnum", "description": "The type of entity that is being mapped." } }, "required": [ "mapping_set_id", "license" ], "title": "MappingSet", "type": "object" }, "MappingSetReference": { "additionalProperties": false, "description": "A reference to a mapping set. It allows to augment mapping set metadata from the perspective of the registry, for example, providing confidence, or a local filename or a grouping.", "properties": { "last_updated": { "description": "The date this reference was last updated.", "format": "date", "type": [ "string", "null" ] }, "local_name": { "description": "The local name assigned to file that corresponds to the downloaded mapping set.", "type": [ "string", "null" ] }, "mapping_set_group": { "description": "Set by the owners of the mapping registry. A way to group related mapping sets for example for UI purposes.", "type": [ "string", "null" ] }, "mapping_set_id": { "description": "A globally unique identifier for the mapping set (not each individual mapping). 
Should ideally be resolvable.", "type": "string" }, "mirror_from": { "description": "A URL location from which to obtain a resource, such as a mapping set.", "type": [ "string", "null" ] }, "registry_confidence": { "description": "This value is set by the creator/maintainer of the mapping registry and reflects the confidence the mapping registry has in the correctness (i.e., precision) of mappings in the mapping set.\nWhen not explicitly specified, confidence estimation algorithms should consider the registry confidence in a mapping set to be 1.0 by default.", "maximum": 1.0, "minimum": 0.0, "type": [ "number", "null" ] } }, "required": [ "mapping_set_id" ], "title": "MappingSetReference", "type": "object" }, "NoTermFound": { "additionalProperties": false, "description": "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjunction with a corresponding subject_source or object_source to signify where the term was not found.", "title": "NoTermFound", "type": "object" }, "PredicateModifierEnum": { "description": "", "enum": [ "Not" ], "title": "PredicateModifierEnum", "type": "string" }, "Prefix": { "additionalProperties": false, "description": "", "properties": { "prefix_name": { "type": [ "string", "null" ] }, "prefix_url": { "type": [ "string", "null" ] } }, "required": [ "prefix_name" ], "title": "Prefix", "type": "object" }, "Prefix__identifier_optional": { "additionalProperties": false, "description": "", "properties": { "prefix_name": { "type": [ "string", "null" ] }, "prefix_url": { "type": [ "string", "null" ] } }, "required": [], "title": "Prefix", "type": "object" }, "Propagatable": { "additionalProperties": false, "description": "Metamodel extension class to describe slots whose value can be propagated down from the MappingSet class to the Mapping class.", "properties": { "propagated": { "description": "Indicates whether a slot can be propagated from a mapping down 
to individual mappings.", "type": [ "boolean", "null" ] } }, "title": "Propagatable", "type": "object" }, "SssomVersionEnum": { "description": "", "enum": [ "1.0", "1.1" ], "title": "SssomVersionEnum", "type": "string" }, "Versionable": { "additionalProperties": false, "description": "Metamodel extension class to manage slots that may not exist in all versions of the model.", "properties": { "added_in": { "$ref": "#/$defs/SssomVersionEnum", "description": "The version of the specification in which the slot was added. If not specified, the slot must be assumed to have been added in version 1.0." } }, "title": "Versionable", "type": "object" } }, "$id": "https://w3id.org/sssom/schema/", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "metamodel_version": "1.7.0", "title": "sssom", "type": "object", "version": null } ================================================ FILE: project/prefixmap/sssom_schema.yaml ================================================ { "dcterms": "http://purl.org/dc/terms/", "linkml": "https://w3id.org/linkml/", "oboInOwl": "http://www.geneontology.org/formats/oboInOwl#", "owl": "http://www.w3.org/2002/07/owl#", "pav": "http://purl.org/pav/", "prov": "http://www.w3.org/ns/prov#", "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdfs": "http://www.w3.org/2000/01/rdf-schema#", "semapv": "https://w3id.org/semapv/vocab/", "skos": "http://www.w3.org/2004/02/skos/core#", "sssom": "https://w3id.org/sssom/", "xsd": "http://www.w3.org/2001/XMLSchema#", "Mapping": { "@id": "owl:Axiom" } } ================================================ FILE: project/protobuf/sssom_schema.proto ================================================ syntax="proto3"; package // metamodel_version: 1.7.0 // A definition of an extension (non-standard) slot. message ExtensionDefinition { ncname slotName = 0 uriorcurie property = 0 uriorcurie typeHint = 0 } // Represents an individual mapping between a pair of entities. 
message Mapping { entityReference recordId = 0 entityReference subjectId = 0 string subjectLabel = 0 string subjectCategory = 0 entityReference predicateId = 0 string predicateLabel = 0 predicateModifierEnum predicateModifier = 0 entityReference objectId = 0 string objectLabel = 0 string objectCategory = 0 entityReference mappingJustification = 0 repeated entityReference authorId = 0 repeated string authorLabel = 0 repeated entityReference reviewerId = 0 repeated string reviewerLabel = 0 repeated entityReference creatorId = 0 repeated string creatorLabel = 0 nonRelativeURI license = 0 entityTypeEnum subjectType = 0 entityReference subjectSource = 0 string subjectSourceVersion = 0 entityTypeEnum objectType = 0 entityReference objectSource = 0 string objectSourceVersion = 0 entityTypeEnum predicateType = 0 nonRelativeURI mappingProvider = 0 entityReference mappingSource = 0 mappingCardinalityEnum mappingCardinality = 0 repeated string cardinalityScope = 0 string mappingTool = 0 entityReference mappingToolId = 0 string mappingToolVersion = 0 date mappingDate = 0 date publicationDate = 0 date reviewDate = 0 double confidence = 0 double reviewerAgreement = 0 repeated entityReference curationRule = 0 repeated string curationRuleText = 0 repeated entityReference subjectMatchField = 0 repeated entityReference objectMatchField = 0 repeated string matchString = 0 repeated entityReference subjectPreprocessing = 0 repeated entityReference objectPreprocessing = 0 double similarityScore = 0 string similarityMeasure = 0 repeated nonRelativeURI seeAlso = 0 entityReference issueTrackerItem = 0 string other = 0 string comment = 0 } // A registry for managing mapping sets. It holds a set of mapping set references, and can import other registries. 
message MappingRegistry { entityReference mappingRegistryId = 0 string mappingRegistryTitle = 0 string mappingRegistryDescription = 0 repeated nonRelativeURI imports = 0 repeated mappingSetReference mappingSetReferences = 0 nonRelativeURI documentation = 0 nonRelativeURI homepage = 0 nonRelativeURI issueTracker = 0 } // Represents a set of mappings. message MappingSet { sssomVersionEnum sssomVersion = 0 repeated prefix curieMap = 0 repeated mapping mappings = 0 nonRelativeURI mappingSetId = 0 string mappingSetVersion = 0 repeated nonRelativeURI mappingSetSource = 0 string mappingSetTitle = 0 string mappingSetDescription = 0 double mappingSetConfidence = 0 repeated entityReference creatorId = 0 repeated string creatorLabel = 0 nonRelativeURI license = 0 entityTypeEnum subjectType = 0 entityReference subjectSource = 0 string subjectSourceVersion = 0 entityTypeEnum objectType = 0 entityReference objectSource = 0 string objectSourceVersion = 0 entityTypeEnum predicateType = 0 nonRelativeURI mappingProvider = 0 repeated string cardinalityScope = 0 string mappingTool = 0 entityReference mappingToolId = 0 string mappingToolVersion = 0 date mappingDate = 0 date publicationDate = 0 repeated entityReference subjectMatchField = 0 repeated entityReference objectMatchField = 0 repeated entityReference subjectPreprocessing = 0 repeated entityReference objectPreprocessing = 0 string similarityMeasure = 0 repeated entityReference curationRule = 0 repeated string curationRuleText = 0 repeated nonRelativeURI seeAlso = 0 nonRelativeURI issueTracker = 0 string other = 0 string comment = 0 repeated extensionDefinition extensionDefinitions = 0 } // A reference to a mapping set. It allows to augment mapping set metadata from the perspective of the registry, for example, providing confidence, or a local filename or a grouping. 
message MappingSetReference { nonRelativeURI mappingSetId = 0 nonRelativeURI mirrorFrom = 0 double registryConfidence = 0 string mappingSetGroup = 0 date lastUpdated = 0 string localName = 0 } message Prefix { ncname prefixName = 0 uri prefixUrl = 0 } // Metamodel extension class to describe slots whose value can be propagated down from the MappingSet class to the Mapping class. message Propagatable { boolean propagated = 0 } // Metamodel extension class to manage slots that may not exist in all versions of the model. message Versionable { sssomVersionEnum addedIn = 0 } ================================================ FILE: project/shacl/sssom_schema.shacl.ttl ================================================ @prefix dcterms: <http://purl.org/dc/terms/> . @prefix owl: <http://www.w3.org/2002/07/owl#> . @prefix pav: <http://purl.org/pav/> . @prefix prov: <http://www.w3.org/ns/prov#> . @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . @prefix sh: <http://www.w3.org/ns/shacl#> . @prefix skos: <http://www.w3.org/2004/02/skos/core#> . @prefix sssom: <https://w3id.org/sssom/> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . sssom:MappingRegistry a sh:NodeShape ; rdfs:comment "A registry for managing mapping sets. It holds a set of mapping set references, and can import other registries." ; sh:closed true ; sh:ignoredProperties ( rdf:type ) ; sh:property [ sh:class sssom:MappingSetReference ; sh:description "A list of mapping set references." ; sh:nodeKind sh:BlankNodeOrIRI ; sh:order 4 ; sh:path sssom:mapping_set_references ], [ sh:datatype rdfs:Resource ; sh:description "The unique identifier of a mapping registry." ; sh:maxCount 1 ; sh:minCount 1 ; sh:nodeKind sh:Literal ; sh:order 0 ; sh:path sssom:mapping_registry_id ], [ sh:datatype xsd:string ; sh:description "The title of a mapping registry." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 1 ; sh:path sssom:mapping_registry_title ], [ sh:datatype xsd:string ; sh:description "The description of a mapping registry." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 2 ; sh:path sssom:mapping_registry_description ], [ sh:description "A URL location of the issue tracker for this entity." 
; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 7 ; sh:path sssom:issue_tracker ], [ sh:description "A URL to the documentation of this mapping commons." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 5 ; sh:path sssom:documentation ], [ sh:description "A URL to a homepage of this mapping commons." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 6 ; sh:path sssom:homepage ], [ sh:description "A list of registries that should be imported into this one." ; sh:nodeKind sh:Literal ; sh:order 3 ; sh:path sssom:imports ] ; sh:targetClass sssom:MappingRegistry . sssom:MappingSet a sh:NodeShape ; rdfs:comment "Represents a set of mappings." ; sh:closed true ; sh:ignoredProperties ( rdf:type ) ; sh:property [ sh:datatype rdfs:Resource ; sh:description "Method of preprocessing applied to the fields of the subject. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows." ; sh:nodeKind sh:Literal ; sh:order 28 ; sh:path sssom:subject_preprocessing ], [ sh:datatype xsd:string ; sh:description "The display name of a mapping set." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 6 ; sh:path dcterms:title ], [ sh:datatype rdfs:Resource ; sh:description "A list of properties, annotations or attributes related to the object that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section." ; sh:nodeKind sh:Literal ; sh:order 27 ; sh:path sssom:object_match_field ], [ sh:description "The type of the predicate used to map the subject and object entities." 
; sh:in ( owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property sssom:ComposedEntityExpression ) ; sh:maxCount 1 ; sh:order 18 ; sh:path sssom:predicate_type ], [ sh:datatype xsd:string ; sh:description "Pipe separated list of key value pairs for properties not part of the SSSOM spec. Can be used to encode additional provenance data. NOTE. This field is not recommended for general use, and should be used sparingly. See https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv for an alternative approach based on extension slots." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 35 ; sh:path sssom:other ], [ sh:description "The type of entity that is being mapped." ; sh:in ( owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property sssom:ComposedEntityExpression ) ; sh:maxCount 1 ; sh:order 15 ; sh:path sssom:object_type ], [ sh:datatype xsd:string ; sh:description "The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. To make processing this field as unambiguous as possible, we recommend using wikidata CURIEs, but the type of this field is deliberately unspecified." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 30 ; sh:path sssom:similarity_measure ], [ sh:class sssom:Prefix ; sh:description "A dictionary that contains prefixes as keys and their URI expansions as values." ; sh:nodeKind sh:BlankNodeOrIRI ; sh:order 1 ; sh:path sssom:curie_map ], [ sh:datatype xsd:string ; sh:description "Version string that denotes the version of the mapping tool used." 
; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 23 ; sh:path sssom:mapping_tool_version ], [ sh:class sssom:ExtensionDefinition ; sh:description "A list that defines the extension slots used in the mapping set." ; sh:nodeKind sh:BlankNodeOrIRI ; sh:order 37 ; sh:path sssom:extension_definitions ], [ sh:description "The type of entity that is being mapped." ; sh:in ( owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property sssom:ComposedEntityExpression ) ; sh:maxCount 1 ; sh:order 12 ; sh:path sssom:subject_type ], [ sh:datatype rdfs:Resource ; sh:description "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule." ; sh:nodeKind sh:Literal ; sh:order 31 ; sh:path sssom:curation_rule ], [ sh:datatype xsd:string ; sh:description "A reference to the tool or algorithm that was used to generate the mapping. Should be a URL pointing to more info about it, but can be free text. Consider using the mapping_tool_id slot for a more standardised reference." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 21 ; sh:path sssom:mapping_tool ], [ sh:datatype rdfs:Resource ; sh:description "A list of properties, annotations or attributes related to the subject that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section." 
; sh:nodeKind sh:Literal ; sh:order 26 ; sh:path sssom:subject_match_field ], [ sh:datatype rdfs:Resource ; sh:description "URI of vocabulary or identifier source for the subject." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 13 ; sh:path sssom:subject_source ], [ sh:datatype rdfs:Resource ; sh:description "Method of preprocessing applied to the fields of the object. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows." ; sh:nodeKind sh:Literal ; sh:order 29 ; sh:path sssom:object_preprocessing ], [ sh:class owl:Axiom ; sh:description "Contains a list of mapping objects." ; sh:nodeKind sh:BlankNodeOrIRI ; sh:order 2 ; sh:path sssom:mappings ], [ sh:datatype xsd:string ; sh:description "Version IRI or version string of the source of the object term." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 17 ; sh:path sssom:object_source_version ], [ sh:datatype xsd:string ; sh:description "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. The textual representation of curation rule is intended to be used in cases where the creation of a resource is not practical from the perspective of the mapping_provider." ; sh:nodeKind sh:Literal ; sh:order 32 ; sh:path sssom:curation_rule_text ], [ sh:description "The version of the SSSOM specification a mapping set is compliant with." ; sh:in ( sssom:version1.0 sssom:version1.1 ) ; sh:maxCount 1 ; sh:order 0 ; sh:path sssom:sssom_version ], [ sh:datatype xsd:string ; sh:description "Version IRI or version string of the source of the subject term." 
; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 14 ; sh:path sssom:subject_source_version ], [ sh:datatype xsd:string ; sh:description "A description of the mapping set." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 7 ; sh:path dcterms:description ], [ sh:datatype xsd:date ; sh:description "The date the mapping was asserted. This is different from the date the mapping was published or compiled in a SSSOM file." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 24 ; sh:path dcterms:created ], [ sh:datatype rdfs:Resource ; sh:description "URI of vocabulary or identifier source for the object." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 16 ; sh:path sssom:object_source ], [ sh:datatype xsd:string ; sh:description "Free text field containing either curator notes or text generated by tool providing additional informative information." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 36 ; sh:path rdfs:comment ], [ sh:description "A mapping set or set of mapping sets that was used to derive the mapping set." ; sh:nodeKind sh:Literal ; sh:order 5 ; sh:path prov:wasDerivedFrom ], [ sh:description "A URL location of the issue tracker for this entity." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 34 ; sh:path sssom:issue_tracker ], [ sh:datatype xsd:string ; sh:description "A version string for the mapping." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 4 ; sh:path owl:versionInfo ], [ sh:description "A globally unique identifier for the mapping set (not each individual mapping). Should ideally be resolvable." ; sh:maxCount 1 ; sh:minCount 1 ; sh:nodeKind sh:Literal ; sh:order 3 ; sh:path sssom:mapping_set_id ], [ sh:datatype rdfs:Resource ; sh:description "The ID (entity reference) of the tool or algorithm that was used to generate the mapping." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 22 ; sh:path sssom:mapping_tool_id ], [ sh:datatype xsd:string ; sh:description "A string representing the creator of this mapping. 
This should only be used in the absence of a proper semantic identifier (which would be stored in creator_id) for that creator. It is not expected that there should be any link between creator_id and creator_label; in particular, creator_label is not intended to provide a human-friendly version of an identifier in creator_id." ; sh:nodeKind sh:Literal ; sh:order 10 ; sh:path sssom:creator_label ], [ sh:datatype xsd:double ; sh:description """Mapping-set level confidence is assigned by the creator of the mapping set to indicate their overall confidence in the correctness (i.e., precision) of mappings in the mapping set. Mapping set confidence is intended to be used in cases where the creator wants to express an overall confidence in the agent that curated the individual mappings, for example a lexical matching tool, or a group of students. When not explicitly specified, confidence estimation algorithms should consider the mapping set confidence to be 1.0 by default.""" ; sh:maxCount 1 ; sh:maxInclusive 1e+00 ; sh:minInclusive 0e+00 ; sh:nodeKind sh:Literal ; sh:order 8 ; sh:path sssom:mapping_set_confidence ], [ sh:datatype xsd:date ; sh:description "The date the mapping was published. This is different from the date the mapping was asserted." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 25 ; sh:path dcterms:issued ], [ sh:description "A URL specific for the mapping instance. E.g. for kboom we have a per-mapping image that shows surrounding axioms that drive probability. Could also be a github issue URL that discussed a complicated alignment" ; sh:nodeKind sh:Literal ; sh:order 33 ; sh:path rdfs:seeAlso ], [ sh:datatype rdfs:Resource ; sh:description "Identifies the persons or groups responsible for the creation of the mapping. The creator is the agent that put the mapping in its published form, which may be different from the author, which is a person that was actively involved in the assertion of the mapping. 
Recommended to be a list of ORCIDs or otherwise identifying URIs." ; sh:nodeKind sh:Literal ; sh:order 9 ; sh:path dcterms:creator ], [ sh:datatype xsd:string ; sh:description "A list of mapping slots that define the scope for the value found in the mapping_cardinality slot. Mappings are considered to belong to the same scope if they have the same value for all slots listed in the scope. If no scope is defined, the default scope is empty, meaning that all mappings belong to a single scope that is identical to the entire mapping set. The behaviour if a value in the list does not correspond to a valid slot name is undefined." ; sh:nodeKind sh:Literal ; sh:order 20 ; sh:path sssom:cardinality_scope ], [ sh:description "A url to the license of the mapping. In absence of a license we assume no license." ; sh:maxCount 1 ; sh:minCount 1 ; sh:nodeKind sh:Literal ; sh:order 11 ; sh:path dcterms:license ], [ sh:description "URL pointing to the source that provided the mapping, for example an ontology that already contains the mappings, or a database from which it was derived." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 19 ; sh:path sssom:mapping_provider ] ; sh:targetClass sssom:MappingSet . sssom:NoTermFound a sh:NodeShape ; rdfs:comment "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjunction with a corresponding subject_source or object_source to signify where the term was not found." ; sh:closed true ; sh:ignoredProperties ( rdf:type ) ; sh:targetClass sssom:NoTermFound . sssom:Propagatable a sh:NodeShape ; rdfs:comment "Metamodel extension class to describe slots whose value can be propagated down from the MappingSet class to the Mapping class." ; sh:closed true ; sh:ignoredProperties ( rdf:type ) ; sh:property [ sh:datatype xsd:boolean ; sh:description "Indicates whether a slot can be propagated from a mapping down to individual mappings." 
; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 0 ; sh:path sssom:propagated ] ; sh:targetClass sssom:Propagatable . sssom:Versionable a sh:NodeShape ; rdfs:comment "Metamodel extension class to manage slots that may not exist in all versions of the model." ; sh:closed true ; sh:ignoredProperties ( rdf:type ) ; sh:property [ sh:description "The version of the specification in which the slot was added. If not specified, the slot must be assumed to have been added in version 1.0." ; sh:in ( sssom:version1.0 sssom:version1.1 ) ; sh:maxCount 1 ; sh:order 0 ; sh:path sssom:added_in ] ; sh:targetClass sssom:Versionable . owl:Axiom a sh:NodeShape ; rdfs:comment "Represents an individual mapping between a pair of entities." ; sh:closed true ; sh:ignoredProperties ( rdf:type ) ; sh:property [ sh:datatype rdfs:Resource ; sh:description "Identifies the persons or groups that reviewed and confirmed the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs." ; sh:nodeKind sh:Literal ; sh:order 13 ; sh:path sssom:reviewer_id ], [ sh:datatype rdfs:Resource ; sh:description "Identifies the persons or groups responsible for the creation of the mapping. The creator is the agent that put the mapping in its published form, which may be different from the author, which is a person that was actively involved in the assertion of the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs." ; sh:nodeKind sh:Literal ; sh:order 15 ; sh:path dcterms:creator ], [ sh:datatype xsd:string ; sh:description "The label of object of the mapping." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 8 ; sh:path sssom:object_label ], [ sh:datatype xsd:string ; sh:description "Version string that denotes the version of the mapping tool used." 
; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 31 ; sh:path sssom:mapping_tool_version ], [ sh:datatype rdfs:Resource ; sh:description "The ID (entity reference) of the tool or algorithm that was used to generate the mapping." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 30 ; sh:path sssom:mapping_tool_id ], [ sh:description "A URL specific for the mapping instance. E.g. for kboom we have a per-mapping image that shows surrounding axioms that drive probability. Could also be a github issue URL that discussed a complicated alignment" ; sh:nodeKind sh:Literal ; sh:order 46 ; sh:path rdfs:seeAlso ], [ sh:datatype xsd:string ; sh:description "Version IRI or version string of the source of the object term." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 23 ; sh:path sssom:object_source_version ], [ sh:datatype rdfs:Resource ; sh:description "A unique identifier for a mapping record, that is for an instance of the Mapping class (in the SSSOM/TSV serialisation, this corresponds to an individual row after propagation is applied). This slot is intended to uniquely identify one such record within a mapping set and may for example act as the resource identifier for the record when it is serialised into RDF. This slot MUST NOT be used to “group” several records together to indicate that they pertain to a single mapping (for example, that they represent different versions of the same mapping), by assigning the same ID to several records. When it is used, every record within a set MUST have a unique, non-empty value. The identifier MUST be a URI; beyond that, its format is unconstrained and the identifier MUST be treated as an opaque string." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 0 ; sh:path sssom:record_id ], [ sh:datatype rdfs:Resource ; sh:description "Identifies the persons or groups responsible for asserting the mappings. Recommended to be a list of ORCIDs or otherwise identifying URIs." 
; sh:nodeKind sh:Literal ; sh:order 11 ; sh:path pav:authoredBy ], [ sh:description "URL pointing to the source that provided the mapping, for example an ontology that already contains the mappings, or a database from which it was derived." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 25 ; sh:path sssom:mapping_provider ], [ sh:datatype rdfs:Resource ; sh:description "The mapping set this mapping was originally defined in. mapping_source is used for example when merging multiple mapping sets or deriving one mapping set from another." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 26 ; sh:path sssom:mapping_source ], [ sh:datatype xsd:date ; sh:description "The date the mapping was reviewed. This is different from the date the mapping was asserted and published. If this field is used in a mapping, reviewer_id and/or reviewer_label MUST also be set." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 34 ; sh:path sssom:review_date ], [ sh:description "A mapping justification is an action (or the written representation of that action) of showing a mapping to be right or reasonable." ; sh:maxCount 1 ; sh:minCount 1 ; sh:or ( [ ] [ ] [ ] [ ] [ ] [ ] [ ] [ ] [ ] [ ] [ ] [ ] [ ] ) ; sh:order 10 ; sh:path sssom:mapping_justification ], [ sh:datatype rdfs:Resource ; sh:description "The issue tracker item discussing this mapping." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 47 ; sh:path sssom:issue_tracker_item ], [ sh:datatype xsd:string ; sh:description "Free text field containing either curator notes or text generated by tool providing additional informative information." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 49 ; sh:path rdfs:comment ], [ sh:description "The type of the predicate used to map the subject and object entities." 
; sh:in ( owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property sssom:ComposedEntityExpression ) ; sh:maxCount 1 ; sh:order 24 ; sh:path sssom:predicate_type ], [ sh:description "A modifier for negating the predicate. See https://github.com/mapping-commons/sssom/issues/40 for discussion" ; sh:in ( sssom:NegatedPredicate ) ; sh:maxCount 1 ; sh:order 6 ; sh:path sssom:predicate_modifier ], [ sh:datatype rdfs:Resource ; sh:description "The ID of the subject of the mapping." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 1 ; sh:path owl:annotatedSource ], [ sh:datatype rdfs:Resource ; sh:description "The ID of the object of the mapping." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 7 ; sh:path owl:annotatedTarget ], [ sh:datatype rdfs:Resource ; sh:description "A list of properties, annotations or attributes related to the subject that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section." ; sh:nodeKind sh:Literal ; sh:order 39 ; sh:path sssom:subject_match_field ], [ sh:datatype rdfs:Resource ; sh:description "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule." 
; sh:nodeKind sh:Literal ; sh:order 37 ; sh:path sssom:curation_rule ], [ sh:datatype rdfs:Resource ; sh:description "URI of vocabulary or identifier source for the subject." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 19 ; sh:path sssom:subject_source ], [ sh:datatype xsd:string ; sh:description "A list of mapping slots that define the scope for the value found in the mapping_cardinality slot. Mappings are considered to belong to the same scope if they have the same value for all slots listed in the scope. If no scope is defined, the default scope is empty, meaning that all mappings belong to a single scope that is identical to the entire mapping set. The behaviour if a value in the list does not correspond to a valid slot name is undefined." ; sh:nodeKind sh:Literal ; sh:order 28 ; sh:path sssom:cardinality_scope ], [ sh:description "A url to the license of the mapping. In absence of a license we assume no license." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 17 ; sh:path dcterms:license ], [ sh:datatype xsd:string ; sh:description "A reference to the tool or algorithm that was used to generate the mapping. Should be a URL pointing to more info about it, but can be free text. Consider using the mapping_tool_id slot for a more standardised reference." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 29 ; sh:path sssom:mapping_tool ], [ sh:datatype xsd:string ; sh:description "A string representing the reviewer of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in reviewer_id) for that reviewer. It is not expected that there should be any link between reviewer_id and reviewer_label; in particular, reviewer_label is not intended to provide a human-friendly version of an identifier in reviewer_id." ; sh:nodeKind sh:Literal ; sh:order 14 ; sh:path sssom:reviewer_label ], [ sh:datatype rdfs:Resource ; sh:description "URI of vocabulary or identifier source for the object." 
; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 22 ; sh:path sssom:object_source ], [ sh:description "The type of entity that is being mapped." ; sh:in ( owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property sssom:ComposedEntityExpression ) ; sh:maxCount 1 ; sh:order 21 ; sh:path sssom:object_type ], [ sh:datatype xsd:string ; sh:description "The conceptual category to which the subject belongs to. This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 3 ; sh:path sssom:subject_category ], [ sh:datatype xsd:date ; sh:description "The date the mapping was asserted. This is different from the date the mapping was published or compiled in a SSSOM file." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 32 ; sh:path dcterms:created ], [ sh:datatype rdfs:Resource ; sh:description "A list of properties, annotations or attributes related to the object that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section." ; sh:nodeKind sh:Literal ; sh:order 40 ; sh:path sssom:object_match_field ], [ sh:datatype xsd:string ; sh:description "The conceptual category to which the subject belongs to. 
This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 9 ; sh:path sssom:object_category ], [ sh:datatype xsd:date ; sh:description "The date the mapping was published. This is different from the date the mapping was asserted." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 33 ; sh:path dcterms:issued ], [ sh:datatype xsd:double ; sh:description "A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm." ; sh:maxCount 1 ; sh:maxInclusive 1e+00 ; sh:minInclusive 0e+00 ; sh:nodeKind sh:Literal ; sh:order 44 ; sh:path sssom:similarity_score ], [ sh:datatype xsd:string ; sh:description "Pipe separated list of key value pairs for properties not part of the SSSOM spec. Can be used to encode additional provenance data. NOTE. This field is not recommended for general use, and should be used sparingly. See https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv for an alternative approach based on extension slots." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 48 ; sh:path sssom:other ], [ sh:datatype rdfs:Resource ; sh:description "The ID of the predicate or relation that relates the subject and object of this match." 
; sh:maxCount 1 ; sh:minCount 1 ; sh:nodeKind sh:Literal ; sh:order 4 ; sh:path owl:annotatedProperty ], [ sh:datatype xsd:string ; sh:description "A string representing the creator of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in creator_id) for that creator. It is not expected that there should be any link between creator_id and creator_label; in particular, creator_label is not intended to provide a human-friendly version of an identifier in creator_id." ; sh:nodeKind sh:Literal ; sh:order 16 ; sh:path sssom:creator_label ], [ sh:description "The type of entity that is being mapped." ; sh:in ( owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property sssom:ComposedEntityExpression ) ; sh:maxCount 1 ; sh:order 18 ; sh:path sssom:subject_type ], [ sh:datatype xsd:string ; sh:description "The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. To make processing this field as unambiguous as possible, we recommend using wikidata CURIEs, but the type of this field is deliberately unspecified." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 45 ; sh:path sssom:similarity_measure ], [ sh:datatype xsd:double ; sh:description """A value assigned by the creator of the mapping to denote the creator's confidence or estimated probability that the mapping record is correct. A value of 1.0 means the creator has full confidence in the correctness of the mapping record, while a value of 0.0 means the creator is fully unsure whether the mapping record is correct or not. 
When not explicitly specified, confidence estimation algorithms should consider the mapping confidence to be 1.0 by default.""" ; sh:maxCount 1 ; sh:maxInclusive 1e+00 ; sh:minInclusive 0e+00 ; sh:nodeKind sh:Literal ; sh:order 35 ; sh:path sssom:confidence ], [ sh:datatype rdfs:Resource ; sh:description "Method of preprocessing applied to the fields of the object. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows." ; sh:nodeKind sh:Literal ; sh:order 43 ; sh:path sssom:object_preprocessing ], [ sh:datatype xsd:double ; sh:description """A value assigned by the reviewer of the mapping to denote their confidence that the mapping record is correct. A value of 1.0 means the reviewer fully agrees with the mapping record. A value of -1.0 means the reviewer fully disagrees with the mapping record. A value of 0.0 means the reviewer is not sure whether the mapping record is correct or not. When not explicitly specified, confidence estimation algorithms should consider the reviewer agreement to be 1.0 by default.""" ; sh:maxCount 1 ; sh:maxInclusive 1e+00 ; sh:minInclusive -1e+00 ; sh:nodeKind sh:Literal ; sh:order 36 ; sh:path sssom:reviewer_agreement ], [ sh:description "A value indicating whether the subject (respectively object) of this mapping record is present in other records involving a different object (respectively subject), within the subset of records defined by the cardinality_scope slot (or within the entire mapping set if cardinality_scope is undefined). Note that this is a convenience field, whose values can always be derived from the mapping set." ; sh:in ( "1:1" "1:n" "n:1" "n:n" "1:0" "0:1" "0:0" ) ; sh:maxCount 1 ; sh:order 27 ; sh:path sssom:mapping_cardinality ], [ sh:datatype xsd:string ; sh:description "A string representing the author of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in author_id) for that author. 
It is not expected that there should be any link between author_id and author_label; in particular, author_label is not intended to provide a human-friendly version of an identifier in author_id." ; sh:nodeKind sh:Literal ; sh:order 12 ; sh:path sssom:author_label ], [ sh:datatype rdfs:Resource ; sh:description "Method of preprocessing applied to the fields of the subject. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows." ; sh:nodeKind sh:Literal ; sh:order 42 ; sh:path sssom:subject_preprocessing ], [ sh:datatype xsd:string ; sh:description "The label of the predicate/relation of the mapping." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 5 ; sh:path sssom:predicate_label ], [ sh:datatype xsd:string ; sh:description "Version IRI or version string of the source of the subject term." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 20 ; sh:path sssom:subject_source_version ], [ sh:datatype xsd:string ; sh:description "String that is shared by subj/obj. It is recommended to indicate the fields for the match using the object and subject_match_field slots." ; sh:nodeKind sh:Literal ; sh:order 41 ; sh:path sssom:match_string ], [ sh:datatype xsd:string ; sh:description "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. The textual representation of curation rule is intended to be used in cases where the creation of a resource is not practical from the perspective of the mapping_provider." 
; sh:nodeKind sh:Literal ; sh:order 38 ; sh:path sssom:curation_rule_text ], [ sh:datatype xsd:string ; sh:description "The label of subject of the mapping." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 2 ; sh:path sssom:subject_label ] ; sh:targetClass owl:Axiom . sssom:ExtensionDefinition a sh:NodeShape ; rdfs:comment "A definition of an extension (non-standard) slot." ; sh:closed true ; sh:ignoredProperties ( rdf:type ) ; sh:property [ sh:datatype xsd:anyURI ; sh:description "The property associated with the extension slot. It is intended to provide a non-ambiguous meaning to the slot (contrary to the slot_name, which for brevity reasons may be ambiguous)." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 1 ; sh:path sssom:property ], [ sh:datatype xsd:string ; sh:description "The name of the extension slot." ; sh:maxCount 1 ; sh:minCount 1 ; sh:nodeKind sh:Literal ; sh:order 0 ; sh:path sssom:slot_name ], [ sh:datatype xsd:anyURI ; sh:description "Expected type of the values of the extension slot." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 2 ; sh:path sssom:type_hint ] ; sh:targetClass sssom:ExtensionDefinition . sssom:MappingSetReference a sh:NodeShape ; rdfs:comment "A reference to a mapping set. It allows to augment mapping set metadata from the perspective of the registry, for example, providing confidence, or a local filename or a grouping." ; sh:closed true ; sh:ignoredProperties ( rdf:type ) ; sh:property [ sh:datatype xsd:double ; sh:description """This value is set by the creator/maintainer of the mapping registry and reflects the confidence the mapping registry has in the correctness (i.e., precision) of mappings in the mapping set. 
When not explicitly specified, confidence estimation algorithms should consider the registry confidence in a mapping set to be 1.0 by default.""" ; sh:maxCount 1 ; sh:maxInclusive 1e+00 ; sh:minInclusive 0e+00 ; sh:nodeKind sh:Literal ; sh:order 2 ; sh:path sssom:registry_confidence ], [ sh:description "A globally unique identifier for the mapping set (not each individual mapping). Should ideally be resolvable." ; sh:maxCount 1 ; sh:minCount 1 ; sh:nodeKind sh:Literal ; sh:order 0 ; sh:path sssom:mapping_set_id ], [ sh:datatype xsd:date ; sh:description "The date this reference was last updated." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 4 ; sh:path sssom:last_updated ], [ sh:description "A URL location from which to obtain a resource, such as a mapping set." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 1 ; sh:path sssom:mirror_from ], [ sh:datatype xsd:string ; sh:description "The local name assigned to file that corresponds to the downloaded mapping set." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 5 ; sh:path sssom:local_name ], [ sh:datatype xsd:string ; sh:description "Set by the owners of the mapping registry. A way to group related mapping sets for example for UI purposes." ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 3 ; sh:path sssom:mapping_set_group ] ; sh:targetClass sssom:MappingSetReference . sssom:Prefix a sh:NodeShape ; sh:closed true ; sh:ignoredProperties ( rdf:type ) ; sh:property [ sh:datatype xsd:anyURI ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 1 ; sh:path sssom:prefix_url ], [ sh:datatype xsd:string ; sh:maxCount 1 ; sh:nodeKind sh:Literal ; sh:order 0 ; sh:path sssom:prefix_name ] ; sh:targetClass sssom:Prefix . 
================================================ FILE: project/shex/sssom_schema.shex ================================================ # metamodel_version: 1.7.0 BASE PREFIX prov: PREFIX skos: PREFIX owl: PREFIX rdf: PREFIX rdfs: PREFIX xsd: PREFIX linkml: PREFIX pav: PREFIX dc1: rdfs:Resource IRI linkml:String xsd:string linkml:Integer xsd:integer linkml:Boolean xsd:boolean linkml:Float xsd:float linkml:Double xsd:double linkml:Decimal xsd:decimal linkml:Time xsd:time linkml:Date xsd:date linkml:Datetime xsd:dateTime linkml:DateOrDatetime linkml:DateOrDatetime linkml:Uriorcurie IRI linkml:Curie xsd:string linkml:Uri IRI linkml:Ncname xsd:string linkml:Objectidentifier IRI linkml:Nodeidentifier NONLITERAL linkml:Jsonpointer xsd:string linkml:Jsonpath xsd:string linkml:Sparqlpath xsd:string CLOSED { ( $ ( @linkml:Ncname ; @linkml:Uriorcurie ? ; @linkml:Uriorcurie ? ) ; rdf:type [ ] ? ) } CLOSED { ( $ ( @ ? ; owl:annotatedSource @ ? ; @linkml:String ? ; @linkml:String ? ; owl:annotatedProperty @ ; @linkml:String ? ; [ ] ? ; owl:annotatedTarget @ ? ; @linkml:String ? ; @linkml:String ? ; @ ; pav:authoredBy @ * ; @linkml:String * ; @ * ; @linkml:String * ; dc1:creator @ * ; @linkml:String * ; dc1:license @ ? ; [ owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property ] ? ; @ ? ; @linkml:String ? ; [ owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property ] ? ; @ ? ; @linkml:String ? ; [ owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property ] ? ; @ ? ; @ ? ; [ ] ? ; @linkml:String * ; @linkml:String ? ; @ ? ; @linkml:String ? ; dc1:created @linkml:Date ? ; dc1:issued @linkml:Date ? ; @linkml:Date ? ; @linkml:Double ? ; @linkml:Double ? 
; @ * ; @linkml:String * ; @ * ; @ * ; @linkml:String * ; @ * ; @ * ; @linkml:Double ? ; @linkml:String ? ; rdfs:seeAlso @ * ; @ ? ; @linkml:String ? ; rdfs:comment @linkml:String ? ) ; rdf:type [ owl:Axiom ] ? ) } CLOSED { ( $ ( @ ; @linkml:String ? ; @linkml:String ? ; @ * ; @ * ; @ ? ; @ ? ; @ ? ) ; rdf:type [ ] ? ) } CLOSED { ( $ ( [ ] ? ; @ * ; @ * ; @ ; owl:versionInfo @linkml:String ? ; prov:wasDerivedFrom @ * ; dc1:title @linkml:String ? ; dc1:description @linkml:String ? ; @linkml:Double ? ; dc1:creator @ * ; @linkml:String * ; dc1:license @ ; [ owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property ] ? ; @ ? ; @linkml:String ? ; [ owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property ] ? ; @ ? ; @linkml:String ? ; [ owl:Class owl:ObjectProperty owl:DataProperty owl:AnnotationProperty owl:NamedIndividual skos:Concept rdfs:Resource rdfs:Class rdfs:Literal rdfs:Datatype rdf:Property ] ? ; @ ? ; @linkml:String * ; @linkml:String ? ; @ ? ; @linkml:String ? ; dc1:created @linkml:Date ? ; dc1:issued @linkml:Date ? ; @ * ; @ * ; @ * ; @ * ; @linkml:String ? ; @ * ; @linkml:String * ; rdfs:seeAlso @ * ; @ ? ; @linkml:String ? ; rdfs:comment @linkml:String ? ; @ * ) ; rdf:type [ ] ? ) } CLOSED { ( $ ( @ ; @ ? ; @linkml:Double ? ; @linkml:String ? ; @linkml:Date ? ; @linkml:String ? ) ; rdf:type [ ] ? ) } CLOSED { ( $ rdf:type . * ; rdf:type [ ] ? ) } CLOSED { ( $ ( @linkml:Ncname ; @linkml:Uri ? ) ; rdf:type [ ] ) } CLOSED { ( $ @linkml:Boolean ? ; rdf:type [ ] ? ) } CLOSED { ( $ [ ] ? ; rdf:type [ ] ? ) } ================================================ FILE: project/sqlschema/sssom_schema.sql ================================================ -- # Class: mapping set Description: Represents a set of mappings. 
-- * Slot: id -- * Slot: sssom_version Description: The version of the SSSOM specification a mapping set is compliant with. -- * Slot: mapping_set_id Description: A globally unique identifier for the mapping set (not each individual mapping). Should ideally be resolvable. -- * Slot: mapping_set_version Description: A version string for the mapping. -- * Slot: mapping_set_title Description: The display name of a mapping set. -- * Slot: mapping_set_description Description: A description of the mapping set. -- * Slot: mapping_set_confidence Description: Mapping-set level confidence is assigned by the creator of the mapping set to indicate their overall confidence in the correctness (i.e., precision) of mappings in the mapping set. Mapping set confidence is intended to be used in cases where the creator wants to express an overall confidence in the agent that curated the individual mappings, for example a lexical matching tool, or a group of students. When not explicitly specified, confidence estimation algorithms should consider the mapping set confidence to be 1.0 by default. -- * Slot: license Description: A url to the license of the mapping. In absence of a license we assume no license. -- * Slot: subject_type Description: The type of entity that is being mapped. -- * Slot: subject_source Description: URI of vocabulary or identifier source for the subject. -- * Slot: subject_source_version Description: Version IRI or version string of the source of the subject term. -- * Slot: object_type Description: The type of entity that is being mapped. -- * Slot: object_source Description: URI of vocabulary or identifier source for the object. -- * Slot: object_source_version Description: Version IRI or version string of the source of the object term. -- * Slot: predicate_type Description: The type of the predicate used to map the subject and object entities.
-- * Slot: mapping_provider Description: URL pointing to the source that provided the mapping, for example an ontology that already contains the mappings, or a database from which it was derived. -- * Slot: mapping_tool Description: A reference to the tool or algorithm that was used to generate the mapping. Should be a URL pointing to more info about it, but can be free text. Consider using the mapping_tool_id slot for a more standardised reference. -- * Slot: mapping_tool_id Description: The ID (entity reference) of the tool or algorithm that was used to generate the mapping. -- * Slot: mapping_tool_version Description: Version string that denotes the version of the mapping tool used. -- * Slot: mapping_date Description: The date the mapping was asserted. This is different from the date the mapping was published or compiled in a SSSOM file. -- * Slot: publication_date Description: The date the mapping was published. This is different from the date the mapping was asserted. -- * Slot: similarity_measure Description: The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. To make processing this field as unambiguous as possible, we recommend using wikidata CURIEs, but the type of this field is deliberately unspecified. -- * Slot: issue_tracker Description: A URL location of the issue tracker for this entity. -- * Slot: other Description: Pipe separated list of key value pairs for properties not part of the SSSOM spec. Can be used to encode additional provenance data. NOTE. This field is not recommended for general use, and should be used sparingly. See https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv for an alternative approach based on extension slots. 
-- * Slot: comment Description: Free text field containing either curator notes or text generated by tool providing additional informative information. -- # Class: mapping Description: Represents an individual mapping between a pair of entities. -- * Slot: id -- * Slot: record_id Description: A unique identifier for a mapping record, that is for an instance of the Mapping class (in the SSSOM/TSV serialisation, this corresponds to an individual row after propagation is applied). This slot is intended to uniquely identify one such record within a mapping set and may for example act as the resource identifier for the record when it is serialised into RDF. This slot MUST NOT be used to “group” several records together to indicate that they pertain to a single mapping (for example, that they represent different versions of the same mapping), by assigning the same ID to several records. When it is used, every record within a set MUST have a unique, non-empty value. The identifier MUST be a URI; beyond that, its format is unconstrained and the identifier MUST be treated as an opaque string. -- * Slot: subject_id Description: The ID of the subject of the mapping. -- * Slot: subject_label Description: The label of subject of the mapping. -- * Slot: subject_category Description: The conceptual category to which the subject belongs to. This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases. 
-- * Slot: predicate_id Description: The ID of the predicate or relation that relates the subject and object of this match. -- * Slot: predicate_label Description: The label of the predicate/relation of the mapping. -- * Slot: predicate_modifier Description: A modifier for negating the predicate. See https://github.com/mapping-commons/sssom/issues/40 for discussion -- * Slot: object_id Description: The ID of the object of the mapping. -- * Slot: object_label Description: The label of the object of the mapping. -- * Slot: object_category Description: The conceptual category to which the object belongs. This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases. -- * Slot: mapping_justification Description: A mapping justification is an action (or the written representation of that action) of showing a mapping to be right or reasonable. -- * Slot: license Description: A url to the license of the mapping. In absence of a license we assume no license. -- * Slot: subject_type Description: The type of entity that is being mapped. -- * Slot: subject_source Description: URI of vocabulary or identifier source for the subject. -- * Slot: subject_source_version Description: Version IRI or version string of the source of the subject term. -- * Slot: object_type Description: The type of entity that is being mapped. -- * Slot: object_source Description: URI of vocabulary or identifier source for the object.
-- * Slot: object_source_version Description: Version IRI or version string of the source of the object term. -- * Slot: predicate_type Description: The type of the predicate used to map the subject and object entities. -- * Slot: mapping_provider Description: URL pointing to the source that provided the mapping, for example an ontology that already contains the mappings, or a database from which it was derived. -- * Slot: mapping_source Description: The mapping set this mapping was originally defined in. mapping_source is used for example when merging multiple mapping sets or deriving one mapping set from another. -- * Slot: mapping_cardinality Description: A value indicating whether the subject (respectively object) of this mapping record is present in other records involving a different object (respectively subject), within the subset of records defined by the cardinality_scope slot (or within the entire mapping set if cardinality_scope is undefined). Note that this is a convenience field, whose values can always be derived from the mapping set. -- * Slot: mapping_tool Description: A reference to the tool or algorithm that was used to generate the mapping. Should be a URL pointing to more info about it, but can be free text. Consider using the mapping_tool_id slot for a more standardised reference. -- * Slot: mapping_tool_id Description: The ID (entity reference) of the tool or algorithm that was used to generate the mapping. -- * Slot: mapping_tool_version Description: Version string that denotes the version of the mapping tool used. -- * Slot: mapping_date Description: The date the mapping was asserted. This is different from the date the mapping was published or compiled in a SSSOM file. -- * Slot: publication_date Description: The date the mapping was published. This is different from the date the mapping was asserted. -- * Slot: review_date Description: The date the mapping was reviewed. 
This is different from the date the mapping was asserted and published. If this field is used in a mapping, reviewer_id and/or reviewer_label MUST also be set. -- * Slot: confidence Description: A value assigned by the creator of the mapping to denote the creator's confidence or estimated probability that the mapping record is correct. A value of 1.0 means the creator has full confidence in the correctness of the mapping record, while a value of 0.0 means the creator is fully unsure whether the mapping record is correct or not. When not explicitly specified, confidence estimation algorithms should consider the mapping confidence to be 1.0 by default. -- * Slot: reviewer_agreement Description: A value assigned by the reviewer of the mapping to denote their confidence that the mapping record is correct. A value of 1.0 means the reviewer fully agrees with the mapping record. A value of -1.0 means the reviewer fully disagrees with the mapping record. A value of 0.0 means the reviewer is not sure whether the mapping record is correct or not. When not explicitly specified, confidence estimation algorithms should consider the reviewer agreement to be 1.0 by default. -- * Slot: similarity_score Description: A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm. -- * Slot: similarity_measure Description: The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. To make processing this field as unambiguous as possible, we recommend using wikidata CURIEs, but the type of this field is deliberately unspecified.
-- * Slot: issue_tracker_item Description: The issue tracker item discussing this mapping. -- * Slot: other Description: Pipe separated list of key value pairs for properties not part of the SSSOM spec. Can be used to encode additional provenance data. NOTE. This field is not recommended for general use, and should be used sparingly. See https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv for an alternative approach based on extension slots. -- * Slot: comment Description: Free text field containing either curator notes or text generated by tool providing additional informative information. -- * Slot: mapping set_id Description: Autocreated FK slot -- # Class: mapping registry Description: A registry for managing mapping sets. It holds a set of mapping set references, and can import other registries. -- * Slot: id -- * Slot: mapping_registry_id Description: The unique identifier of a mapping registry. -- * Slot: mapping_registry_title Description: The title of a mapping registry. -- * Slot: mapping_registry_description Description: The description of a mapping registry. -- * Slot: documentation Description: A URL to the documentation of this mapping commons. -- * Slot: homepage Description: A URL to a homepage of this mapping commons. -- * Slot: issue_tracker Description: A URL location of the issue tracker for this entity. -- # Class: mapping set reference Description: A reference to a mapping set. It allows to augment mapping set metadata from the perspective of the registry, for example, providing confidence, or a local filename or a grouping. -- * Slot: id -- * Slot: mapping_set_id Description: A globally unique identifier for the mapping set (not each individual mapping). Should ideally be resolvable. -- * Slot: mirror_from Description: A URL location from which to obtain a resource, such as a mapping set. 
-- * Slot: registry_confidence Description: This value is set by the creator/maintainer of the mapping registry and reflects the confidence the mapping registry has in the correctness (i.e., precision) of mappings in the mapping set. When not explicitly specified, confidence estimation algorithms should consider the registry confidence in a mapping set to be 1.0 by default. -- * Slot: mapping_set_group Description: Set by the owners of the mapping registry. A way to group related mapping sets for example for UI purposes. -- * Slot: last_updated Description: The date this reference was last updated. -- * Slot: local_name Description: The local name assigned to the file that corresponds to the downloaded mapping set. -- # Class: prefix -- * Slot: prefix_name -- * Slot: prefix_url -- * Slot: mapping set_id Description: Autocreated FK slot -- # Class: extension definition Description: A definition of an extension (non-standard) slot. -- * Slot: id -- * Slot: slot_name Description: The name of the extension slot. -- * Slot: property Description: The property associated with the extension slot. It is intended to provide a non-ambiguous meaning to the slot (contrary to the slot_name, which for brevity reasons may be ambiguous). -- * Slot: type_hint Description: Expected type of the values of the extension slot. -- # Class: Propagatable Description: Metamodel extension class to describe slots whose value can be propagated down from the MappingSet class to the Mapping class. -- * Slot: id -- * Slot: propagated Description: Indicates whether a slot can be propagated from a mapping set down to individual mappings. -- # Class: Versionable Description: Metamodel extension class to manage slots that may not exist in all versions of the model. -- * Slot: id -- * Slot: added_in Description: The version of the specification in which the slot was added. If not specified, the slot must be assumed to have been added in version 1.0.
-- # Class: NoTermFound Description: sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjunction with a corresponding subject_source or object_source to signify where the term was not found. -- * Slot: id -- # Class: mapping set_mapping_set_source -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: mapping_set_source Description: A mapping set or set of mapping sets that was used to derive the mapping set. -- # Class: mapping set_creator_id -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: creator_id Description: Identifies the persons or groups responsible for the creation of the mapping. The creator is the agent that put the mapping in its published form, which may be different from the author, which is a person that was actively involved in the assertion of the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs. -- # Class: mapping set_creator_label -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: creator_label Description: A string representing the creator of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in creator_id) for that creator. It is not expected that there should be any link between creator_id and creator_label; in particular, creator_label is not intended to provide a human-friendly version of an identifier in creator_id. -- # Class: mapping set_cardinality_scope -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: cardinality_scope Description: A list of mapping slots that define the scope for the value found in the mapping_cardinality slot. Mappings are considered to belong to the same scope if they have the same value for all slots listed in the scope. If no scope is defined, the default scope is empty, meaning that all mappings belong to a single scope that is identical to the entire mapping set.
The behaviour if a value in the list does not correspond to a valid slot name is undefined. -- # Class: mapping set_subject_match_field -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: subject_match_field Description: A list of properties, annotations or attributes related to the subject that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section. -- # Class: mapping set_object_match_field -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: object_match_field Description: A list of properties, annotations or attributes related to the object that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section. -- # Class: mapping set_subject_preprocessing -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: subject_preprocessing Description: Method of preprocessing applied to the fields of the subject. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows. -- # Class: mapping set_object_preprocessing -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: object_preprocessing Description: Method of preprocessing applied to the fields of the object. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows. -- # Class: mapping set_curation_rule -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: curation_rule Description: A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. 
Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule. -- # Class: mapping set_curation_rule_text -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: curation_rule_text Description: A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. The textual representation of curation rule is intended to be used in cases where the creation of a resource is not practical from the perspective of the mapping_provider. -- # Class: mapping set_see_also -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: see_also Description: A URL specific for the mapping instance. E.g. for kboom we have a per-mapping image that shows surrounding axioms that drive probability. Could also be a github issue URL that discussed a complicated alignment -- # Class: mapping set_extension_definitions -- * Slot: mapping set_id Description: Autocreated FK slot -- * Slot: extension_definitions_id Description: A list that defines the extension slots used in the mapping set. -- # Class: mapping_author_id -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: author_id Description: Identifies the persons or groups responsible for asserting the mappings. Recommended to be a list of ORCIDs or otherwise identifying URIs. 
-- # Class: mapping_author_label -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: author_label Description: A string representing the author of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in author_id) for that author. It is not expected that there should be any link between author_id and author_label; in particular, author_label is not intended to provide a human-friendly version of an identifier in author_id. -- # Class: mapping_reviewer_id -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: reviewer_id Description: Identifies the persons or groups that reviewed and confirmed the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs. -- # Class: mapping_reviewer_label -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: reviewer_label Description: A string representing the reviewer of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in reviewer_id) for that reviewer. It is not expected that there should be any link between reviewer_id and reviewer_label; in particular, reviewer_label is not intended to provide a human-friendly version of an identifier in reviewer_id. -- # Class: mapping_creator_id -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: creator_id Description: Identifies the persons or groups responsible for the creation of the mapping. The creator is the agent that put the mapping in its published form, which may be different from the author, which is a person that was actively involved in the assertion of the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs. -- # Class: mapping_creator_label -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: creator_label Description: A string representing the creator of this mapping. 
This should only be used in the absence of a proper semantic identifier (which would be stored in creator_id) for that creator. It is not expected that there should be any link between creator_id and creator_label; in particular, creator_label is not intended to provide a human-friendly version of an identifier in creator_id. -- # Class: mapping_cardinality_scope -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: cardinality_scope Description: A list of mapping slots that define the scope for the value found in the mapping_cardinality slot. Mappings are considered to belong to the same scope if they have the same value for all slots listed in the scope. If no scope is defined, the default scope is empty, meaning that all mappings belong to a single scope that is identical to the entire mapping set. The behaviour if a value in the list does not correspond to a valid slot name is undefined. -- # Class: mapping_curation_rule -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: curation_rule Description: A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule. -- # Class: mapping_curation_rule_text -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: curation_rule_text Description: A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. 
The curation rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. The textual representation of curation rule is intended to be used in cases where the creation of a resource is not practical from the perspective of the mapping_provider. -- # Class: mapping_subject_match_field -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: subject_match_field Description: A list of properties, annotations or attributes related to the subject that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section. -- # Class: mapping_object_match_field -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: object_match_field Description: A list of properties, annotations or attributes related to the object that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section. -- # Class: mapping_match_string -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: match_string Description: String that is shared by subj/obj. It is recommended to indicate the fields for the match using the object and subject_match_field slots. -- # Class: mapping_subject_preprocessing -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: subject_preprocessing Description: Method of preprocessing applied to the fields of the subject. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows. 
-- # Class: mapping_object_preprocessing -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: object_preprocessing Description: Method of preprocessing applied to the fields of the object. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows. -- # Class: mapping_see_also -- * Slot: mapping_id Description: Autocreated FK slot -- * Slot: see_also Description: A URL specific for the mapping instance. E.g. for kboom we have a per-mapping image that shows surrounding axioms that drive probability. Could also be a github issue URL that discussed a complicated alignment -- # Class: mapping registry_imports -- * Slot: mapping registry_id Description: Autocreated FK slot -- * Slot: imports Description: A list of registries that should be imported into this one. -- # Class: mapping registry_mapping_set_references -- * Slot: mapping registry_id Description: Autocreated FK slot -- * Slot: mapping_set_references_id Description: A list of mapping set references. 
CREATE TABLE "mapping set" ( id INTEGER NOT NULL, sssom_version VARCHAR(3), mapping_set_id TEXT NOT NULL, mapping_set_version TEXT, mapping_set_title TEXT, mapping_set_description TEXT, mapping_set_confidence FLOAT, license TEXT NOT NULL, subject_type VARCHAR(26), subject_source TEXT, subject_source_version TEXT, object_type VARCHAR(26), object_source TEXT, object_source_version TEXT, predicate_type VARCHAR(26), mapping_provider TEXT, mapping_tool TEXT, mapping_tool_id TEXT, mapping_tool_version TEXT, mapping_date DATE, publication_date DATE, similarity_measure TEXT, issue_tracker TEXT, other TEXT, comment TEXT, PRIMARY KEY (id) ); CREATE INDEX "ix_mapping set_id" ON "mapping set" (id); CREATE TABLE "mapping registry" ( id INTEGER NOT NULL, mapping_registry_id TEXT NOT NULL, mapping_registry_title TEXT, mapping_registry_description TEXT, documentation TEXT, homepage TEXT, issue_tracker TEXT, PRIMARY KEY (id) ); CREATE INDEX "ix_mapping registry_id" ON "mapping registry" (id); CREATE TABLE "mapping set reference" ( id INTEGER NOT NULL, mapping_set_id TEXT NOT NULL, mirror_from TEXT, registry_confidence FLOAT, mapping_set_group TEXT, last_updated DATE, local_name TEXT, PRIMARY KEY (id) ); CREATE INDEX "ix_mapping set reference_id" ON "mapping set reference" (id); CREATE TABLE "extension definition" ( id INTEGER NOT NULL, slot_name TEXT NOT NULL, property TEXT, type_hint TEXT, PRIMARY KEY (id) ); CREATE INDEX "ix_extension definition_id" ON "extension definition" (id); CREATE TABLE "Propagatable" ( id INTEGER NOT NULL, propagated BOOLEAN, PRIMARY KEY (id) ); CREATE INDEX "ix_Propagatable_id" ON "Propagatable" (id); CREATE TABLE "Versionable" ( id INTEGER NOT NULL, added_in VARCHAR(3), PRIMARY KEY (id) ); CREATE INDEX "ix_Versionable_id" ON "Versionable" (id); CREATE TABLE "NoTermFound" ( id INTEGER NOT NULL, PRIMARY KEY (id) ); CREATE INDEX "ix_NoTermFound_id" ON "NoTermFound" (id); CREATE TABLE mapping ( id INTEGER NOT NULL, record_id TEXT, subject_id TEXT, 
subject_label TEXT, subject_category TEXT, predicate_id TEXT NOT NULL, predicate_label TEXT, predicate_modifier VARCHAR(3), object_id TEXT, object_label TEXT, object_category TEXT, mapping_justification TEXT NOT NULL, license TEXT, subject_type VARCHAR(26), subject_source TEXT, subject_source_version TEXT, object_type VARCHAR(26), object_source TEXT, object_source_version TEXT, predicate_type VARCHAR(26), mapping_provider TEXT, mapping_source TEXT, mapping_cardinality VARCHAR(3), mapping_tool TEXT, mapping_tool_id TEXT, mapping_tool_version TEXT, mapping_date DATE, publication_date DATE, review_date DATE, confidence FLOAT, reviewer_agreement FLOAT, similarity_score FLOAT, similarity_measure TEXT, issue_tracker_item TEXT, other TEXT, comment TEXT, "mapping set_id" INTEGER, PRIMARY KEY (id), UNIQUE (record_id), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX mapping_record_id_idx ON mapping (record_id); CREATE INDEX ix_mapping_id ON mapping (id); CREATE TABLE prefix ( prefix_name TEXT NOT NULL, prefix_url TEXT, "mapping set_id" INTEGER, PRIMARY KEY (prefix_name, prefix_url, "mapping set_id"), UNIQUE ("mapping set_id", prefix_name), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "prefix_mapping set_id_prefix_name_idx" ON prefix ("mapping set_id", prefix_name); CREATE INDEX "ix_prefix_mapping set_id" ON prefix ("mapping set_id"); CREATE INDEX ix_prefix_prefix_url ON prefix (prefix_url); CREATE INDEX ix_prefix_prefix_name ON prefix (prefix_name); CREATE TABLE "mapping set_mapping_set_source" ( "mapping set_id" INTEGER, mapping_set_source TEXT, PRIMARY KEY ("mapping set_id", mapping_set_source), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "ix_mapping set_mapping_set_source_mapping_set_source" ON "mapping set_mapping_set_source" (mapping_set_source); CREATE INDEX "ix_mapping set_mapping_set_source_mapping set_id" ON "mapping set_mapping_set_source" ("mapping set_id"); CREATE TABLE 
"mapping set_creator_id" ( "mapping set_id" INTEGER, creator_id TEXT, PRIMARY KEY ("mapping set_id", creator_id), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "ix_mapping set_creator_id_creator_id" ON "mapping set_creator_id" (creator_id); CREATE INDEX "ix_mapping set_creator_id_mapping set_id" ON "mapping set_creator_id" ("mapping set_id"); CREATE TABLE "mapping set_creator_label" ( "mapping set_id" INTEGER, creator_label TEXT, PRIMARY KEY ("mapping set_id", creator_label), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "ix_mapping set_creator_label_mapping set_id" ON "mapping set_creator_label" ("mapping set_id"); CREATE INDEX "ix_mapping set_creator_label_creator_label" ON "mapping set_creator_label" (creator_label); CREATE TABLE "mapping set_cardinality_scope" ( "mapping set_id" INTEGER, cardinality_scope TEXT, PRIMARY KEY ("mapping set_id", cardinality_scope), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "ix_mapping set_cardinality_scope_mapping set_id" ON "mapping set_cardinality_scope" ("mapping set_id"); CREATE INDEX "ix_mapping set_cardinality_scope_cardinality_scope" ON "mapping set_cardinality_scope" (cardinality_scope); CREATE TABLE "mapping set_subject_match_field" ( "mapping set_id" INTEGER, subject_match_field TEXT, PRIMARY KEY ("mapping set_id", subject_match_field), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "ix_mapping set_subject_match_field_subject_match_field" ON "mapping set_subject_match_field" (subject_match_field); CREATE INDEX "ix_mapping set_subject_match_field_mapping set_id" ON "mapping set_subject_match_field" ("mapping set_id"); CREATE TABLE "mapping set_object_match_field" ( "mapping set_id" INTEGER, object_match_field TEXT, PRIMARY KEY ("mapping set_id", object_match_field), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "ix_mapping set_object_match_field_object_match_field" ON 
"mapping set_object_match_field" (object_match_field); CREATE INDEX "ix_mapping set_object_match_field_mapping set_id" ON "mapping set_object_match_field" ("mapping set_id"); CREATE TABLE "mapping set_subject_preprocessing" ( "mapping set_id" INTEGER, subject_preprocessing TEXT, PRIMARY KEY ("mapping set_id", subject_preprocessing), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "ix_mapping set_subject_preprocessing_mapping set_id" ON "mapping set_subject_preprocessing" ("mapping set_id"); CREATE INDEX "ix_mapping set_subject_preprocessing_subject_preprocessing" ON "mapping set_subject_preprocessing" (subject_preprocessing); CREATE TABLE "mapping set_object_preprocessing" ( "mapping set_id" INTEGER, object_preprocessing TEXT, PRIMARY KEY ("mapping set_id", object_preprocessing), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "ix_mapping set_object_preprocessing_mapping set_id" ON "mapping set_object_preprocessing" ("mapping set_id"); CREATE INDEX "ix_mapping set_object_preprocessing_object_preprocessing" ON "mapping set_object_preprocessing" (object_preprocessing); CREATE TABLE "mapping set_curation_rule" ( "mapping set_id" INTEGER, curation_rule TEXT, PRIMARY KEY ("mapping set_id", curation_rule), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "ix_mapping set_curation_rule_mapping set_id" ON "mapping set_curation_rule" ("mapping set_id"); CREATE INDEX "ix_mapping set_curation_rule_curation_rule" ON "mapping set_curation_rule" (curation_rule); CREATE TABLE "mapping set_curation_rule_text" ( "mapping set_id" INTEGER, curation_rule_text TEXT, PRIMARY KEY ("mapping set_id", curation_rule_text), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "ix_mapping set_curation_rule_text_curation_rule_text" ON "mapping set_curation_rule_text" (curation_rule_text); CREATE INDEX "ix_mapping set_curation_rule_text_mapping set_id" ON "mapping set_curation_rule_text" 
("mapping set_id"); CREATE TABLE "mapping set_see_also" ( "mapping set_id" INTEGER, see_also TEXT, PRIMARY KEY ("mapping set_id", see_also), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id) ); CREATE INDEX "ix_mapping set_see_also_mapping set_id" ON "mapping set_see_also" ("mapping set_id"); CREATE INDEX "ix_mapping set_see_also_see_also" ON "mapping set_see_also" (see_also); CREATE TABLE "mapping set_extension_definitions" ( "mapping set_id" INTEGER, extension_definitions_id INTEGER, PRIMARY KEY ("mapping set_id", extension_definitions_id), FOREIGN KEY("mapping set_id") REFERENCES "mapping set" (id), FOREIGN KEY(extension_definitions_id) REFERENCES "extension definition" (id) ); CREATE INDEX "ix_mapping set_extension_definitions_mapping set_id" ON "mapping set_extension_definitions" ("mapping set_id"); CREATE INDEX "ix_mapping set_extension_definitions_extension_definitions_id" ON "mapping set_extension_definitions" (extension_definitions_id); CREATE TABLE "mapping registry_imports" ( "mapping registry_id" INTEGER, imports TEXT, PRIMARY KEY ("mapping registry_id", imports), FOREIGN KEY("mapping registry_id") REFERENCES "mapping registry" (id) ); CREATE INDEX "ix_mapping registry_imports_mapping registry_id" ON "mapping registry_imports" ("mapping registry_id"); CREATE INDEX "ix_mapping registry_imports_imports" ON "mapping registry_imports" (imports); CREATE TABLE "mapping registry_mapping_set_references" ( "mapping registry_id" INTEGER, mapping_set_references_id INTEGER, PRIMARY KEY ("mapping registry_id", mapping_set_references_id), FOREIGN KEY("mapping registry_id") REFERENCES "mapping registry" (id), FOREIGN KEY(mapping_set_references_id) REFERENCES "mapping set reference" (id) ); CREATE INDEX "ix_mapping registry_mapping_set_references_mapping_set_references_id" ON "mapping registry_mapping_set_references" (mapping_set_references_id); CREATE INDEX "ix_mapping registry_mapping_set_references_mapping registry_id" ON "mapping 
registry_mapping_set_references" ("mapping registry_id"); CREATE TABLE mapping_author_id ( mapping_id INTEGER, author_id TEXT, PRIMARY KEY (mapping_id, author_id), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_author_id_author_id ON mapping_author_id (author_id); CREATE INDEX ix_mapping_author_id_mapping_id ON mapping_author_id (mapping_id); CREATE TABLE mapping_author_label ( mapping_id INTEGER, author_label TEXT, PRIMARY KEY (mapping_id, author_label), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_author_label_author_label ON mapping_author_label (author_label); CREATE INDEX ix_mapping_author_label_mapping_id ON mapping_author_label (mapping_id); CREATE TABLE mapping_reviewer_id ( mapping_id INTEGER, reviewer_id TEXT, PRIMARY KEY (mapping_id, reviewer_id), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_reviewer_id_reviewer_id ON mapping_reviewer_id (reviewer_id); CREATE INDEX ix_mapping_reviewer_id_mapping_id ON mapping_reviewer_id (mapping_id); CREATE TABLE mapping_reviewer_label ( mapping_id INTEGER, reviewer_label TEXT, PRIMARY KEY (mapping_id, reviewer_label), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_reviewer_label_mapping_id ON mapping_reviewer_label (mapping_id); CREATE INDEX ix_mapping_reviewer_label_reviewer_label ON mapping_reviewer_label (reviewer_label); CREATE TABLE mapping_creator_id ( mapping_id INTEGER, creator_id TEXT, PRIMARY KEY (mapping_id, creator_id), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_creator_id_creator_id ON mapping_creator_id (creator_id); CREATE INDEX ix_mapping_creator_id_mapping_id ON mapping_creator_id (mapping_id); CREATE TABLE mapping_creator_label ( mapping_id INTEGER, creator_label TEXT, PRIMARY KEY (mapping_id, creator_label), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_creator_label_creator_label ON mapping_creator_label (creator_label); 
CREATE INDEX ix_mapping_creator_label_mapping_id ON mapping_creator_label (mapping_id); CREATE TABLE mapping_cardinality_scope ( mapping_id INTEGER, cardinality_scope TEXT, PRIMARY KEY (mapping_id, cardinality_scope), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_cardinality_scope_cardinality_scope ON mapping_cardinality_scope (cardinality_scope); CREATE INDEX ix_mapping_cardinality_scope_mapping_id ON mapping_cardinality_scope (mapping_id); CREATE TABLE mapping_curation_rule ( mapping_id INTEGER, curation_rule TEXT, PRIMARY KEY (mapping_id, curation_rule), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_curation_rule_mapping_id ON mapping_curation_rule (mapping_id); CREATE INDEX ix_mapping_curation_rule_curation_rule ON mapping_curation_rule (curation_rule); CREATE TABLE mapping_curation_rule_text ( mapping_id INTEGER, curation_rule_text TEXT, PRIMARY KEY (mapping_id, curation_rule_text), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_curation_rule_text_curation_rule_text ON mapping_curation_rule_text (curation_rule_text); CREATE INDEX ix_mapping_curation_rule_text_mapping_id ON mapping_curation_rule_text (mapping_id); CREATE TABLE mapping_subject_match_field ( mapping_id INTEGER, subject_match_field TEXT, PRIMARY KEY (mapping_id, subject_match_field), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_subject_match_field_subject_match_field ON mapping_subject_match_field (subject_match_field); CREATE INDEX ix_mapping_subject_match_field_mapping_id ON mapping_subject_match_field (mapping_id); CREATE TABLE mapping_object_match_field ( mapping_id INTEGER, object_match_field TEXT, PRIMARY KEY (mapping_id, object_match_field), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_object_match_field_object_match_field ON mapping_object_match_field (object_match_field); CREATE INDEX ix_mapping_object_match_field_mapping_id ON 
mapping_object_match_field (mapping_id); CREATE TABLE mapping_match_string ( mapping_id INTEGER, match_string TEXT, PRIMARY KEY (mapping_id, match_string), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_match_string_mapping_id ON mapping_match_string (mapping_id); CREATE INDEX ix_mapping_match_string_match_string ON mapping_match_string (match_string); CREATE TABLE mapping_subject_preprocessing ( mapping_id INTEGER, subject_preprocessing TEXT, PRIMARY KEY (mapping_id, subject_preprocessing), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_subject_preprocessing_subject_preprocessing ON mapping_subject_preprocessing (subject_preprocessing); CREATE INDEX ix_mapping_subject_preprocessing_mapping_id ON mapping_subject_preprocessing (mapping_id); CREATE TABLE mapping_object_preprocessing ( mapping_id INTEGER, object_preprocessing TEXT, PRIMARY KEY (mapping_id, object_preprocessing), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_object_preprocessing_object_preprocessing ON mapping_object_preprocessing (object_preprocessing); CREATE INDEX ix_mapping_object_preprocessing_mapping_id ON mapping_object_preprocessing (mapping_id); CREATE TABLE mapping_see_also ( mapping_id INTEGER, see_also TEXT, PRIMARY KEY (mapping_id, see_also), FOREIGN KEY(mapping_id) REFERENCES mapping (id) ); CREATE INDEX ix_mapping_see_also_see_also ON mapping_see_also (see_also); CREATE INDEX ix_mapping_see_also_mapping_id ON mapping_see_also (mapping_id); ================================================ FILE: project.Makefile ================================================ ## Add your own custom Makefile targets here # Added by H2 EXCEL_DIR = $(DEST)/excel gen-excel: mkdir -p $(EXCEL_DIR) $(RUN) gen-excel --output $(EXCEL_DIR)/sssom_schema.xlsx $(SOURCE_SCHEMA_PATH) build: uv build pypi: uv publish ####################################### ##### Mapping validation ############# ####################################### 
MAPPING_DIR_SCHEMA=examples/schema MAPPING_DIR_EMBEDDED=examples/embedded TMPDIR = tmp validate-example-schema-%: mkdir -p $(TMPDIR) tsvalid $(MAPPING_DIR_SCHEMA)/$*.sssom.tsv --comment "#" --skip E1 sssom validate $(MAPPING_DIR_SCHEMA)/$*.sssom.tsv sssom convert $(MAPPING_DIR_SCHEMA)/$*.sssom.tsv -o $(TMPDIR)/schema-$*.sssom.ttl validate-example-embedded-%: mkdir -p $(TMPDIR) tsvalid $(MAPPING_DIR_EMBEDDED)/$*.sssom.tsv --comment "#" --skip E1 sssom validate $(MAPPING_DIR_EMBEDDED)/$*.sssom.tsv sssom convert $(MAPPING_DIR_EMBEDDED)/$*.sssom.tsv -o $(TMPDIR)/embedded-$*.sssom.ttl MAPPINGS_SCHEMA=$(notdir $(wildcard $(MAPPING_DIR_SCHEMA)/*.sssom.tsv)) VALIDATE_MAPPINGS_SCHEMA=$(patsubst %.sssom.tsv, validate-example-schema-%, $(notdir $(wildcard $(MAPPING_DIR_SCHEMA)/*.sssom.tsv))) MAPPINGS_EMBEDDED=$(notdir $(wildcard $(MAPPING_DIR_EMBEDDED)/*.sssom.tsv)) VALIDATE_MAPPINGS_EMBEDDED=$(patsubst %.sssom.tsv, validate-example-embedded-%, $(notdir $(wildcard $(MAPPING_DIR_EMBEDDED)/*.sssom.tsv))) validate_mappings: $(MAKE) $(VALIDATE_MAPPINGS_SCHEMA) $(MAKE) $(VALIDATE_MAPPINGS_EMBEDDED) ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"] build-backend = "poetry_dynamic_versioning.backend" [project] name = "sssom-schema" description = "SSSOM is a Simple Standard for Sharing Ontology Mappings." 
authors = [ {name = "Nicolas Matentzoglu", email = "nicolas.matentzoglu@gmail.com"}, {name = "Harshad Hegde", email = "hhegde@lbl.gov"}, ] license = "MIT" license-files = ["LICENSE"] readme = "README.md" keywords = ["schema", "ontology", "mappings", "sssom"] classifiers = [ "Development Status :: 5 - Production/Stable", "Environment :: Console", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "Topic :: Software Development :: Libraries :: Python Modules", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", ] requires-python = ">=3.10.0" dynamic = ["version"] dependencies = [ "jinjanator", "linkml>=1.10.0", "linkml-runtime>=1.10.0", ] [dependency-groups] tests = [ "pytest" ] docs = [ "mkdocs-material==9.0.0", "mkdocs-mermaid2-plugin==1.1.1", "mike" ] [tool.poetry] requires-poetry = ">=2.0" version = "0.0.0" [tool.poetry.dependencies] python = "^3.10" [tool.poetry.requires-plugins] poetry-dynamic-versioning = ">=1.8.2" [tool.poetry-dynamic-versioning] enable = true vcs = "git" style = "pep440" [tool.codespell] # Ref: https://github.com/codespell-project/codespell#using-a-config-file skip = '.git*,*.pdf,*.lock,*.svg' check-hidden = true ignore-regex = '\b(COMENT|EHR|LOD)\b' ignore-words-list = 'disjointness' ================================================ FILE: run.sh ================================================ #!/bin/sh # Wrapper script for docker. # # This is used primarily for wrapping the GNU Make workflow. # Instead of typing "make TARGET", type "./run.sh make TARGET". # This will run the make workflow within a docker container. # # The current working directory ($PWD) is mounted into the container as # /work and used as the working directory there, so run this script from # the directory in which you would otherwise run make. # # See README-editors.md for more details.
docker run -v $PWD:/work -w /work -e ROBOT_JAVA_ARGS='-Xmx8G' --rm -ti obolibrary/odkfull "$@" ================================================ FILE: scripts/gh_table.pl ================================================
#!/usr/bin/perl
# Convert tab-separated input (named files or STDIN) into a Markdown
# pipe table written to STDOUT.
#
#  - A leading '#' on the first input line is dropped.
#  - Every '|' inside a cell is rewritten to ', ' so that it cannot be
#    mistaken for a column separator.
#  - Rows with fewer cells than the header are padded with empty cells.
#  - A '---' separator row is emitted right after the first row.
#  - A row whose cell count differs from the previous row's is reported
#    on STDERR; the row is still printed.
use strict;
use warnings;

my $n = 0;        # number of input lines processed so far
my $prev_len;     # cell count of the previously printed row
my $hlen;         # cell count of the header (first non-empty row)

while (<>) {
    chomp;

    # Only the very first line may carry a leading comment marker.
    s@^\#@@ if $n == 0;

    my @vals = split(/\t/, $_);
    s@\|@, @g for @vals;

    $hlen = scalar(@vals) if !$hlen;
    push(@vals, '') while scalar(@vals) < $hlen;

    print '|' . join('|', @vals) . "|\n";

    my $cur_len = scalar(@vals);
    # Cell counts are numbers, so compare them numerically.
    if ($n > 0 && $prev_len != $cur_len) {
        print STDERR "MISMATCH: $prev_len != $cur_len\n";
    }
    $prev_len = $cur_len;

    if ($n == 0) {
        # Header separator: one '---' per header cell.
        print '|' . join('|', ('---') x @vals) . "|\n";
    }
    $n++;
}
================================================ FILE: src/CONFIG.yaml ================================================ model_organization: mapping-commons # Name of github individual organization where this cone exists (e.g. linkml / mygithub model_name: sssom # Name of target repository (e.g. my-wonderful-model) root_schema: sssom # the name of the base schema file (w/o .yaml) model_root_class: MappingSet # {% endif %} ## Slots | Name | Cardinality and Range | Description | Inheritance | | --- | --- | --- | --- | {% if gen.get_direct_slots(element)|length > 0 %} {%- for slot in gen.get_direct_slots(element) -%} | {{ gen.link(slot) }} | {{ gen.cardinality(slot) }}
{{ gen.link(slot.range) }} | {{ slot.description|enshorten }} | direct | {% endfor -%} {% endif -%} {% if gen.get_indirect_slots(element)|length > 0 %} {%- for slot in gen.get_indirect_slots(element) -%} | {{ gen.link(slot) }} | {{ gen.cardinality(slot) }}
{{ gen.link(slot.range) }} | {{ slot.description|enshorten }} | {{ gen.links(gen.get_slot_inherited_from(element.name, slot.name))|join(', ') }} | {% endfor -%} {% endif %} {% if schemaview.is_mixin(element.name) %} ## Mixin Usage | mixed into | description | | --- | --- | {% for c in schemaview.class_children(element.name, is_a=False) -%} | {{ gen.link(c) }} | {{ schemaview.get_class(c).description|enshorten }} | {% endfor %} {% endif %} {% if schemaview.usage_index().get(element.name) %} ## Usages | used by | used in | type | used | | --- | --- | --- | --- | {% for usage in schemaview.usage_index().get(element.name) -%} | {{gen.link(usage.used_by)}} | {{gen.link(usage.slot)}} | {{usage.metaslot}} | {{ gen.link(usage.used) }} | {% endfor %} {% endif %} {% include "common_metadata.md.jinja2" %} {% if schemaview.get_mappings(element.name).items() -%} ## Mappings | Mapping Type | Mapped Value | | --- | --- | {% for m, mt in schemaview.get_mappings(element.name).items() -%} {% if mt|length > 0 -%} | {{ m }} | {{ mt|join(', ') }} | {% endif -%} {% endfor %} {% endif -%} {% if gen.example_object_blobs(element.name) -%} ## Examples {% for name, blob in gen.example_object_blobs(element.name) -%} ### Example: {{name}} ```yaml {{ blob }} ``` {% endfor %} {% endif %} ## LinkML Source ### Direct
```yaml {{gen.yaml(element)}} ```
### Induced
```yaml {{gen.yaml(element, inferred=True)}} ```
{%- if footer -%} {{footer}} {%- endif -%} ================================================ FILE: src/doc-templates/class_diagram.md.jinja2 ================================================ {% if schemaview.class_parents(element.name) and schemaview.class_children(element.name) %} ```{{ gen.mermaid_directive() }} classDiagram class {{ gen.name(element) }} {% for s in schemaview.class_parents(element.name)|sort(attribute='name') -%} {{ gen.name(schemaview.get_element(s)) }} <|-- {{ gen.name(element) }} {% endfor %} {% for s in schemaview.class_children(element.name)|sort(attribute='name') -%} {{ gen.name(element) }} <|-- {{ gen.name(schemaview.get_element(s)) }} {% endfor %} {% for s in schemaview.class_induced_slots(element.name)|sort(attribute='name') -%} {{ gen.name(element) }} : {{gen.name(s)}} {% if s.range not in gen.all_type_object_names() %} {{ gen.name(element) }} --|> {{ s.range }} : {{ gen.name(s) }} {% endif %} {% endfor %} ``` {% elif schemaview.class_parents(element.name) %} ```{{ gen.mermaid_directive() }} classDiagram class {{ gen.name(element) }} {% for s in schemaview.class_parents(element.name)|sort(attribute='name') -%} {{ gen.name(schemaview.get_element(s)) }} <|-- {{ gen.name(element) }} {% endfor %} {% for s in schemaview.class_induced_slots(element.name)|sort(attribute='name') -%} {{ gen.name(element) }} : {{gen.name(s)}} {% if s.range not in gen.all_type_object_names() %} {{ gen.name(element) }} --|> {{ s.range }} : {{ gen.name(s) }} {% endif %} {% endfor %} ``` {% elif schemaview.class_children(element.name) %} ```{{ gen.mermaid_directive() }} classDiagram class {{ gen.name(element) }} {% for s in schemaview.class_children(element.name)|sort(attribute='name') -%} {{ gen.name(element) }} <|-- {{ gen.name(schemaview.get_element(s)) }} {% endfor %} {% for s in schemaview.class_induced_slots(element.name)|sort(attribute='name') -%} {{ gen.name(element) }} : {{gen.name(s)}} {% if s.range not in gen.all_type_object_names() %} {{ gen.name(element) 
}} --|> {{ s.range }} : {{ gen.name(s) }} {% endif %} {% endfor %} ``` {% else %} ```{{ gen.mermaid_directive() }} classDiagram class {{ gen.name(element) }} {% for s in schemaview.class_induced_slots(element.name)|sort(attribute='name') -%} {{ gen.name(element) }} : {{gen.name(s)}} {% if s.range not in gen.all_type_object_names() %} {{ gen.name(element) }} --|> {{ s.range }} : {{ gen.name(s) }} {% endif %} {% endfor %} ``` {% endif %} ================================================ FILE: src/doc-templates/common_metadata.md.jinja2 ================================================ {% if element.aliases %} ## Aliases {% for alias in element.aliases %} * {{ alias }} {%- endfor %} {% endif %} {% if element.examples %} ## Examples | Value | | --- | {% for x in element.examples -%} | {{ x.value }} | {% endfor %} {% endif -%} {% if element.comments -%} ## Comments {% for x in element.comments -%} * {{x}} {% endfor %} {% endif -%} {% if element.todos -%} ## TODOs {% for x in element.todos -%} * {{x}} {% endfor %} {% endif -%} {% if element.see_also -%} ## See Also {% for x in element.see_also -%} * {{ gen.uri_link(x) }} {% endfor %} {% endif -%} ## Identifier and Mapping Information {% if element.id_prefixes %} ### Valid ID Prefixes Instances of this class *should* have identifiers with one of the following prefixes: {% for p in element.id_prefixes %} * {{p}} {% endfor %} {% endif %} {% if element.annotations %} ### Annotations | property | value | | --- | --- | {% for a in element.annotations -%} {%- if a|string|first != '_' -%} | {{ a }} | {{ element.annotations[a].value }} | {%- endif -%} {% endfor %} {% endif %} {% if element.from_schema or element.imported_from %} ### Schema Source {% if element.from_schema %} * from schema: {{ element.from_schema }} {% endif %} {% if element.imported_from %} * imported from: {{ element.imported_from }} {% endif %} {% endif %} ================================================ FILE: src/doc-templates/frontpage.md.jinja2 
================================================ --- hide: - edit --- [edit page](https://github.com/mapping-commons/sssom/edit/master/src/doc-templates/frontpage.md.jinja2) [//]: # (This file is automatically generated, please edit src/doc-templates/frontpage.md.jinja2) # Simple Standard for Sharing Ontological Mappings (SSSOM) ![SSSOM banner](images/sssom-banner.png) The Simple Standard for Sharing Ontological Mappings (SSSOM) is a community-driven standard designed to facilitate the exchange and integration of semantic entity mappings. As data interoperability becomes increasingly crucial across various domains, SSSOM provides a standardized format to share mappings, enabling researchers and developers to more easily connect and utilize diverse datasets. By establishing a common framework, SSSOM enhances the consistency, quality, and discoverability of mappings, thereby supporting more effective data integration and analysis. - **Standardization**: SSSOM provides a unified format for representing semantic, or ontological, mappings, making it easier for different systems and organizations to exchange mapping data consistently. - **Interoperability**: By using SSSOM, data from diverse sources can be integrated more seamlessly, allowing for improved data analysis and research across various fields, including biology, healthcare, and information technology. Beyond defining the standard itself, the **SSSOM Core Team** and the SSSOM community also develop reference tools and software libraries for working with the standard. ## SSSOM at a glance: Model and Exchange Format ### Basic model The [data model](spec-model.md) of SSSOM is centered around two fundamental concepts: mappings and mapping sets. A **SSSOM mapping** is a statement that there is a correspondence between two semantic entities. It comprises two components: 1. 
The **core mapping** (or **raw mapping**), which is a triple `<subject, predicate, object>` that represents the correspondence itself between a subject entity, for example a class in an ontology, and an object entity, for example an identifier in some database, via a semantic mapping predicate, for example `skos:exactMatch`. 2. **Metadata** that provide supplementary pieces of information about the core mapping. This notably includes information about the *provenance* of the statement (for example, who authored the statement), the *confidence* with which the mapping holds, and its *justification* (a reason that supports the fidelity of the mapping between the subject and the object, such as _expert review_, or _exact lexical matching_ on the entities' primary names). A **SSSOM mapping set** is a collection of SSSOM mappings. Mapping sets can also be associated with metadata, such as license statements, or a description. ### Example While the SSSOM model is quite general and mappings can be shared in different formats, the most common format is the [SSSOM/TSV format](spec-formats-tsv.md). Here is a tabular representation of some example mappings for illustration purposes: | subject_id | subject_label | predicate_id | object_id | object_label | mapping_justification | author_id | confidence | comment | |---------------|---------------|-----------------|-----------------|----------------------|-----------------------------|-------------------------|------------|---------------------------------------------------------------------------------------------------------------------------------| | KF_FOOD:F001 | apple | skos:exactMatch | FOODON:00002473 | apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 0.95 | "We could map to FOODON:03310788 instead to cover sliced apples, but only 'whole' apple types exist."
| | KF_FOOD:F002 | gala | skos:exactMatch | FOODON:00003348 | Gala apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 1.0 | | | KF_FOOD:F003 | pink | skos:exactMatch | FOODON:00004186 | Pink apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 0.9 | "We could map to FOODON:00004187 instead which more specifically refers to 'raw' Pink apples. Decided against to be consistent with other mapping choices." | | KF_FOOD:F004 | braeburn | skos:broadMatch | FOODON:00002473 | apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 1.0 | | In the TSV format, mapping set metadata is included at the top of the file, before the mappings themselves, in yaml-like key-value pairs: ```yaml curie_map: FOODON: http://purl.obolibrary.org/obo/FOODON_ KF_FOOD: https://kewl-foodie.inc/food/ orcid: https://orcid.org/ mapping_set_id: https://w3id.org/sssom/tutorial/example1.sssom.tsv mapping_set_description: Manually curated alignment of KEWL FOODIE INC internal food and nutrition database with Food Ontology (FOODON). Intended to be used for ontological analysis and grouping of KEWL FOODIE INC related data. license: https://creativecommons.org/licenses/by/4.0/ mapping_date: 2022-05-02 ``` See [here](https://github.com/mapping-commons/sssom/tree/master/examples/schema) for concrete examples. ### Quick reference for mapping metadata For mapping set metadata please see [here](MappingSet.md). 
| Column/Field | Description | Value | Examples | Required | |--------------------|---------------------------------------------------------|----------------------------------------|---------------------------|-------------| {%- for slot in classes['mapping'].slots %} {%- set slot_info = slots[slot] %} | **[{{ slot }}]({{ slot }}.md)** | {{ slot_info.description | default("No description") }} | {% if slot_info.range in enums %}{{ enums[slot_info.range].permissible_values.keys() | join(", ") }}{% else %}{% if slot_info.range == "EntityReference" %}entity reference (e.g. CURIE in TSV){% else %}{{ slot_info.range }}{% endif %}{% endif %} | {% if slot_info.examples is defined and slot_info.examples | length > 0 %}{% if slot_info.examples[0] is string %}{{ slot_info.examples[0].replace("|", "\|") }}{% elif slot_info.examples[0] is mapping %}{{ slot_info.examples[0].value | default("N/A") | replace("|", "\|") }}{% else %}{{ slot_info.examples[0] }}{% endif %}{% else %}N/A{% endif %} | {% if slot_info.required | default(false) %}Required{% elif slot_info.recommended | default(false) %}Recommended{% else %}Optional{% endif %} | {%- endfor %} ### Quick links **General** - [GitHub page](https://github.com/mapping-commons/sssom) - [Detailed description](introduction.md) - [Formal specification](spec-intro.md) **Publications** - [A Simple Standard for Sharing Ontological Mappings (SSSOM)](https://doi.org/10.1093/database/baac035) (initial publication in _Database_) - [A Simple Standard for Ontological Mappings 2022: Updates of data model and outlook](https://zenodo.org/record/7672104) (paper and presentation at the Ontology Matching Workshop 2022) - [A Simple Standard for Ontological Mappings 2023: Updates on data model, collaborations and tooling](https://zenodo.org/record/8202395) (paper and presentation at the Ontology Matching Workshop 2023) - [Other presentations](presentations.md) **Related software** - [SSSOM-Java](https://incenp.org/dvlpt/sssom-java/) (reference 
implementation of the SSSOM standard in Java, most up to date with the current standard; provides a Java library, a command-line tool, and a ROBOT plugin) - [sssom-py](https://mapping-commons.github.io/sssom-py/) (an implementation of the standard, a toolkit and API for processing mappings, written in Python using Pandas dataframes and/or LinkML objects as a primary data structure) - [sssom-pydantic](https://github.com/cthoyt/sssom-pydantic) (an implementation of the standard, a toolkit and API for processing mappings, written in Python using Pydantic as a primary data structure) - [sssom-js](https://www.npmjs.org/package/sssom-js) (an implementation of the SSSOM standard in JavaScript) ## The SSSOM Core Team ### Contact The preferred way to contact the SSSOM team is through the [issue tracker](https://github.com/mapping-commons/sssom/issues) (for problems with SSSOM) or the [GitHub discussion forums](https://github.com/mapping-commons/sssom/discussions) (for general questions). You can find any of the members of the SSSOM core team [on GitHub](https://github.com/orgs/mapping-commons/teams/sssom-core). Their GitHub profiles usually also provide email addresses. You can also reach us in the [OBO Foundry Slack](https://obo-communitygroup.slack.com/archives/C01DP18L5GW), in the `#sssom` channel. ### Steering committee The Steering committee is a self-appointed group of SSSOM contributors, whose aim is to drive the evolution of the standard and coordinate community contributions. * [Nicolas Matentzoglu](https://orcid.org/0000-0002-7356-1779) (Independent Consultant (semanticly.ai), [@matentzn](https://github.com/matentzn)) * [Damien Goutte-Gattat](https://orcid.org/0000-0002-6095-8718) (German BioImaging e.V.)
* [Chris Mungall](https://orcid.org/0000-0002-6601-2165) (LBNL) * [Melissa Haendel](https://orcid.org/0000-0001-9114-8737) (UNC) * [Charles Tapley Hoyt](https://orcid.org/0000-0003-4423-4370) (RWTH Aachen University; [@cthoyt](https://github.com/cthoyt)) ### Documentation/specification editors * [Anita Caron](https://orcid.org/0000-0002-6523-4866) (EMBL-EBI) * [David Osumi-Sutherland](https://orcid.org/0000-0002-7073-9172) (Wellcome Sanger Institute) * [Emily Hartley](https://orcid.org/0000-0001-5839-2535) (Critical Path Institute) * [Ernesto Jimenez-Ruiz](https://orcid.org/0000-0002-9083-4599) (City, University of London) * [Harry Caufield](https://orcid.org/0000-0001-5705-7831) (LBNL) * [Harshad Hegde](https://orcid.org/0000-0002-2411-565X) (LBNL) * [Henriette Harmse](https://orcid.org/0000-0001-7251-9504) (EMBL-EBI) * [James McLaughlin](https://orcid.org/0000-0002-8361-2795) (EMBL-EBI) * [John Graybeal](https://orcid.org/0000-0001-6875-5360) (Independent Consultant) * [Sierra Moxon](https://orcid.org/0000-0002-8719-7760) (LBNL) * [Simon Jupp](https://orcid.org/0000-0002-0643-3144) (SciBite) * [Thomas Liener](https://orcid.org/0000-0003-3257-9937) (Independent Consultant) * [Tiffany Callahan](https://orcid.org/0000-0002-8169-9049) ([@callahantiff](https://github.com/callahantiff)) * [William Duncan](https://orcid.org/0000-0001-9625-1899) (University of Florida) ### Contributors * [Alasdair Gray](https://orcid.org/0000-0002-5711-4872) * [Alex Wagner](https://orcid.org/0000-0002-2502-8961) * [Amelia L. Hoyt](https://orcid.org/0000-0003-1307-2508) * [Andrew Williams](https://orcid.org/0000-0002-0692-412X) * [Anne Thessen](https://orcid.org/0000-0002-2908-3327) * [Benjamin M. Gyori](https://orcid.org/0000-0001-9439-5346) * [Bill Baumgartner](https://orcid.org/0000-0001-6717-5313) * [Cassia Trojahn](https://orcid.org/0000-0002-0096-2766) * [Clement Jonquet](https://orcid.org/0000-0002-2404-1582) * [Christopher Chute](https://orcid.org/0000-0001-5437-2545) * [Chris T. 
Evelo](https://orcid.org/0000-0002-5301-3142) * [Damion Dooley](https://orcid.org/0000-0002-8844-9165) * [Davera Gabriel](https://orcid.org/0000-0001-9041-4597) * [Harold Solbrig](https://www.wikidata.org/wiki/Q44607574) * [HyeongSik Kim](https://orcid.org/0000-0002-3002-9838) * [Ian Harrow](https://orcid.org/0000-0003-0109-0522) * [Ian Braun](https://orcid.org/0000-0002-2389-9288) * [James Malone](https://orcid.org/0000-0002-1615-2899) * [James Overton](https://orcid.org/0000-0001-5139-5557) * [James P. Balhoff](https://orcid.org/0000-0002-8688-6599) * [James Stevenson](https://orcid.org/0000-0002-2568-6163) * [Javier Millán Acosta](https://orcid.org/0000-0002-4166-7093) * [Jiao Dahzi](https://orcid.org/0000-0001-5052-3836) * [Joe Flack](https://orcid.org/0000-0002-2906-7319) * [Jooho Lee](https://orcid.org/0000-0002-2955-3405) * [Julie McMurry](https://orcid.org/0000-0002-9353-5498) * [Kori Kuzma](https://orcid.org/0000-0002-9954-7449) * [Kristin Kostka](https://orcid.org/0000-0003-2595-8736) * [Lauren Chan](https://orcid.org/0000-0002-7463-6306) * [Melissa Haendel](https://orcid.org/0000-0001-9114-8737) * [Monica Munoz-Torres](https://orcid.org/0000-0001-8430-6039) * [Nicole Vasilevsky](https://orcid.org/0000-0001-5208-3432) * [Nomi Harris](https://orcid.org/0000-0001-6315-3707) * [Núria Queralt-Rosinach](https://orcid.org/0000-0003-0169-8159) * [Sabrina Toro](https://orcid.org/0000-0002-4142-7153) * [Sebastian Koehler](https://orcid.org/0000-0002-5316-1399) * [Shahim Essaid](https://orcid.org/0000-0003-2338-2550) * [Sophie Aubin](https://orcid.org/0000-0003-4805-8220) * [Sue Bello](https://orcid.org/0000-0003-4606-0597) * [Sujay Patil](https://orcid.org/0000-0001-6142-1106) * [Sven Hertling](https://orcid.org/0000-0003-0333-5888) * [Tim Putman](https://orcid.org/0000-0002-4291-0737) * [Vinicius de Souza](https://orcid.org/0000-0002-4971-0439) ## Acknowledgements - See [Funding](funding.md) for details on direct contributions. 
- We thank the [Linked Data Modeling Language (LinkML) project](https://github.com/linkml) and its team for their great framework and for their support in developing the schema. ================================================ FILE: src/doc-templates/index.md.jinja2 ================================================ # SSSOM Official Data Model Documentation ![SSSOM banner](images/sssom-banner.png) {% if schema.description %}{{ schema.description }}{% endif %} **Schema PURL**: {{ schema.id }} ## Introduction While the SSSOM model is quite general and mappings can be shared in different formats, the most common format is the [SSSOM/TSV format](spec-formats-tsv.md). Here is a tabular representation of some example mappings for illustration purposes: | subject_id | subject_label | predicate_id | object_id | object_label | mapping_justification | author_id | confidence | comment | |---------------|---------------|-----------------|-----------------|----------------------|-----------------------------|-------------------------|------------|---------------------------------------------------------------------------------------------------------------------------------| | KF_FOOD:F001 | apple | skos:exactMatch | FOODON:00002473 | apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 0.95 | "We could map to FOODON:03310788 instead to cover sliced apples, but only 'whole' apple types exist." | | KF_FOOD:F002 | gala | skos:exactMatch | FOODON:00003348 | Gala apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 1.0 | | | KF_FOOD:F003 | pink | skos:exactMatch | FOODON:00004186 | Pink apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 0.9 | "We could map to FOODON:00004187 instead which more specifically refers to 'raw' Pink apples. Decided against to be consistent with other mapping choices."
| | KF_FOOD:F004 | braeburn | skos:broadMatch | FOODON:00002473 | apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | 1.0 | | In the TSV format, mapping set metadata is included at the top of the file, before the mappings themselves, in yaml-like key-value pairs: !!! info "Example header (YAML format)"
    curie_map:
      FOODON: http://purl.obolibrary.org/obo/FOODON_
      KF_FOOD: https://kewl-foodie.inc/food/
      orcid: https://orcid.org/
    mapping_set_id: https://w3id.org/sssom/tutorial/example1.sssom.tsv
    mapping_set_description: >
      Manually curated alignment of KEWL FOODIE INC internal food and 
      nutrition database with Food Ontology (FOODON). Intended to be 
      used for ontological analysis and grouping of KEWL FOODIE INC 
      related data.
    license: https://creativecommons.org/licenses/by/4.0/
    mapping_date: 2022-05-02
    
See [here](https://github.com/mapping-commons/sssom/tree/master/examples/schema) for concrete examples. ## Mapping metadata elements {% for c in gen.all_class_objects()|sort(attribute=sort_by) -%} {% if c.name == "mapping" %} {{gen.link(c)}}: {{c.description|enshorten}} | Column/Field | Description | Required | |--------------------|---------------------------------------------------------|-------------| {%- for slot in c.slots %} {%- set slot_info = schemaview.induced_slot(slot, c.name) %} | **{{ gen.link(slot) }}** | {{ slot_info.description | default("No description") }} | {% if slot_info.required | default(false) %}Required{% elif slot_info.recommended | default(false) %}Recommended{% else %}Optional{% endif %} | {%- endfor %} {%- endif %} {%- endfor %} ## Mappings set metadata elements {% for c in gen.all_class_objects()|sort(attribute=sort_by) -%} {% if c.name == "mapping set" %} {{gen.link(c)}}: {{c.description|enshorten}} | Column/Field | Description | Required | |--------------------|---------------------------------------------------------|-------------| {%- for slot in c.slots %} {%- set slot_info = schemaview.induced_slot(slot, c.name) %} | **{{ gen.link(slot) }}** | {{ slot_info.description | default("No description") }} | {% if slot_info.required | default(false) %}Required{% elif slot_info.recommended | default(false) %}Recommended{% else %}Optional{% endif %} | {%- endfor %} {%- endif %} {%- endfor %} {# ## Schema Diagram ```{{ gen.mermaid_directive() }} {{ gen.mermaid_diagram() }} ``` #} ## Index (all classes, enums and elements) ### Columns/Slots/Fields | Slot | Description | | --- | --- | {% for s in gen.all_slot_objects()|sort(attribute=sort_by) -%} | {{gen.link(s)}} | {{s.description|enshorten}} | {% endfor %} ### Classes | Class | Description | | --- | --- | {% if gen.hierarchical_class_view -%} {% for u, v in gen.class_hierarchy_as_tuples() -%} | {{ " "|safe*u*8 }}{{ gen.link(schemaview.get_class(v)) }} | {{ 
schemaview.get_class(v).description }} | {% endfor %} {% else -%} {% for c in gen.all_class_objects()|sort(attribute=sort_by) -%} | {{gen.link(c)}} | {{c.description|enshorten}} | {% endfor %} {% endif %} ### Enumerations | Enumeration | Description | | --- | --- | {% for e in gen.all_enum_objects()|sort(attribute=sort_by) -%} | {{gen.link(e)}} | {{e.description|enshorten}} | {% endfor %} ### Types | Type | Description | | --- | --- | {% for t in gen.all_type_objects()|sort(attribute=sort_by) -%} | {{gen.link(t)}} | {{t.description|enshorten}} | {% endfor %} ================================================ FILE: src/doc-templates/slot.md.jinja2 ================================================ # {{ gen.name(element) }} **URI**: {{ gen.uri_link(element) }} **Applicable to**: {% for c in schemaview.get_classes_by_slot(element, include_induced=True) %}{{ gen.link(c) }}{% if not loop.last %}, {% endif %}{% endfor -%} {%- if element.description %} {%- set element_description_lines = element.description.split('\n') %} !!! info "Description" {% for element_description_line in element_description_lines %} _{{ element_description_line }}_ {%- endfor %} {%- endif -%} {# {% if schemaview.slot_parents(element.name) or schemaview.slot_children(element.name, mixins=False) %} ## Inheritance {{ gen.inheritance_tree(element, mixins=True) }} {% else %} {% endif %} {% if schemaview.get_classes_by_slot(element, include_induced=True) %} #} {# {% if schemaview.is_mixin(element.name) %} ## Mixin Usage | mixed into | description | range | domain | | --- | --- | --- | --- | {% for s in schemaview.slot_children(element.name, is_a=False) -%} | {{ gen.link(s) }} | {{ schemaview.get_slot(s).description|enshorten }} | {{ schemaview.get_slot(s).range }} | {{ schemaview.get_classes_by_slot(schemaview.get_slot(s))|join(', ') }} | {% endfor %} {% endif %} #} ## Properties * **Range**: {{gen.link(element.range)}}. The range of the element is the type of the value that can be assigned to it. 
{% if element.multivalued %}* **Multivalued**: {{ element.multivalued }}. If the element is multivalued, more than one value can be attached to the same field. In the TSV format, these are `|` separated.{% endif %} {% if element.minimum_value is not none %}* **Minimum Value**: {{ element.minimum_value|int }}{% endif -%} {% if element.maximum_value is not none %}* **Maximum Value**: {{ element.maximum_value|int }}{% endif -%} {% if element.pattern %}* **Regex pattern**: {{ '`' }}{{ element.pattern }}{{ '`' }}{% endif -%} {# {% if schemaview.usage_index().get(element.name) %} ## Usages | used by | used in | type | used | | --- | --- | --- | --- | {% for usage in schemaview.usage_index().get(element.name) -%} | {{gen.link(usage.used_by)}} | {{gen.link(usage.slot)}} | {{usage.metaslot}} | {{ gen.link(usage.used) }} | {% endfor %} {% endif %} #} {% if element.examples %} ## Examples {% for x in element.examples -%} !!! example "Example: {{ x.value }}" Example value: ``` {{ x.value }} ``` {% if x.description %}Description: {{ x.description }}{% endif %} {% endfor %} {% endif -%} {% if element.comments -%} ## Comments {% for x in element.comments -%} * {{x}} {% endfor %} {% endif -%} {% if element.see_also -%} ## See Also These are some relevant resources you might find useful to get additional information about the element, such as example implementations, issues and pull requests. {% for x in element.see_also -%} * {{ gen.uri_link(x) }} {% endfor %} {% endif -%} ## Schema developer documentation
LinkML source ```yaml {{ gen.yaml(element) }} ```
================================================ FILE: src/docs/5star-mappings.md ================================================ # 5-Star Entity Mappings - Cheatsheet [Download as PDF](resources/sssom_5star_mappings.pdf). This document is under development. Get involved by opening an issue on the [issue tracker](https://github.com/mapping-commons/sssom/issues). ## Towards more reusable and transparent mappings for Open Science. Entity Mappings connect clinical codes, data model enums, ontology classes and terms in clinical terminologies across knowledge organization systems and databases. Entity mappings are pivotal for the integration of healthcare data, but they are expensive to produce and often use-case-dependent. Despite the cost of creating these mappings, they are rarely shared across organizations, and even if they are made available, they lack standardization and metadata. Here we outline a 5-Star mapping system (inspired by Tim Berners-Lee’s 5-star system for Linked Data) to help you bring mapping production in your organization to the next level - step by step. ## :star: 1-Star Mappings * **Goal**: Export mapping in a computationally accessible format, make it publicly available and record mapping precision. * **Implementation**: * Record subject id, object id and mapping precision (exact, broad, narrow, close, related) * Use globally unique and persistent identifiers for subject id and object id (e.g. 
OMOP:123456) * Use a computable file format (JSON, XML, CSV, TSV) rather than XLSX or HTML * Make mappings available in a public space without access restrictions * **Optional**: record the subject and object labels to make it easier for humans to read the file * **Enables**: * Direct integration into ETL pipelines * Dropping societal costs by enabling others to reuse mappings * Moving data between semantic spaces ## :star::star: 2-Star Mappings * **Goal**: Make mappings available in a place with version control suitable for providing community feedback, make your own uncertainty explicit, add license and select semantic mapping predicate. * **Implementation** * Make mapping set available in a public version control system (e.g. GitHub) with an issue tracker * Record the semantic predicate explicitly (e.g. owl:equivalentClass, skos:exactMatch) * Record a confidence value for the mapping between 0 and 1, where appropriate * Use a standard open license for the use of the mapping set (e.g. Creative Commons) * **Enables**: * The worry-free reuse of mappings even if target or source terminologies are “closed” * Transparently versioned access to mappings and the opportunity to provide more direct feedback * Downstream users can filter for high-confidence mappings ## :star::star::star: 3-Star Mappings * **Goal**: Export mappings in a community standard format with basic versioning and provenance information. * **Implementation** * Export mappings in SSSOM ([https://w3id.org/sssom](https://w3id.org/sssom)) format (you do not have to curate using SSSOM!) * Record the following additional metadata * mapping_justification(s) (Lexical, Logical match, Human curated etc.) 
* mapping_date * subject_source, object_source, subject_source_version, object_source_version * mapping_tool (if the mapping was automatically computed using a tool), creator_id * **Enables**: * Dropping costs of reusing mappings further by providing a standard format to exchange mappings * Enabling the decentralised production of mappings by independent expert communities * Basic metadata, in particular justifications, enable downstream users to assess “fitness for purpose” for a different context ## :star::star::star::star: 4-Star Mappings * **Goal**: Make it easier to discover mappings by registering them at a public mapping registry * **Implementation** * Register the mapping at a mapping commons (if none exists, create one) * Record the following additional metadata: * mapping_set_id, mapping_set_description, mapping_set_version * mapping_provider (if the mapping is not original, i.e. it is derived from another source) * Provide an executable mapping_justification (see https://w3id.org/sssom#minimum) * **Enables**: * Mappings can easily be made available by Open Terminology services which enable scalable data mapping services ## :star::star::star::star::star: 5-Star Mappings * **Goal**: Ensure currency of mappings * **Implementation**: * Mappings are up-to-date with the latest versions of the sources being mapped * Have no issue on their issue tracker open for more than 3 months without an interaction * Usually requires a lifecycle management system that integrates automated matching * **Enables**: * Reduced effort dealing with mappings to deprecated codes or classes * Worry-free application of mappings in automated ETL processes ================================================ FILE: src/docs/chaining-rules.md ================================================ ## SSSOM Mapping Chains The goal of this document is to capture all obvious mapping chaining rules that could be applied to SSSOM, and later delivered as part of `sssom toolkit`.
This is all structural, and should not be confused with proper reasoning or mapping reconciliation ala [boomer](https://github.com/INCATools/boomer). The idea is to provide the functionality to apply these chaining rules over a given mapping set, and record the appropriate metadata for that rule. Rules: - [Transitivity Rule](#transitivity) - [Role chains over exact/equivalent matches](#rce) - [Inverse Rule](#inverse) - [Generalisation Rule](#generalisation) ## Transitivity Rule Transitivity of a relation `R` implies that if an entity `A` is `R`-related to an entity `B` which in turn is `R`-related to an entity `C`, `A` is also `R`-related to `C`. ### Predicates applicable in transitivity rules We consider the following predicates transitive: - skos:exactMatch - skos:narrowMatch - skos:broadMatch - owl:equivalentClass / owl:equivalentProperty - rdfs:subClassOf / rdfs:subPropertyOf - owl:sameAs Note that technically speaking `skos:narrowMatch` and `skos:broadMatch` are not considered transitive (`skos:broaderTransitive` would be), but we are not defining a new semantics here, just a reasonable default for a mapping tool, which will nearly always hold true. Predicates we do not consider transitive include: `skos:relatedMatch` (for practical reasons), `oboInOwl:hasDbXref`, `skos:closeMatch`, `rdfs:seeAlso` (weakest form of a mapping link), `rdf:type`. ### Rules - T1: `(:A)-[predicate_id]->(:B)-[predicate_id]->(:C)` -> `(:A)-[predicate_id]->(:C)` ### Examples - T1-EX: `(:A)-[skos:broadMatch]->(:B)-[skos:broadMatch]->(:C)` -> `(:A)-[skos:broadMatch]->(:C)` ## Role chains over exact/equivalent matches Role chains are rules that allow us to bridge across mappings across multiple different properties. 
Role chains over exact are simple to define, so we start with these. ### Predicates applicable in role chain rules - skos:narrowMatch - skos:broadMatch - skos:closeMatch - skos:relatedMatch ### Rules for SKOS - RCE1: `(:A)-[skos:exactMatch|owl:equivalentClass]->(:B)-[predicate_id]->(:C)` -> `(:A)-[predicate_id]->(:C)` - RCE2: `(:A)-[predicate_id]->(:B)-[skos:exactMatch]->(:C)` -> `(:A)-[predicate_id]->(:C)` ### Rules that should probably not be inferred (OWL) The following rules hold true, but will be left to a reasoner to be inferred: - RCE-N1: `(:A)-[owl:equivalentClass]->(:B)-[rdfs:subClassOf]->(:C)` -> `(:A)-[rdfs:subClassOf]->(:C)` - RCE-N2: `(:A)-[rdfs:subClassOf]->(:B)-[owl:equivalentClass]->(:C)` -> `(:A)-[rdfs:subClassOf]->(:C)` - RCE-N3: `(:A)-[owl:equivalentProperty]->(:B)-[rdfs:subPropertyOf]->(:C)` -> `(:A)-[rdfs:subPropertyOf]->(:C)` - RCE-N4: `(:A)-[rdfs:subPropertyOf]->(:B)-[owl:equivalentProperty]->(:C)` -> `(:A)-[rdfs:subPropertyOf]->(:C)` ## Inverse Rules `R` inverse of `S` implies that if an entity `A` is `R`-related to an entity `B` then `B` is also `S`-related to `A`. We like to call the output of an inverse rule a `walk-back`. A command that applies an inverse rule could be called `flip`. ### Predicates applicable in inverse rules This excludes the exact predicates for which inverse rules are redundant. ### Rules for SKOS - RI1: `(:A)-[skos:narrowMatch]->(:B)` -> `(:B)-[skos:broadMatch]->(:A)` - RI2: `(:A)-[skos:broadMatch]->(:B)` -> `(:B)-[skos:narrowMatch]->(:A)` ### Rules for SEMAPV - RI3: `(:A)-[semapv:crossSpeciesExactMatch]->(:B)` -> `(:B)-[semapv:crossSpeciesExactMatch]->(:A)` - RI4: `(:A)-[semapv:crossSpeciesNarrowMatch]->(:B)` -> `(:B)-[semapv:crossSpeciesBroadMatch]->(:A)` - RI5: `(:A)-[semapv:crossSpeciesBroadMatch]->(:B)` -> `(:B)-[semapv:crossSpeciesNarrowMatch]->(:A)` ## Generalisation Rules Generalisation rules are rules that can be applied to weaken a mapping deliberately.
This is sometimes useful, for example when combining strong OWL-Semantics mappings with weaker SKOS-based ones. ## Rules - RG1: `(:A)-[owl:equivalentTo]->(:B)` -> `(:A)-[skos:exactMatch]->(:B)` - RG2: `(:A)-[owl:subClassOf]->(:B)` -> `(:A)-[skos:broadMatch]->(:B)` ================================================ FILE: src/docs/confidence-model.md ================================================ # Confidence SSSOM enables annotating confidence in several ways for individual mappings records and for mapping sets. ## Confidence in Positive Semantic Mappings The following example shows a high confidence (0.99) manually curated semantic mapping, between two disease resources. ```tsv #curie_map: # mesh: https://meshb.nlm.nih.gov/record/ui?ui= # MONDO: http://purl.obolibrary.org/obo/MONDO_ # oboinowl: http://www.geneontology.org/formats/oboInOwl# # orcid: https://orcid.org/ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# #mapping_set_id: https://w3id.org/biopragmatics/biomappings/sssom/positive.sssom.tsv subject_id subject_label predicate_id object_id object_label mapping_justification author_id confidence MONDO:0000455 cone dystrophy skos:exactMatch mesh:D000077765 Cone Dystrophy semapv:ManualMappingCuration orcid:0000-0003-4423-4370 .99 ``` The following example shows a medium-confidence semantic mapping produced through a lexical matching process. While this semantic mapping is actually incorrect, the lexical matching process assigned it a confidence of 0.65. 
```tsv #curie_map: # DOID: http://purl.obolibrary.org/obo/DOID_ # orcid: https://orcid.org/ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # umls: https://uts.nlm.nih.gov/uts/umls/concept/ #mapping_set_id: https://w3id.org/biopragmatics/biomappings/sssom/negative.sssom.tsv subject_id subject_label predicate_id object_id object_label mapping_justification confidence DOID:0050052 Rocky Mountain spotted fever skos:exactMatch umls:C0035795 Rocky mountain spotted fever vaccine semapv:LexicalMapping 0.65 ``` When not explicitly specified, confidence estimation algorithms should consider the confidence of a semantic mapping to be 1.0 by default. ## Confidence with Negated Semantic Mappings SSSOM has explicit support for curating negative semantic mappings (i.e., subject-predicate-object triples known to be false) by using the `predicate_modifier` column. The following example shows a highly confident negative semantic mapping, because _Rocky Mountain spotted fever_ (a disease curated in DOID) is not the same as _Rocky mountain spotted fever vaccine_ (a vaccine curated in UMLS). ```tsv #curie_map: # DOID: http://purl.obolibrary.org/obo/DOID_ # orcid: https://orcid.org/ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # umls: https://uts.nlm.nih.gov/uts/umls/concept/ #mapping_set_id: https://w3id.org/biopragmatics/biomappings/sssom/negative.sssom.tsv subject_id subject_label predicate_id predicate_modifier object_id object_label mapping_justification author_id confidence DOID:0050052 Rocky Mountain spotted fever skos:exactMatch Not umls:C0035795 Rocky mountain spotted fever vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 1.0 ``` It's also possible to curate a negative semantic mapping with low confidence, but this is done less commonly in practice. Both human curators and semantic mapping prediction workflows typically focus on the production of _positive_ knowledge. 
Similarly, there are a large number of trivial negative semantic mappings that are typically ignored by curators and algorithms that consume semantic mappings. When not explicitly specified, confidence estimation algorithms should consider the confidence of a negative semantic mapping to be 1.0 by default. ## Estimating Overall Confidence in a Mapping Set There are two places where the confidence in a mapping set can be reported: 1. The creator of the mapping set can report their confidence in the mapping set with the `mapping_set_confidence` slot in the mapping set's metadata. 2. The maintainer of a mapping set registry who indexes a mapping set can report their own confidence in the mapping set. In some situations, it may be sufficient to choose a mapping set confidence based on knowledge about the scope/domain of the mapping set, who the curators were, etc. Alternatively, an empirical confidence can be estimated by randomly sampling semantic mappings from the mapping set, manually reviewing them, then reporting the percentage that were correct as a decimal value between zero and one. This estimate becomes more accurate as the size of the sample increases, so it's suggested to sample a minimum 50-100 semantic mappings. When not explicitly specified, confidence estimation algorithms should consider the registry confidence in a mapping set to be 1.0 by default. ## Reviewer Agreement In addition to the `confidence` slot which denotes the creator's confidence in the accuracy of a mapping record, the `reviewer_agreement` slot allows for the reviewer to state if they disagree or agree on a scale of $[-1, 1]$. 
In the following example, the reviewer confidently agrees with the accuracy of the mapping that was previously asserted by another curator and denotes this with a high agreement (near 1.0): ```tsv # curie_map: # CHEBI: http://purl.obolibrary.org/obo/CHEBI_ # mesh: http://id.nlm.nih.gov/mesh/ # orcid: https://orcid.org/ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # mapping_set_id: https://github.com/mapping-commons/sssom/blob/master/examples/schema/reviewer_agreement.sssom.tsv subject_id subject_label predicate_id object_id object_label mapping_justification author_id reviewer_id reviewer_agreement CHEBI:10001 Visnadin skos:exactMatch mesh:C067604 visnadin semapv:ManualMappingCuration orcid:0000-0001-9439-5346 orcid:0000-0003-4423-4370 0.99 ``` In the following example, a semantic mapping was predicted by the [Biomappings](https://www.wikidata.org/wiki/Q111239110) workflow. The reviewer confidently disagrees with the accuracy of the mapping, and denotes this by adding a low agreement (near -1.0): ```tsv # curie_map: # CHEBI: http://purl.obolibrary.org/obo/CHEBI_ # mesh: http://id.nlm.nih.gov/mesh/ # orcid: https://orcid.org/ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # wikidata: http://www.wikidata.org/entity/ # mapping_set_id: https://github.com/mapping-commons/sssom/blob/master/examples/schema/reviewer_agreement.sssom.tsv subject_id subject_label predicate_id object_id object_label mapping_justification mapping_tool_id reviewer_id reviewer_agreement CHEBI:10057 9H-xanthene skos:exactMatch mesh:C002563 xanthan gum semapv:ManualMappingCuration wikidata:Q111239110 orcid:0000-0003-4423-4370 -0.99 ``` In the following example, a semantic mapping was predicted by the [Biomappings](https://www.wikidata.org/wiki/Q111239110) workflow. Because MeSH does not include detailed information about the chemical's structure, it's not clear to the reviewer if it should be mapped or not. 
Therefore, the reviewer denotes they are unsure of whether the semantic mapping is correct or not with an agreement of 0.0 (halfway between 1.0 for fully agree and -1.0 for fully disagree). ```tsv # curie_map: # CHEBI: http://purl.obolibrary.org/obo/CHEBI_ # mesh: http://id.nlm.nih.gov/mesh/ # orcid: https://orcid.org/ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # mapping_set_id: https://github.com/mapping-commons/sssom/blob/master/examples/schema/reviewer_agreement.sssom.tsv subject_id subject_label predicate_id object_id object_label mapping_justification mapping_tool_id reviewer_id reviewer_agreement CHEBI:127105 tribromosalicylanilide skos:exactMatch mesh:C004361 tribromsalan semapv:LexicalMatching wikidata:Q111239110 orcid:0000-0003-4423-4370 0.0 ``` ## Aggregating Confidence for a Semantic Mapping Record [Hoyt et al. (2025)](https://doi.org/10.1093/bioinformatics/btaf542) proposed a model for aggregating confidences on semantic mappings that was implemented in the [Semantic Mapping Reasoner and Assembler](https://github.com/biopragmatics/semra). With the introduction of reviewer agreements, this section proposes one potential way of using it to weight confidence: $$f(c,r)=(1−\left| r \right|)\times c+\left| r \right|\times \frac{r + 1}{2}$$ with creator confidence ($c$) and reviewer agreement ($r$). The $\frac{r + 1}{2}$ term reweights the agreement score to work better on the $[0,1]$ range. This function has the nice properties: 1. When the reviewer's agreement is closer to 0.0, it doesn't have an effect on the creator's confidence 2. When the reviewer's agreement is closer to -1.0 or 1.0, it should override the creator's confidence proportionally to how close it is to the extremes Here's how it looks over all possible values for the creator confidence and reviewer agreement: ![](images/reviewer-agreement-aggregation.svg)
Code that produced this chart ```python import matplotlib.pyplot as plt import numpy as np def aggregate(c: float, r: float) -> float: w = np.abs(r) return (1 - w) * c + w * (r + 1) / 2 reviewer, creator = np.meshgrid(np.linspace(-1, 1, 100), np.linspace(0, 1, 100)) z = aggregate(creator, reviewer) fig, ax = plt.subplots() mesh = ax.pcolormesh(creator, reviewer, z, cmap="RdBu") ax.set_xlabel("Creator Confidence") ax.set_ylabel("Reviewer Agreement") ax.set_title("Aggregation of Creator Confidence\nand Reviewer Agreement") ax.axis([0, 1, -1, 1]) fig.colorbar(mesh, ax=ax) plt.show() plt.savefig("images/reviewer-agreement-aggregation.svg") ```
================================================ FILE: src/docs/contributing.md ================================================ # Contributing to SSSOM - [Contribution guidelines](https://github.com/mapping-commons/sssom/blob/master/CONTRIBUTING.md) - [Code of Conduct](https://github.com/mapping-commons/sssom/blob/master/CODE_OF_CONDUCT.md) ================================================ FILE: src/docs/create-mapping-commons.md ================================================ ## Getting started - First, create a virtual environment of your choice. If you need assistance with virtual environments, [here's a guide](https://berkeleybop.github.io/best_practice/python_environments) to help you. - Install the [cruft](https://github.com/cruft/cruft) package. Cruft enables keeping projects up-to-date with future updates made to this original template. ``` pip install cruft ``` - Create a project using the [mapping-commons-cookiecutter](https://github.com/mapping-commons/mapping-commons-cookiecutter) template. ``` cruft create https://github.com/mapping-commons/mapping-commons-cookiecutter ``` This kickstarts an interactive session where you declare the following: - `project_name`: Name of the project. [defaults to: my-commons-name] - `github_org`: Name of the github org the project belongs to. [defaults to: my-org] - `project_description`: Description of the project [defaults to: 'This is the project description.'] - `full_name`: Name of the author [defaults to: 'My Name'] - `email`: Author's email [defaults to: 'my-name@my-org.org'] - `yo`: Choose from [1]: Yes, [2]: No [**TEST OPTION FOR NOW**] - `license`: Choose from [1]: Yes, [2]: No [**TEST OPTION FOR NOW**] ## What does this do? The following files and directories are autogenerated in the project: ### TODO ## Version control ### GitHub 1. Go to [https://github.com/new](https://github.com/new) and follow the instructions, being sure to NOT add a README or .gitignore file (this cookiecutter template will take care of this for you) 2.
Add the remote to your local git repository ```bash git remote add origin https://github.com/my-user-or-organization/my-commons-name.git git branch -M main git push -u origin main ``` ### GitLab #### TODO ## Future updates to the project's boilerplate code In order to be up-to-date with the template, first check if there is a mismatch between the project's boilerplate code and the template by running: ``` cruft check ``` This indicates if there is a difference between the current project's boilerplate code and the latest version of the project template. If the project is up-to-date with the template: ``` SUCCESS: Good work! Project's cruft is up to date and as clean as possible :). ``` Otherwise, it will indicate that the project's boilerplate code is not up-to-date by the following: ``` FAILURE: Project's cruft is out of date! Run `cruft update` to clean this mess up. ``` For viewing the difference, run `cruft diff`. This shows the difference between the project's boilerplate code and the template's latest version. After running `cruft update`, the project's boilerplate code will be updated to the latest version of the template. ================================================ FILE: src/docs/editors.md ================================================ # Simple Standard for Sharing Ontological Mappings (SSSOM) ## How to make a new release * Automated: * On the main code page, click on Releases (right hand column) * Click on the `Draft a new release` button * Click the `Choose a tag` button, create a new tag: `X.X.X` * Click on the `Generate a new release` button * Make sure only the `Select as the latest release` checkbox is checked. * Click `Publish release` button * Manual: * `make build` * `make pypi` This triggers a GitHub Action workflow that releases the new version of SSSOM to PyPi. 
## Documentation deployment This can be done in two ways: * Automated: Every time a pull request is merged into the `main` branch, a github action is triggered to deploy documentation automatically. * Manually: The make command to deploy documentation is `make deploy`. ================================================ FILE: src/docs/events/ccb2022.md ================================================ ## CCB Seminar Series: Open SSSOM - Unlocking the wealth of biomedical data using shared standardized entity mappings Where: Virtual event at the Center for Computational Biomedicine, Harvard Medical School, see https://computationalbiomed.hms.harvard.edu/education/ccb-seminar-series/ When: Monday 12 Dec 2022 Speaker: Nicolas Matentzoglu, PhD, Independent Contractor, Semantic Web and Knowledge Graphs expert Abstract: In this seminar, we will discuss the role of entity mappings in the biomedical domain, and the potential gain we might get from standardising and sharing them. We will introduce the Simple Standard of Ontological Mappings (SSSOM, https://w3id.org/sssom) and showcase some of its use cases. The central goal of this seminar is to redefine entity mappings as FAIR semantic artefacts in their own right, thus making them first-class citizens alongside, for example, controlled vocabularies and ontologies. ================================================ FILE: src/docs/events/mc2021.md ================================================ # 1st Mapping Commons Workshop on Simple Standard for Sharing Ontology Mappings When: 03.09.2021 For a *list of participants* see: - Wikidata: https://www.wikidata.org/wiki/Q108394519 - Scholia: https://scholia.toolforge.org/event/Q108394519 In 2020, we introduced the Simple Standard for Sharing Ontology Mappings (SSSOM) as a way for the mapping community to exchange and consolidate mappings using a simple TSV format. 
SSSOM seeks to solve, in particular, the following problems: - Standardising the mapping metadata that is necessary to drive data transformation and knowledge graph merging use cases - Enable effective merging and filtering of mapping sets - Standardising the representation of mapping sets across formats such as RDF/XML, JSON-LD, TSV, and others. The *purpose* of this 3-hour workshop: - Describing current use cases to the community, and ensuring all community use cases are documented and understood - Establishing a user forum for getting support and providing feedback - Define a simple governance strategy for organic evolution of standard - Describing a number of key open issues: - The representation of complex mappings - The representation of curation rules - The problem of predicate modification - The alignment with external standards such as PROV-O and Alignment API - Defining the path to a SSSOM beta release (stable) and the rallying for the paper *Resources*: - [Overview presentation](https://docs.google.com/presentation/d/1T75TRkpKRGHk5FSeFS7mQe8vmo8rt7bE69kgPX6PZMs/edit?usp=sharing) ## Outcomes - Members of the SSSOM core team are organised as a GitHub team: https://github.com/orgs/mapping-commons/teams/sssom-core - If you want to become a member, please make an issue here: https://github.com/mapping-commons/SSSOM/issues - We added some of the questions asked to the new SSSOM FAQ: https://mapping-commons.github.io/sssom/faq/ - Governance proposal (comments welcome): https://github.com/mapping-commons/SSSOM/issues/82 - Governance will evolve over time. Standard and governance will evolve together. - We will use versioning (like SemVer) and should denote when backwards incompatible changes happen - The [5-Star system](https://mapping-commons.github.io/sssom/spec/#minimum) for open FAIR mappings is now in its first official version.
### Discussion summary - Ben Gyori: would be interesting to discuss beyond the format whether there's a central repository, or whether primary developers will make it as a primary export hosted along with their other artifact. Would there be a process to pull those? - Nico: takes a long time for uptake of new publication systems, so this could take a long time -> maybe better to promote on an ontology level. Could also have the side benefit of providing a point of introspection - John G: I totally would want BioPortal to be capable of managing the RDF produced from SSSOM resources, and for Bioportal to be a mapping resource and not simply an ontology resource. I suspect the RDF patterns that SSSOM is defining are the gold we'll need for that gold standard for exchanging mappings. Uploading the RDF files can trivially be done in a naive way of course, but integrating that RDF knowledge into Bioportal to make them maximally useful as a separate kind of resource is obviously 'real work' (and so schedules are unknowable). - John: how dependent is the library on the software itself? Is it an exchange principle? - Nico: LinkML has the advantage that it gets JSON and TTL outputs for free if we use it. Would also be advantageous if more people used this standard for metamodeling to create similar outputs for different modeling - Charlie: using "frontmatter" format for SSSOM TSV files, like how github is using frontmatter in Jekyll (ref: https://blog.datacite.org/using-yaml-frontmatter-with-csv/) (http://csvy.org/) - John G: Analogous to frontmatter format, I keep being drawn to the SKOS Play format as an alternate (but I *think* TTL-compatible) format for the SSSOM content. How bad would that be? (I can create a ticket) - Charlie: Requirements for a default JSON-LD context (e.g., prefix -> URL prefix mapping) - How should it be maintained? Should it continue to be manually curated, or is an automated export from something like the Bioregistry a good idea? 
If it's automatically exported from the Bioregistry, what kinds of interactions might users want to have via the Bioregistry issue tracker to propose improvements? Similarly, we can make tutorials for directly creating PRs. - Charlie: How prefixes should be stylized/what is the business logic/decision tree for using OBO Library PURLs, Identifiers.org URLs, Bioregistry URLs, first-party provider URLs, etc. based on what's available and mapped between various first-party providers, third-party providers (e.g., ChemSpider InChI resolver), and meta-providers (e.g., Identifiers.org, OntoBee)? This is both a concern for "best practices" in SSSOM defining a custom context and also when using or extending the default context. - Charlie: How to represent mappings where the curator is unsure if the relation is correct or not? This happens often when curating equivalences, e.g., in Biomappings https://github.com/biomappings/biomappings/blob/master/src/biomappings/resources/unsure.tsv - Tiffany: Is it important to know why someone feels more or less confident about a mapping? If so, is there also a way to include that in the measure of “confidence”? - Sue: In practice I’ve tended to add comments when I am uncertain and have questions. Possibly this could be formalized? - Davera clinical use case discussion: Overall issue: mapping sets of things to a term is a goal for clinical mappings - mapping recommendations/rational exercise - staging and diagnosis information (like stage 1 or stage 2 of a given cancer) - Select a set of stages - this is challenging wrt mappings - Different kinds of scales describing the same thing are hard/sometimes not "kosher" to mix - Phenotypes rely on capturing human-readable data on the decision logic of how mappings are applied by standards implementation team - Proposed to look at the HL7 Implementation profiles as a way to incorporate an approach to this complex mapping challenge - Melissa: rename SSSOM to Slytherin Standard. 
- Charlie 100% supports this (Tiffany: +1; Alex +1) - Kristin also likes this. - John used it. - John G: Ontology repositories are mappings-motivated, to both provide to users good mappings, and to provide good ways for users or managers to ingest, manage, apply, and create mapping knowledge. Ontology repositories are presumably also capable of storing mappings in their semantic (RDF-equivalent) format. With this in mind, is the concept of a "mapping server" equivalent, complementary, or antagonistic to the existing ontology repositories? - John G: Need to consider identification and versioning of the mapping artifacts. It's one thing to say "We have all the mapping artifacts and we are giving those out", but (just like ontologies) citing a mapping artifact requires that you have a unique identifier for that artifact, and that the identifier incorporates the fact the artifact may have multiple versions. Ideally the SSSOM artifacts (like ontologies) would (a) be accessible in a defined format at the identifier IRI, (b) include their identifiers within the SSSOM artifact. I am thinking that an SSSOM is inherently a semantic artifact, and therefore it should follow semantic namespace declaration principles in this regard. - Julie: W3id supports regex based redirects (for purls) ### Breakout sessions #### Curation rules: documenting the decision rules on how a mapping was determined - Effective definition of inclusion criteria/exclusion criteria: - Inclusion example: Two ontologies saying I created exact mappings that they have a string match or a string match to a synonym + an xref - Exclusion example: Only matched on an acronym - Match types - Cover partial string matches - Other - documentation - criteria to distinguish exact from narrow/broad - how exact is exact - Line between close/narrow/broad - Direction of narrow and broad - DOS: I'd favor manual mapping be done on definitions + context in ontology, leaving lexical mappings to machines.
- What metadata could we add to the header to make clear criteria used? One thing it might be useful to record is whether ontology context (relationships & location in classification) of mapped terms was used (Some ontologies/taxonomies have poor quality graphs but high quality term definitions.) - Source string match to target (lexical exact, stem, word [synonym and type]) - need for both source and target, how to synonyms fit in - Needs: - Generalized patterns that relate file header information to row-level information - Need more expressivity in the match type - Inclusion and exclusion criteria - Best practices guide - Algorithm/tool/similarity measure for computationally derived mappings - Specificity with respect to the parent concept or portion of the hierarchy that the concept is from #### Mapping provenance and alignment with external provenance standards - Problems: - We need to distinguish original and derived mappings - We need to somehow “encode” how a derived mapping was created (for example through a walk - USeful to capture as part of the PROV activity - agents (wasAssociatedWith some) - mapping tool - creator - algorithms - semantic similarity etc - Why provenance: “i dont trust mappings from source x..” - Who did it? What tools were used? - are the most important - When completed, how often updated - Which version of the ontology was the mapping generated from? (20-30 provenance related properties that could be relevant) [list](https://hal.archives-ouvertes.fr/lirmm-01605783) - list of most-recommended terms as a template: https://github.com/sifrproject/MOD-Ontology/blob/master/mod-v1.4_properties_template.ttl - Activity manual mapping -> Activity reconciliation - Inputs and outputs of activities? 
- Mapping set activities vs mapping activities - Shahim: Generic tagging mechanism - users add tags k:v - Suggestion: we open the “other” field to arbitrary json, then if we see people use something a lot, we allow promoting stuff to the top level (look at fhir as an example) - James counter suggestion: open the column space and allow Qnames in there? its like Shahims suggestion just on the top level; - John says look at SKOS Play convert tool it implements arbitrary triples as so you add whatever properties you want in the top row top row is actually the first row *after* the Column Header row, which begins with "Identifier" cell - Thomas: While it’s nice to have the ability to express complex prov (and we should think about it), the important prov files are not that complex. Minimum should be something like: Creator, creation date, algorithm,... see below “list of critical (minimum) prov information” - John: World is changing we can assume a bit more complexity - All: A short list of the critical provenance information is needed, but there should also be a mechanism to add other ecosystem-specific provenance. - Versioning: - We need to carefully think through versioning of mapping sets. Versioning should be similar to ontology artefacts, with version IRIs and PURLs - W3id supports regex based redirects - We need to introduce versioning for the SSSOM standard itself. Someway to indicate whether breaking changes were introduced #### Representing predicate modification: negation, inverse, direct, indirect etc https://github.com/mapping-commons/sssom/issues/40 - Negative mappings (e.g., not equivalent to, not related to) have a clear use case in supporting semi-automated curation of mappings to avoid zombie mappings. - We agreed adding additional syntax to SSSOM would make it less simple and likely less accessible. 
- Two candidate solutions for including negative mappings remain: curating a controlled vocabulary of negative relationships (e.g., sssom:notEquivalentTo) OR adding a predicate modifier column. We considered parallel discussions in the LinkML community and examined the use of predicate modifiers in the Gene Ontology Annotation database. Both solutions could work, but we were hesitant to commit to one during the meeting. #### Mapping (clinical etc) data model elements and values https://github.com/mapping-commons/sssom/issues/43 #### Use cases for complex mappings - https://github.com/mapping-commons/sssom/issues/61 - The main outcome for this discussion was that the participants urged us to keep the `Simple` in SSSOM, and that any decision to capture more complex mapping cases should be driven by a very strong use case - For the first release of the SSSOM standard, we will not worry about complex mappings ## Next steps - Declare stable first version for SSSOM spec (September 2021) - Write manuscript (September/October 2021) - Dockerise all mapping related tooling, for example for generation, reconciliation, transformation etc. (December 2021) - Work with OAEI to publish automated mappings more systematically in SSSOM, including better mapping justifications/curation rules (Early 2022) - Work with @cmungall & @balhoff to integrate mapping reconciliation as a first-class citizen into mapping pipelines (February 2022) - Extend OxO to fully support SSSOM data model (prototype SSSOM browser April 2022). ================================================ FILE: src/docs/events/mc2023.md ================================================ # 2nd Mapping Commons Workshop on Simple Standard for Sharing Ontology Mappings (SSSOM) **When**: Sunday, 23rd April, 2:30 pm Italy (CET), 1:30 pm BST, 8:30 am EDT, 5:30 am PDT (3 hours total) **Where**: co-located with [Biocuration 2023](https://biocuration2023.github.io/).
For information about the venue see https://biocuration2023.github.io/workshops. During the workshop, we will work a lot with Slido for interactive sharing of ideas, rather than relying on open discussions. We recommend having your laptop / phone with you to be able to participate. ## Recordings https://www.youtube.com/watch?v=juMQQ01Q540&list=PLqu_J7ADQtKyX55F7RqZtaSS7TwGd3MoR&ab_channel=BiocurationConference2023 ## Recommended preparation - Scroll through [the documentation](https://mapping-commons.github.io/sssom/home/), in particular [the paper](https://doi.org/10.1093/database/baac035) and the [basic tutorial](https://mapping-commons.github.io/sssom/tutorial/) to get a basic sense of SSSOM - Skim through, or even watch, the [SSSOM CCB Seminar recording](https://www.youtube.com/watch?v=4vqeRECuAKE) - Scroll through the slide decks describing the problems we will be addressing (note, they are all works in progress): - [Complex Mappings: Examples from OMOP2OBO (Tiffany Callahan)](https://docs.google.com/presentation/d/1Jn0W9gjRn19ISDB8N-sEwKwXsJySLPlNIsOL6ng_nEA/edit?usp=sharing) - [Complex mappings - the journey towards a proposal (Nico Matentzoglu)](https://docs.google.com/presentation/d/1kFD33S_WMgEGmCnT7IjVCeEyKI7OpcUw1ZzRXGqt1hs/edit?usp=sharing) - [Literal mappings with SSSOM (James McLaughlin)](https://docs.google.com/presentation/d/1mBZK6KS7JgmXlEtszQiOa_Cl7SXg_Z8wRp0tZHaL57Y/edit?usp=sharing) - FAIR Impact and schema mappings (Yann Le Franc) - slides TBD - [Mapping Data Structures: Challenges and Approaches](https://docs.google.com/presentation/d/191jQYOe8KAGoktVOA408NW_WWk_Gon0q9idyylbEQck/edit?usp=sharing) (Chris Mungall) - [Concept Set Mappings](https://docs.google.com/presentation/d/1055Etr0kgHHkguwgizecb_SEhj2nNd7my3q0u8fCDvk/edit?usp=sharing) (Chris Roeder) - slides TBD ## Preliminary agenda | Time | Topic | Led by | | ------ | ----- | ------ | | 2:30pm | [Welcome, introduction, SSSOM overview and introduction of the problem of "non-simple 
mappings"](https://docs.google.com/presentation/d/1bHcZsYU9GpZDyeDxO4uopnuw0-ETfldn1EFYQwBbNro/edit?usp=sharing) | Nico Matentzoglu | | 2:55pm | [FAIR Impact and schema mappings](https://drive.google.com/file/d/1cDSfvBehegy3edJU4LxZK3S-xI0LNbmn/view?usp=sharing) | Yann Le Franc | | 3:10pm | [Literal mappings with SSSOM](https://docs.google.com/presentation/d/1mBZK6KS7JgmXlEtszQiOa_Cl7SXg_Z8wRp0tZHaL57Y/edit?usp=sharing) | James McLaughlin | | 3:20pm | Literal Mappings - Discussion | | | 3:30pm | [Concept Set Mappings](https://docs.google.com/presentation/d/1055Etr0kgHHkguwgizecb_SEhj2nNd7my3q0u8fCDvk/edit?usp=sharing) | Chris Roeder | | 3:45pm | Concept Set Mappings - discussion | | | 3:55pm | Break | | | 4:05pm | [Complex Mappings: Examples from OMOP2OBO](https://docs.google.com/presentation/d/1Jn0W9gjRn19ISDB8N-sEwKwXsJySLPlNIsOL6ng_nEA/edit?usp=sharing) | Tiffany Callahan | | 4:20pm | [Complex mappings - the journey towards a proposal](https://docs.google.com/presentation/d/1kFD33S_WMgEGmCnT7IjVCeEyKI7OpcUw1ZzRXGqt1hs/edit?usp=sharing) | Nico Matentzoglu | | 4:35pm | Complex Mappings - discussion | | | 4:50pm | [Mapping Data Structures: Challenges and Approaches](https://docs.google.com/presentation/d/191jQYOe8KAGoktVOA408NW_WWk_Gon0q9idyylbEQck/edit?usp=sharing) | Chris Mungall | | 5:05pm | Schema Mappings - discussion | | | 5:20pm | Summary and closing remarks | Nico Matentzoglu | ## Organisers - Nicolas Matentzoglu - Yann Le Franc - Tiffany Callahan - Chris Mungall - Chris Roeder - James McLaughlin - Nomi Harris ================================================ FILE: src/docs/events/oboacademy2022.md ================================================ ## OBO Academy 2022: Introduction to manual mapping curation Where: Virtual, https://oboacademy.github.io/obook/courses/monarch-obo-training/ When: 17th May 2022 Speaker: Nicolas Matentzoglu, PhD, Independent Contractor, Semantic Web and Knowledge Graphs expert Abstract: See [here](../tutorial.md). 
================================================ FILE: src/docs/events/ohdsi2022.md ================================================ ## OHDSI Symposium 2022: There are no "good" mappings. Where: - https://www.ohdsi.org/ohdsi2022symposium/ - https://www.ohdsi.org/ohdsi2022-workgroup-activities/ When: 15th October 2022 Speaker: Nicolas Matentzoglu, Melissa Haendel, Tiffany Callahan Slides: https://docs.google.com/presentation/d/1sGPh1b0keghxF4o7vMOQAlZ6QyBf97ZpaTXjmMY3UP0/edit#slide=id.p Abstract: The current state of observation data transformation management has resulted in a proliferation of approaches to data normalization and alignment that have accordingly created an explosion of maps. When publicly available, data transformation maps are of varying quality and are often configured to meet single clinical domain or project-specific objectives. Sharing maps in a community requires metadata to help map comparison and evaluation, and release management over different versions. This is typically lacking. In this workshop we will present rationale for translational research data transformation management, including implementation examples mapping Real World Data to OMOP, at-scale. We will discuss the way in which source vocabularies are managed in OMOP and how to improve that process in the future. Data transformations that accommodate divergent underlying ontologies supporting a variety of use cases will be solicited from the participants both in advance and during the workshop. 
================================================ FILE: src/docs/events/ohdsi2023.md ================================================ ## Poster presentation at OHDSI 2023 Symposium, Rotterdam *Title*: "Synergizing Simple Standard for Sharing Ontology Mappings (SSSOM) and the Observational Health Data Sciences and Informatics (OHDSI)" *Where*: https://www.ohdsi-europe.org/index.php/symposium-2023 *When*: 3rd July 2023 *Presenters*: - Polina Talapova - Nicolas Matentzoglu *Links*: - [Link to poster](https://www.dropbox.com/s/qru8lel4ahrluwo/ohdsi2023_matentzoglu_talapova_poster.pdf?dl=0) - [Link to short report](https://www.dropbox.com/s/gvnjq16cfwvnna4/ohdsi2023_matentzoglu_talapova_extended_abstract.pdf?dl=0) ================================================ FILE: src/docs/events/om2022.md ================================================ ## OM 2022: A Simple Standard for Ontological Mappings 2022 - Updates of data model and outlook Where: Virtual, co-located with ISWC 2022, http://om2022.ontologymatching.org/ When: 23rd October 2022 Speaker: Nicolas Matentzoglu, PhD, Independent Contractor, Semantic Web and Knowledge Graphs expert Abstract: The Simple Standard for Ontological Mappings (SSSOM) was first published in December 2021 (v. 0.9). After a number of revisions prompted by community feedback, we have published version 0.10.1 in August 2022. One of the key new features is the use of a controlled vocabulary for mapping-related processes, such as preprocessing steps and matching approaches. In this paper, we give an update on the development of SSSOM since v. 0.9, introduce the Semantic Mapping Vocabulary (SEMAPV) and outline some of our thoughts on the establishment of mapping commons in the future. 
================================================ FILE: src/docs/events/pistoia2022.md ================================================ ## Pistoia Seminar 2022: A Simple Standard for Sharing Ontological Mappings (SSSOM) Where: Virtual When: 28th April 2022 Speaker: Nicolas Matentzoglu, PhD, Independent Contractor, Semantic Web and Knowledge Graphs expert Abstract: Meeting introducing SSSOM to the Pistoia Alliance. ================================================ FILE: src/docs/events/wsbo2021.md ================================================ ## WSBO-2021: Workshop on Synergizing Biomedical Ontologies Where: https://github.com/OntoloBridge/WSBO/ [Slides](https://docs.google.com/presentation/d/1TlROX-JNeWvgrX57-CBa2qxTrRp92VGGZnrhJv3rLPM/edit#slide=id.p) ================================================ FILE: src/docs/explanation/mappings.md ================================================ # What is a mapping? The word "mapping" is pretty overloaded in practice: for some people, it simply means "a correspondence of one term to another equivalent or near equivalent term." But even here, there is little understanding to what a "term" is in this sentence, or what "almost equivalent" means - and, there are many different kinds of mappings used in practice that are not "equivalent" at all. In its very essence, an individual mapping maps one information entity, i.e. a _representation of a real world entity_, to another information entity - how, and what these strings could be, will be the subject of the following section. In the following, we consider an **information entity** a *sequence of characters which has a well defined relationship to some thing in the real world*, for example: - an ontology id like HP:0004934 corresponds to the concept of "Vascular calcification" in the real world. Note that HP:0004934 is annotated with the `rdfs:label` "Vascular calcification". 
The label itself is not necessarily a term - it could change, for example to "Abnormal calcification of the vasculature", and still retain the same meaning. - "Vascular calcification" may be a term in my controlled vocabulary which I understand to correspond to that respective disease (not all controlled vocabularies have IDs for their terms). This happens for example in clinical data models that do not use formal identifiers to refer to the values of slots in their data model, like "MARRIED" in /datamodel/marital_status. - Examples of terms: - IDs of classes in an ontology - elements of a clinical value set - codes of clinical terminologies such as [Z63.1](https://www.icd10data.com/ICD10CM/Codes/Z00-Z99/Z55-Z65/Z63-/Z63.1) - TLDR: terms correspond to things in the world and that correspondence is not subject to change. Labels can change without changing the meaning of a term. ## An attempt at a practical categorisation In our experience, there are roughly four kinds of mappings: - _string-string_: Relating one string, or label, to another string, or label. Understanding such mappings is fundamental to understanding all the other kinds of mappings. - _string-term_: Relating a specific string or "label" to their corresponding term in a terminology or ontology. We usually refer to these as synonyms, but there may be other words used in this case. - _term-term_: Relating a term, for example a class in an ontology, to another term. This is what most people in the ontology domain would understand when they hear "ontology mappings". - _complex mappings_: Relating two sets of terms. These are the rarest and most complicated kinds of mappings, as they relate for example two phenotypic profiles (sets of phenotypes) with each other. We will discuss some more examples in a future lesson. In some ways, these four kinds of mappings can be very different. 
We do believe, however, that there are enough important commonalities such as common features, widely overlapping use cases and overlapping toolkits to consider them together. In the following, we will discuss these in more detail, including important features of mappings and useful tools. ### Important features of mappings Mappings have historically been neglected as second-class citizens in the medical terminology and ontology worlds - the metadata is insufficient to allow for precise analyses and clinical decision support, they are frequently stale and out of date, etc. The question "Where can I find the canonical mappings between X and Y?" is often shrugged off and developers are pointed to aggregators such as [OxO](https://www.ebi.ac.uk/spot/oxo/) or [UMLS](https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/mapping_projects/index.html) which combine manually curated mappings with automated ones causing ["mapping hairballs"](#How-to-solve-the-problem-of-mapping-hairballs). There are many important metadata elements to consider, but the ones that are by far the most important to consider one way or another are: - _Precision_: Is the mapping exact, broad or merely closely related? - _Confidence_: Do I trust the mapping? Was it done manually by an expert in my domain, or by an algorithm? - _Source version_: Which version of the term (or its corresponding ontology) was mapped? Is there a newer mapping which has a more suitable match for my term? Whenever you handle mappings (either create, or reuse), make sure you are keenly aware of at least these three metrics, and capture them. You may even want to consider using a proper mapping model like the [Simple Standard for Sharing Ontology Mappings (SSSOM)](https://github.com/mapping-commons/SSSOM/blob/master/SSSOM.md) which will make your mappings FAIR and reusable. ### String-string mappings String-string mappings are mappings that relate two strings. 
The task of matching two strings is ubiquitous for example in database search fields (where a user search string needs to be mapped to some strings in a database). Most, if not all effective ontology matching techniques will employ some form of string-string matching. For example, to match simple variations of labels such as "abnormal heart" and "heart abnormality", various techniques such as [Stemming](https://en.wikipedia.org/wiki/Stemming) and [bag of words](https://en.wikipedia.org/wiki/Bag-of-words_model#:~:text=The%20bag%2Dof%2Dwords%20model,word%20order%20but%20keeping%20multiplicity.) can be employed effectively. Other techniques such as edit-distance or Levenshtein can be used to quantify the similarity of two strings, which can provide useful insights into mapping candidates. ### String-term mappings / synonyms String-term mappings relate a specific string or "label" to their corresponding term in a terminology or ontology. Here, we refer to these as "synonyms", but there may be other cases for string-term mappings beyond synonymy. There are a lot of use cases for synonyms so we will name just a few here that are relevant to typical workflows of Semantic Engineers in the life sciences. [Thesauri](https://en.wikipedia.org/wiki/Thesaurus) are reference tools for finding synonyms of terms. Modern ontologies often include very rich thesauri, with some ontologies like Mondo capturing more than 70,000 exact and 35,000 related synonyms. They can provide a huge boost to traditional NLP pipelines by providing synonyms that can be used for both Named Entity Recognition and Entity Resolution. Some insight on how, for example, Uberon was used to boost text mining can be found [here](https://github.com/obophenotype/uberon/wiki/Using-uberon-for-text-mining). ### Term-term mappings / ontology mappings Term-term mappings relate a term, for example a class in an ontology, to another term, usually from another ontology or database. 
The term-term case of mappings is what most people in the ontology domain would understand when they hear "ontology mappings". This is also what most people understand when they hear "Entity Resolution" in the database world - the task of determining whether, in essence, two rows in a database correspond to the same thing (as an example of a tool doing ER see [deepmatcher](https://github.com/anhaidgroup/deepmatcher), or [py-entitymatcher](https://pypi.org/project/py-entitymatching/)). For a list of standard entity matching toolkits outside the ontology sphere see [here](https://www.biggorilla.org/software_cat/entity-matching/index.html). ### Further reading - A great overview can be found in ["Tackling the challenges of matching biomedical ontologies" (Faria et al 2018)](https://jbiomedsem.biomedcentral.com/articles/10.1186/s13326-017-0170-9) - A yearly competition of ontology matching systems is held by the [Ontology Alignment Evaluation Initiative (OAEI)](https://oaei.ontologymatching.org/). The challenge [results](http://oaei.ontologymatching.org/2020/results/) are a useful guide to identifying systems for matching you may want to try. ## Some examples of domain-specific mapping of importance to the biomedical domain ### Phenotype ontology mappings Mapping phenotypes across species holds great promise for leveraging the knowledge generated by Model Organism Database communities (MODs) for understanding human disease. There is a lot of work happening at the moment (2021) to provide standard mappings between species specific phenotype ontologies to drive translational research ([example](https://github.com/mapping-commons/mh_mapping_initiative/tree/master/mappings)). Tools such as [Exomiser](https://github.com/exomiser/Exomiser) leverage such mappings to perform clinical diagnostic tasks such as variant prioritisation. 
Another app you can try out that leverages cross-species mappings is the Monarch Initiatives [Phenotype Profile Search](https://monarchinitiative.org/analyze/phenotypes). ### Disease ontology mappings Medical terminology and ontology mapping is a huge deal in medical informatics ([example](https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/mapping_projects/index.html)). [Mondo](https://github.com/monarch-initiative/mondo) is a particularly rich source of well provenanced disease ontology mappings. ## How should you map your data to ontologies? There are no one size fits all strategies for mapping your data to ontologies. There are many research areas that have something to give in this process. Here, we outline some ideas on how to think about the problem. ### Case 1: Mapping internal controlled vocabularies ### Case 2: Mappings from free text Examples: - [Monarch Text Annotator](https://monarchinitiative.org/tools/text-annotate) ### Case 3: Mappings between public controlled vocabularies and ontologies ## How to solve the problem of mapping hairballs String-term mappings Overview of automated approaches - Simple matches (string, string pre-pro, fuzzy string) - Graph-based matches (incl. semantic similarity) - NLP/Machine Learning Practical: - Try to get the same mappings as before using techniques - Exact - Simple preprocessing - Levenshtein - Jaccard similarity - Embedding similarity (?) ================================================ FILE: src/docs/faq.md ================================================ # Frequently Asked Questions (FAQ) ## Why should our mappings be FAIR and carefully standardised? Mappings are frequently created on an ad-hoc basis, using simple two-column spreadsheets where the first column corresponds to the subject of the mapping, and the second column to the object of the mapping. 
This is insufficient for a variety of reasons: - non-transparent precision: While the assumption is that the subject "sort of mostly exactly" maps to the object, in practice this is rarely the case. Matches can be `exact`, where the subject corresponds 100% to the object, `broad`, where the object is broader than the subject, and others. Qualifiers like `exact`, `broad`, `narrow`, `related` and `close` qualify the *precision* of the mapping (not to be mistaken for fuzziness of confidence). Without knowing the precision, we cannot accurately transform our data, nor can we use the mappings to "walk", i.e. move from one mapping to another, see [SSSOM 5-Star recommendation for mappings](5star-mappings.md) - non-transparent incompleteness: We don't know when the mapping was created, on the basis of what version of the terminological source of the subject or object. As time passes, we also lose confidence whether there would now be more suitable mappings, or whether there are new terms that are now fully covered by the mappings. - non-transparent confidence: whether a tool or a human proposes the mapping, there is always a bit of a risk the mapping call may be wrong. As consumers of the mappings we need to know how confident the mapping authors were (confidence score), and why they were confident (curation rules, mapping justification). Currently, mappings are created by a variety of systems, manually curated and automatic, and we need a way to efficiently collect and combine them. Mapping sets and mappings with quality provenance metadata allow us to trace faulty mappings to the source and correct them in a way that _all_ users of the mapping set will profit from it. ## Is there a central repository of SSSOM files? The idea of a mapping commons is to provide mappings in a decentralised fashion akin to OBO ontologies. A mapping commons collects mappings relevant to a particular community, either by reference (i.e. 
pulling in mapping sets already published elsewhere), or directly maintained at the mapping commons ([example](https://github.com/mapping-commons/mh_mapping_initiative)). Their integration as part of a repository (mapping server) would look like [EBI's Ontology X-ref Browser](https://www.ebi.ac.uk/spot/oxo/) or [BioPortal](https://www.bioontology.org/wiki/BioPortal_Mappings), but the exact scope of these repositories is _use case dependent_ - EBI may choose to show cross references from and to ontologies loaded into OLS, while BioPortal chooses to show a different set of mappings. The plan is to update EBI's OxO to support the full SSSOM data model, drawing curated mappings from a variety of mapping commons, by Summer 2022 - but it is unlikely that one central place will index all available mappings. ## Who is responsible for the conversion into SSSOM - the primary developers of an artefact, or a mapping commons? Like with everything on the web, the closer to the source the SSSOM mappings are curated, the better. Ideally, mappings are maintained as part of ontology release pipelines or by primary mapping creators, rather than derived from a secondary source such as a database, further downstream. The reason for this is that ideally, we would want mappings to be reviewable and editable in much the same way as open ontologies, offering issue trackers and an active community incorporating changes. That said, it is unlikely that all existing mappings will be maintained by the source directly. For example, we expect to maintain the SSSOM mappings derived from the vast majority of OBO ontology xrefs as a downstream task ([example](https://github.com/mapping-commons/ols-mapping-commons)). ## How dependent are we on the sssom-py toolkit? SSSOM follows the core design principle that mapping tables should be (a) self-contained, i.e. including its prefix maps similar to a turtle file, and (b) readable by normal data science toolkits. 
An SSSOM table can be read with pandas using the `comment='#'` parameter (with one caveat, which is that `#` must be used as a character _solely_ to denote comments), or a very simple combination of a yaml reader and pandas. The SSSOM toolkit however offers some extra functionality, like export to JSON-LD, or RDF or import from other frequently used formats. ## Is the concept of a "mapping server" equivalent, complementary, or antagonistic to the existing ontology repositories? A (SSSOM) mapping server is a repository for mappings that enables the browsing of existing mappings, exposing all (or some relevant subset of) SSSOM metadata as search facets. In that sense, it should be considered complementary, as it enables the search for accurate mappings from a specific term or set of terms, something that goes beyond what most ontologies would offer. However, the concept of ontology mappings can be _perceived_ as antagonistic to Open Ontology principles, as its goal is _not the logical integration of knowledge, but the association or linking of terms across controlled semantic spaces_. The OBO vision involves the building of a coherent, non-redundant semantic space of logically interconnected ontologies, which in particular wants to avoid the introduction of overlapping concepts. The mapping world specifically embraces heterogeneous semantic spaces and overlapping concepts, and seeks to bridge the semantic gaps using well-defined mapping relations such as "skos:broadMatch" or "owl:equivalentClass". ================================================ FILE: src/docs/funding.md ================================================ ## Funding The Simple Standard for Sharing Ontological Mappings (SSSOM) is a community-driven project which has received support from many different sources. We list the most important ones in the following. ### Volunteering efforts A huge fraction of the work on SSSOM has been done by volunteers without dedicated grant support. 
We hereby acknowledge their contributions as being absolutely essential. A selection of amazing contributions (by no means exhaustive): - The development of [SSSOM Java](https://incenp.org/dvlpt/sssom-java/) - Hundreds of careful contributions to discussions on the [SSSOM issue tracker](https://github.com/mapping-commons/sssom/issues) - The first draft of the [Mapping Registry Cookiecutter](https://github.com/mapping-commons/mapping-commons-cookiecutter) - We try to keep track of other [Community efforts here](https://github.com/mapping-commons/sssom/discussions/318) ### Phenomics First (NIH / NHGRI #1RM1HG010860-01) A lot of the groundwork of SSSOM was done to support a disease mapping project as part of the [Mondo Disease Ontology](https://github.com/monarch-initiative/mondo), which included, but was not limited to: - Creation of a basic metadata model - Implementation of validation and parsing methods in [sssom-py](https://github.com/mapping-commons/sssom-py) - Generating [training materials](training.md) - Organising [workshops](workshops.md) - Outreach activities to clinical communities such as [OHDSI](https://www.ohdsi.org/) The grant was awarded to members of the Monarch Initiative. ### Monarch (NIH / OD #5R24OD011883) To support development of cross-species mappings and knowledge graph integration for the [Monarch Knowledge Graph](https://monarchinitiative.org/), a few new features had to be supported: - Groundwork for the [Semantic Mapping Vocabulary](https://github.com/mapping-commons/semantic-mapping-vocabulary) which contains, for example, cross-species mapping properties. - The advancement of the concepts and tools behind the "Mapping Commons", including supporting the development of the [Mapping Registry Cookiecutter](https://github.com/mapping-commons/mapping-commons-cookiecutter) - Various improvements to the SSSOM metadata model, including the introduction of curation rules. 
- The [OxO2 SSSOM mapping browser](https://github.com/EBISPOT/oxo2) The grant was awarded to members of the Monarch Initiative. ### Bosch Gift to LBNL A lot of the work on tooling was supported by a Bosch Gift to the Lawrence Berkeley National Laboratory (Chris Mungall group). We thank Bosch for their generous support which helped us with the following: - Implementation of conversion and testing methods in [sssom-py](https://github.com/mapping-commons/sssom-py) - The development of training materials - The development of specialised matching tools such as [OAK lexmatch](https://incatools.github.io/ontology-access-kit/guide/mappings.html) which provided the first implementation of the SSSOM standard in a matching tool. ### DARPA: Young Faculty Award W911NF2010255 A huge amount of refactoring of [sssom-py](https://github.com/mapping-commons/sssom-py) and development best practices, as well as training materials, was provided through this grant (awarded to Benjamin M. Gyori). Other contributions include work on the [Semantic mapping reasoner and assembler](https://github.com/biopragmatics/semra) ================================================ FILE: src/docs/getting-started.md ================================================ ## Getting Started with SSSOM Pre-requisites: - You know [what a mapping is](explanation/mappings.md). ### Creating SSSOM files SSSOM files are typically created as spreadsheets and shared as TSV files. Tools exist to translate SSSOM files in TSV format into other formats such as JSON and RDF. The ability to curate SSSOM files as spreadsheets makes them accessible, especially in scientific communities, compared to more technical formats such as JSON or RDF. However, this simplicity comes with trade-offs — spreadsheet-based curation can make it harder to ensure that files are valid (see [this discussion](https://github.com/mapping-commons/sssom/discussions/428)). 
Using a proper validation tool (see [below](#validating-your-sssom-files)) is therefore strongly recommended. Let's look at a real-world example: mappings between the [Human Phenotype Ontology (HP)](https://hpo.jax.org/) and the [Mammalian Phenotype Ontology (MP)](http://www.informatics.jax.org/vocab/mp_ontology), derived from the [uPheno](https://github.com/obophenotype/upheno) project. | subject_id | subject_label | predicate_id | object_id | object_label | mapping_justification | | ---------- | ------------------------- | --------------- | ---------- | ------------------------- | ---------------------- | | HP:0000175 | Cleft palate | skos:exactMatch | MP:0000111 | cleft palate | semapv:LexicalMatching | | HP:0000252 | Microcephaly | skos:exactMatch | MP:0000433 | microcephaly | semapv:LexicalMatching | | HP:0000822 | Hypertension | skos:exactMatch | MP:0000231 | hypertension | semapv:LexicalMatching | | HP:0001596 | Alopecia | skos:exactMatch | MP:0000414 | alopecia | semapv:LexicalMatching | | HP:0001627 | Abnormal heart morphology | skos:exactMatch | MP:0000266 | abnormal heart morphology | semapv:LexicalMatching | A SSSOM file contains two main sections: 1. A header 2. 
The mappings The header contains additional metadata about the mapping set, such as the license or description: ```yaml # curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # owl: http://www.w3.org/2002/07/owl# # rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# # rdfs: http://www.w3.org/2000/01/rdf-schema# # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # sssom: https://w3id.org/sssom/ # license: https://creativecommons.org/publicdomain/zero/1.0/ # mapping_provider: http://purl.obolibrary.org/obo/upheno.owl # mapping_set_id: https://w3id.org/sssom/mappings/27f85fe9-8a72-4e76-909b-7ba4244d9ede ``` You can look at an example TSV file [on GitHub](https://github.com/mapping-commons/sssom/blob/master/examples/embedded/mp-hp-exact-0.0.1.sssom.tsv). #### Basic anatomy of a mapping ![SSSOM basic architecture](images/sssom-mapping.png) You should think of a mapping in the SSSOM-sense as a triple between a subject (the "mapping source") and an object (the "mapping target") via a predicate (such as "exact match"). In SSSOM, every mapping can have a lot of metadata associated with it, like who created it (creator_id), and when, and how confident we are in its truthfulness. Conceptually, we consider the sum total of all metadata collected for a mapping its "justification" - essentially the "evidence" provided towards the mapping. #### Identifiers in SSSOM SSSOM files use so-called CURIEs (Compact URIs) to identify the subject and object of a mapping. As you can see in the example in the previous section, the object of the first mapping is `MP:0000111`, a term from the Mammalian Phenotype Ontology. As you can see in the mandatory `curie_map`, the `MP` prefix represents the `http://purl.obolibrary.org/obo/MP_` namespace. Using a `curie_map` serves two purposes (1) it unambiguously identifies the entity being mapped. 
Prefixes can clash easily: the prefix `ICD` all by itself can refer to ICD-10 Clinical Modification, ICD-10 WHO Edition, ICD-11 Foundation, ICD-11 MMS Linearisation, ICD-9, etc. (2) they serve as the prefix expansion instruction for RDF serialisations. To convert for example `MP:0000111` into an RDF entity, we first expand it to `http://purl.obolibrary.org/obo/MP_0000111`. !!! warning "Why can't I use URIs instead of CURIEs in my TSV file?" The SSSOM/TSV format requires all identifiers to be in CURIE form. This is enforced by SSSOM validators. CURIEs are much more readable than full URIs/URLs, making your mapping files more compact and easier to work with. All prefixes used in your CURIEs must be declared in the `curie_map`. #### Mapping predicates The `predicate_id` specifies the mapping relation between subject and object. Any predicate identifier may be used, but if you are just getting started, it is best to stick to the [common predicates](spec-model.md#allowed-and-common-mapping-predicates). The most frequently used ones are: | Predicate | When to use | | ------------------- | ------------------------------------------------------------------------------- | | `skos:exactMatch` | The subject and object can be used interchangeably in most contexts. | | `skos:broadMatch` | The object is a broader/more general concept than the subject. | | `skos:narrowMatch` | The object is a narrower/more specific concept than the subject. | | `skos:closeMatch` | The two are similar enough to be interchangeable in some contexts, but not all. | | `skos:relatedMatch` | The two are associated in some way, but not interchangeable. | #### Basic SSSOM Metadata Every SSSOM mapping set has two levels of metadata: metadata about the _mapping set_ as a whole, and metadata about each _individual mapping_. 
**Required metadata for the mapping set** (see [MappingSet](MappingSet.md) for the full description of all fields): | Field | Description | | ---------------- | ------------------------------------------------------------------------------------------------------------------- | | `mapping_set_id` | A globally unique identifier (URI) for this mapping set, e.g. `https://w3id.org/sssom/tutorial/example1.sssom.tsv`. | | `license` | A URL to the license, e.g. `https://creativecommons.org/licenses/by/4.0/`. | | `curie_map` | A dictionary that maps CURIE prefixes to their IRI expansions. | Other commonly used set-level metadata includes `mapping_set_description`, `mapping_set_version`, `subject_source`, `object_source`, and `creator_id`. **Required metadata for each mapping** (see [Mapping](Mapping.md) for the full description of all fields): | Field | Description | | ----------------------- | -------------------------------------------------------------------- | | `subject_id` | The CURIE of the entity being mapped (the "source"). | | `predicate_id` | The mapping relation (e.g. `skos:exactMatch`). | | `object_id` | The CURIE of the entity being mapped to (the "target"). | | `mapping_justification` | How the mapping was determined, e.g. `semapv:ManualMappingCuration`. | Other commonly used mapping-level metadata includes `subject_label`, `object_label`, `confidence`, `author_id`, `mapping_date`, and `comment`. For a comprehensive list, see the [Quick reference for mapping metadata](index.md#quick-reference-for-mapping-metadata). #### Mapping justifications Every mapping in SSSOM must come with a justification - an indication of _how_ the mapping was established. You can think of it as the "evidence type" for the mapping. 
Justifications are terms from the [Semantic Mapping Vocabulary (SEMAPV)](https://mapping-commons.github.io/semantic-mapping-vocabulary/), specifically the terms under [`MatchingProcess`](https://www.ebi.ac.uk/ols4/ontologies/semapv/classes/https%253A%252F%252Fw3id.org%252Fsemapv%252Fvocab%252FMatching?lang=en). Some common justifications: | Justification | When to use | | -------------------------------------------- | ------------------------------------------------------------------------------- | | `semapv:ManualMappingCuration` | A human curator determined that the mapping is correct. | | `semapv:LexicalMatching` | The mapping was established by matching labels or synonyms. | | `semapv:LogicalReasoning` | The mapping was inferred through logical reasoning. | | `semapv:SemanticSimilarityThresholdMatching` | The mapping was established by computing semantic similarity above a threshold. | | `semapv:MappingReview` | The mapping was determined through a formal review process. | If you are manually curating your mappings, `semapv:ManualMappingCuration` is the right choice. For more detail on how to construct more nuanced justifications, see the [Guide to using Mapping Justifications](mapping-justifications.md). #### Validating your SSSOM files To check that your SSSOM files are valid, you can use the [SSSOM Toolkit](toolkit.md) (also known as `sssom-py`). After [installing it](https://mapping-commons.github.io/sssom-py/installation.html), you can validate a file like this: ```bash $ wget https://w3id.org/biopragmatics/biomappings/sssom/biomappings.sssom.tsv $ pip install sssom-py $ sssom validate biomappings.sssom.tsv ``` This will check that all required fields are present, that the CURIEs are properly declared in the `curie_map`, and that values conform to the expected types. Alternatively, if you prefer a Java-based tool, [sssom-java](https://incenp.org/dvlpt/sssom-java/)'s `sssom-cli` can also validate SSSOM files. 
See the [sssom-cli examples](https://incenp.org/dvlpt/sssom-java/sssom-cli/examples.html) for details. #### Converting SSSOM files into other formats The SSSOM Toolkit can convert your TSV mapping sets into other formats: ```bash sssom convert my-mappings.sssom.tsv --output my-mappings.owl --output-format owl sssom convert my-mappings.sssom.tsv --output my-mappings.json --output-format json ``` [sssom-java's `sssom-cli`](https://incenp.org/dvlpt/sssom-java/sssom-cli/examples.html) can also convert between formally defined SSSOM serialisation formats (TSV, JSON, and RDF/Turtle). For detailed information about the different serialisation formats, see [SSSOM/TSV](spec-formats-tsv.md), [OWL/RDF](spec-formats-owl.md), and [JSON](spec-formats-json.md). #### Storing and sharing SSSOM files SSSOM files are plain text (TSV), so they can be stored and version-controlled just like any other text file, for example in a GitHub repository. If your mappings are converted to RDF, they can also be loaded into a triple store or ontology repository. You may also choose to develop your mapping file in a columnar format like Excel or Google Sheets, and then convert to TSV. For many people this will be the easiest way to work with mapping files. Those with GitHub Actions experience can automate the conversion whenever source files change. ### Using SSSOM files So far we have focused on how to _create_ SSSOM files. But what can you actually _do_ with them? 
#### Programmatic access with sssom-py The [SSSOM Toolkit](https://mapping-commons.github.io/sssom-py) provides a Python API for loading, manipulating, and querying mapping sets: ```python from sssom.parsers import parse_sssom_table # Load an SSSOM TSV file msdf = parse_sssom_table("my-mappings.sssom.tsv") # Access the mapping set metadata print(msdf.metadata) # Access the mappings as a pandas DataFrame df = msdf.df print(df.head()) ``` #### Common operations with the SSSOM Toolkit The SSSOM Toolkit CLI supports a range of useful operations. Here are some of the most common ones: - **Merging** mapping sets from different sources into one: ```bash sssom merge mappings1.sssom.tsv mappings2.sssom.tsv --output merged.sssom.tsv ``` - **Filtering** mappings, for example by predicate: ```bash sssom filter my-mappings.sssom.tsv --predicate_id skos:exactMatch -o exact-only.sssom.tsv ``` - **Diffing** two mapping sets to see what changed: ```bash sssom diff mappings-v1.sssom.tsv mappings-v2.sssom.tsv --output diff.tsv ``` For a more detailed walkthrough, see the [SSSOM Toolkit guide](toolkit.md) and the [sssom-py documentation](https://mapping-commons.github.io/sssom-py). #### Using SSSOM in Java with sssom-java [sssom-java](https://github.com/gouttegd/sssom-java) is a Java implementation of SSSOM developed by Damien Goutte-Gattat. It provides reading and writing support for SSSOM/TSV and JSON formats, and can be used as a library in your own Java applications or as a [ROBOT](http://robot.obolibrary.org/) plugin. 
To add sssom-java to your Maven project: ```xml <dependency> <groupId>org.incenp</groupId> <artifactId>sssom-core</artifactId> <version>1.10.0</version> </dependency> ``` Reading and iterating over mappings: ```java import org.incenp.obofoundry.sssom.TSVReader; import org.incenp.obofoundry.sssom.model.MappingSet; import org.incenp.obofoundry.sssom.model.Mapping; TSVReader reader = new TSVReader("my-mappings.sssom.tsv"); MappingSet ms = reader.read(); for (Mapping m : ms.getMappings()) { System.out.printf("%s -[%s]-> %s%n", m.getSubjectId(), m.getPredicateId(), m.getObjectId()); } ``` Writing a mapping set back to TSV: ```java import org.incenp.obofoundry.sssom.TSVWriter; TSVWriter writer = new TSVWriter("output.sssom.tsv"); writer.write(ms); ``` sssom-java also ships with a [ROBOT plugin](https://github.com/gouttegd/sssom-java) that can extract cross-references from OWL ontologies into SSSOM format, inject mapping-derived axioms into ontologies, and more. For the full documentation, see the [sssom-java homepage](https://incenp.org/dvlpt/sssom-java/). #### Using SSSOM in the Ontology Development Kit (ODK) The [Ontology Development Kit (ODK)](https://github.com/INCATools/ontology-development-kit) comes with built-in support for SSSOM. If you are maintaining an ontology using the ODK, you can manage your mappings alongside your ontology source files and have them automatically validated as part of your build process. For an example, see how [Uberon manages its mappings](https://github.com/obophenotype/uberon/blob/master/src/ontology/uberon-odk.yaml). ### Where to go from here - [Detailed SSSOM curation tutorial](tutorial.md) - a step-by-step guide on how to curate SSSOM mapping sets from scratch. - [Mapping justifications](mapping-justifications.md) - learn how to construct more nuanced mapping justifications. - [SSSOM Toolkit guide](toolkit.md) - learn how to use the SSSOM command line tools. - [SSSOM data model](spec-model.md) - the full specification of the SSSOM data model. - [Training materials](training.md) - video tutorials and external guides.
================================================ FILE: src/docs/glossary.md ================================================ # Glossary The glossary is currently being developed [here](https://docs.google.com/document/d/1QqR8j7szjaq6wzE9YLBnZ2kOD9eN14d3SYd312X8JjQ/edit?usp=sharing). ================================================ FILE: src/docs/introduction.md ================================================ # Introduction This section offers an overview of SSSOM. If you are looking for a fast way to [get started, see here](getting-started.md). ## Abstract Mappings, or cross-references, are used to link terms across different ontologies. However, there is currently little to no standardisation in how such mappings are represented. While properties such as hasDbXref are widely used in ontologies such as GO and MONDO, the meaning of such mappings is unclear, and cannot be further described with additional metadata or provenance. The Simple Standard for Sharing Ontology Mappings (SSSOM) is an initiative to provide a minimal and standard set of elements for the dissemination of mappings between ontology terms, to ensure a reliable interpretation of generated mappings and to enable sharing and data integration between people and applications. This document introduces the SSSOM catalog of metadata elements, which can be used to attach meta- and provenance data to both mappings and sets of mappings; a controlled vocabulary for the description of match types (SSSOM CV); a definition of both RDF and TSV serialisations of ontology mappings; and a (non-exhaustive) selection of recommended mapping predicates. ## Introduction Currently, there are three methods typically used to express mappings in OWL: direct logical axioms using owl:equivalentClass; the oboInOwl hasDbXref property; and the SKOS vocabulary for mapping properties.
The first, owl:equivalentClass, is a strong logical equivalence assertion which is not appropriate for more nuanced mappings such as close matches. The second, hasDbXref, does not assert formal logical equivalence but also has no clearly defined meaning. Finally, the SKOS vocabulary provides a hierarchy of mapping properties which allow the unambiguous specification of exact, close, broad, and narrow matches, but does not provide the means for mappings to be annotated with additional metadata such as confidence scores and provenance. The Simple Standard for Sharing Ontology Mappings (SSSOM) addresses these problems by defining a catalog of metadata terms to describe mappings. Both individual mappings and **_sets of_** mappings can be described, enabling provenance and metadata to be captured on multiple levels. SSSOM interoperates with existing methods for the specification of mappings, allowing any predicate to be used to describe the nature of each mapping including those from OWL and SKOS. The provenance of mappings - such as whether the mapping was created as the result of a human-curated equivalence match, or a semantic similarity match - is specified using a controlled vocabulary (CV), SSSOM CV. Combined with the metadata properties provided by SSSOM such as confidence and semantic_similarity_score, this provenance information can be used to capture mapping descriptions in a manner that is explicit and amenable to curation. Two serialisations for SSSOM mappings are provided in this document, aimed at different communities: an RDF/OWL serialisation using IRIs that is aimed at the Knowledge Graph/Semantic Web community, and a TSV serialisation using [CURIE](https://www.w3.org/TR/curie/) syntax which is aimed at the wider bioinformatics community. An unambiguous translation between these serialisations is provided. 
## Challenges for exchange and use of mappings Despite their importance for data integration, term mappings are typically neglected as data artefacts (57). A mapping in this context is a correspondence between two terms, referred to here as "subject" and "object" terms. A "predicate" defines the type of relationship between the subject and the object, such as skos:exactMatch, or owl:equivalentClass. A mapping, or "match", does not have to be exact: it can be broad, e.g. between a conceptually narrow term such as "Red Delicious" and a conceptually broader term such as "Apple". To our knowledge, no formal review has been published that analyzes the representation and formats used for collections of term mappings (mapping sets, or alignments), but in our experience, most mapping sets are represented as tables using an ad-hoc "schema", often merely a simple two-column format that lists matching terms in two naming schemes. An example of such a table can be seen in the following Table. Subject | Object --- | --- UBERON:0002101 | FMA:24875 UBERON:0000019 | FMA:54448 ``` Table 1: An example of a typical mapping table one might find on the web. ``` This type of table lacks clear semantics and is therefore very difficult to use and reuse either by humans or by machines. We will discuss a few of the most critical problems in the sections that follow. _Non-transparent imprecision_. Mapping precision describes, usually qualitatively, whether a mapping between a subject and an object is exact, broad, narrow, close or related. An exact mapping means that the subject term can be replaced with the object term and vice versa, i.e. they refer to the exact same real-world entity. A broad mapping links a subject term to a more general term, for example, the term "leg" to the term "hindlimb" (if the ontology defines leg as the parts of the hindlimb that exclude the foot). A narrow mapping links a subject term to a more specific term. 
For example, "long QT syndrome" in the Mondo Disease Ontology is a narrow match to "Romano-Ward long QT syndrome" in Orphanet. A close mapping relates two terms that are neither exact, broad or narrow, but belong to the same category of things and are semantically similar, such as "apple" to "pear", or "paw" to "hand". Due to its subjective nature ("what is close?"), this is a problematic category of mapping, but it is widely used, for example for relating similar anatomical terms across species. Related mappings are mappings across categories of things, such as the mapping between a phenotype "enlarged liver" and the anatomical entity "liver". In practice, it is rare that mapping tables such as the one presented in Table 1 constitute a set of purely "exact" matches. Different use cases may require different levels of mapping precision. For example, for entity merging (defined as the process of merging two entities from different sources into one) or data translation (defined as the process of moving annotations from using one ontology to another), exact mappings may be required, while for data grouping broad matches are often sufficient (ensuring that the subject is classified under the object term). For many machine learning use cases, close and related matches will be extremely useful regardless of their lack of semantic precision (though semantic precision is likely to improve predictive power). In practice, many mappings are to varying degrees imprecise but do not specify the mapping precision. This makes it impossible to reliably apply them to use cases such as entity merging or data translation. _Non-transparent accuracy, confidence, and provenance_. To scale to real-world use cases, automated tools are critical for matching terms across databases, terminologies and ontologies. Such tools typically implement mapping rules that determine whether a given pair of terms constitutes a match. 
For example, label matching rules might include "match if subject and object labels match", "match if subject label matches with an exact synonym of the object" and "match if subject and object exhibit a very high degree of semantic similarity". Depending on the rules, tools will have more or less confidence that a match constitutes a mapping. Even human curators often have different levels of confidence about the accuracy of any given mapping, especially if the process of determining whether a mapping is accurate involves the review of (often complex) descriptions and term definitions. Different use cases will profit from different degrees of accuracy. For example, if we seek to integrate data from various medical terminologies to inform medical diagnosis, we may require not only a very high degree of confidence about the mapping but also ensure that the mapping is "explainable" to users. To ensure that diagnostic decisions that require bridging of data silos through mappings are explainable, we furthermore need provenance (documentation of where a piece of data comes from and how it was produced), such as an explicit statement of the mapping rules by which the match was originally determined (for example, the labels of both terms could have been the same). Thanks to efforts by initiatives such as the Ontology Alignment Evaluation Initiative (OAEI), many mapping tables on the web include at least a confidence score. However, in our experience, mapping rules are rarely stated explicitly as part of the mappings or mapping set metadata. Many mappings in the wild are to varying degrees inaccurate, but without a confidence score and explicit mapping rules, this inaccuracy will not be transparent. _Non-transparent incompleteness_. Mapping sets can be incomplete for (at least) three major reasons: (1) they are out of date, i.e. 
a term in one ontology was removed (deprecated) in a later version of the ontology or a term with a more precise mapping was introduced; (2) they are deliberately partial, i.e. covering only a subset of terms, which were mapped for a specific purpose (for example a manual effort to map all COVID-19-relevant phenotypes from the Human Phenotype Ontology to the Mammalian Phenotype Ontology); or (3) they accidentally omit certain correct mappings, as the automated approaches that were used did not detect them (false negatives). We cannot determine whether a mapping set such as the one given in Table 1 is up-to-date, deliberately partial or accidentally incomplete without sufficient metadata about the purpose of the mappings, the tools used and the version of the source data used for the matching process. _UnFAIRness_. The FAIR principles are a set of community-developed guidelines to ensure that data or any digital object are Findable, Accessible, Interoperable and Reusable. Unlike many of the widely used controlled vocabularies, ontologies and data schemas, mappings are rarely published using standard formats and metadata vocabularies and can therefore be considered second class citizens in the world of FAIR semantics. While some tools exist to browse mappings (the F and A in FAIR, findable and accessible), such as OxO and BioPortal, they lack access to at least some of the metadata required to determine their applicability for a use case: Are mappings likely to be correct? Are they precise enough? Have they been updated recently? Can I trust the authority that generated the mappings? While some minimum level of interoperability (the I in FAIR) is achieved simply by publishing the mappings as RDF triples (which rarely happens in practice), most mappings are best captured in the form of simple tables (in our experience the preferred format for both mapping curators and data engineers).
Furthermore, the predicates or relations used in the mappings are far from standardized. Different relations have different semantics, ranging from strong logical relations such as owl:sameAs or owl:equivalentClass to predicates with no formally specified semantics such as oboInOwl:hasDbXref. In our experience, reusability (the R in FAIR) is a significant obstacle to FAIRness. It is infeasible to simply reuse existing mappings without the metadata required to make imprecision, inaccuracy and incompleteness explicit. Repositories such as OxO and BioPortal cannot make mappings more accessible, because the metadata required to do so simply does not exist. In order to gradually improve our mappings and make them FAIRer, we need to be able to share, review, fix and maintain our mappings in much the same way as our ontologies themselves - using standard formats and rich metadata. FAIRifying data is an effort that aims to supply practical solutions for the use of the FAIR guiding principles throughout the research data life cycle. It recommends technologies that support semantic interoperability in a sustainable way, and practices that support FAIRness. The FAIRSemantics effort is currently discussing how to incorporate semantic mappings, and we reached out to them to consider SSSOM for this purpose. ## Background about mappings A mapping can be defined as a triple _s, p, o_, where s is the subject of the mapping, p is the mapping predicate (or relation) and o is the object. There are many different mapping predicates used in practice, but they are not always standardized. The Semantic Web community uses a number of standard mapping predicates, such as owl:sameAs or owl:equivalentClass (logical mapping predicates) and skos:exactMatch or skos:broadMatch (terminological mapping predicates). We refer to mapping subjects and objects as "terms", which we will loosely define here as a set of symbols that define some entity in the real world. 
Usually, a term can be referred to by an identifier that uniquely identifies some entity in a certain context. For example, UBERON:0002101 is the identifier for a term that refers to the anatomical entity "limb". Putting it all together, the mapping describes a correspondence in which the term with the id UBERON:0002101 constitutes a terminological exact match to the term with the identifier FMA:24875. Mappings between data model elements, databases and other representations can be described similarly. Note that we generally use the terms "matching" and "mapping" interchangeably. Occasionally we refer to "matching" as the process to determine a mapping candidate (lexical matching, logical matching etc), a "match" as the result of the matching process, and a "mapping" the process and result of the process that deduces a true correspondence from a (set of) matches. For SSSOM, this distinction is a bit academic, but useful to keep in mind when talking about the interplay of automated approaches (which result in "matches") and manual approaches (which typically result in the final mappings). Ontology alignment is the task of determining corresponding terms shared between two or more ontologies, i.e. mappings. Sometimes "ontology alignment" refers to the output of the alignment process. Mapping sets can be "partial", i.e. covering only a subset of terms in the subject or object source (ontology, database, etc), "derived", i.e. one mapping set can be obtained from one or more others (for example, a XAO to MeSH mapping can be obtained by combining a XAO-Uberon mapping with a Uberon-MeSH mapping), or "complete". We refer to a "complete" mapping, i.e. the set of all correspondences between two resources (ontologies, databases), as an "alignment". 
The identifier of a term has three parts: a namespace that describes in which database or ontology the identifier is defined, a local identifier that unambiguously identifies an entity within that namespace, and optionally a separator that can be used to separate the namespace from the local identifier to make them easier to process. UBERON:0002101, for example, comprises the namespace "UBERON", the separator ":" and the local identifier "0002101". There are various syntaxes for denoting identifiers; the UBERON:0002101 notation is called compact URI (CURIE) syntax, which is used widely across the database and ontology worlds. The problem with this syntax is that UBERON may not be a globally unique prefix, so files making use of such CURIEs must come with a prefix map that ensures that UBERON (in the CURIE syntax referred to as "prefix") is globally unique by mapping it to the persistent Internationalized Resource Identifier (IRI) prefix http://purl.obolibrary.org/obo/UBERON_. This may not be a major problem for a fairly unique prefix such as "UBERON", but it is for prefixes such as "ICD", which can refer to many different namespaces, such as ICD9, ICD10, ICD11 and more, all of which correspond to entirely different terminologies. _Approaches to mapping_. There are many different techniques that can be employed to generate term mappings. Automated matching techniques include ontology matching, entity resolution (the task of determining whether two database records correspond to the same entity), semantic similarity or automated reasoning. Recent approaches based on machine learning and graph embeddings show promise for working with messier inputs. No single tool will perform equally well on all inputs: some of the semantics-aware tools like LogMap and Agreement Maker Light (AML) can exploit the ontology structure to determine high-quality matches but will have problems with the large-scale data linking tasks required by modern big-data applications.
Purely automated approaches to mapping are often insufficient for real world use cases that require a high degree of accuracy, such as medical diagnostics. They often need to be refined by hand or using sophisticated mapping reconciliation approaches independent of the actual matching. Determining a mapping is often complex, due to the high degree of terminological variability: different communities may use very different names for the same real world entities. For example, the condition referred to in the Human Phenotype Ontology (HPO) as "Hyperchloriduria" is called "increased urine chloride ion level" in the Mammalian Phenotype Ontology (MP), which is used by the model organism community. _Mapping rules - capturing the conditions under which a match is established_. Mapping rules define the conditions under which we determine a match between two terms. For example, the condition for a mapping rule could be "if the subject label and object label match exactly". In practice, mapping rules can be very simple (e.g., "exact match of term labels"), more complex ("exact match between label of subject and exact synonym of object after they are pre-processed using stemming"), or even more exacting ("complex match determined by a human curator that carefully reviewed the descriptions and definitions of both terms and concluded they mean the same thing"). One problem for both manually curated mappings and automated approaches is that these mapping rules are often hidden deeply in the code or are not documented at all. Exposing mapping rules along with confidence scores would be very valuable for reviewing mappings and explaining them to users. Our reference implementation for SSSOM is rdf-matcher, which makes these mapping rules explicit, but other approaches such as OMOP2OBO also capture mapping rules as part of the mapping metadata.
================================================ FILE: src/docs/javascripts/mathjax.js ================================================ window.MathJax = { tex: { inlineMath: [["\\(", "\\)"]], displayMath: [["\\[", "\\]"]], processEscapes: true, processEnvironments: true }, options: { ignoreHtmlClass: ".*|", processHtmlClass: "arithmatex" } }; document$.subscribe(() => { MathJax.startup.output.clearCache() MathJax.typesetClear() MathJax.texReset() MathJax.typesetPromise() }) ================================================ FILE: src/docs/mapping-commons.md ================================================ # How to set up a Mapping Commons A mapping commons is an open, collaborative space for managing and reconciling mappings. The goal is to collect mappings from a variety of sources into a _mapping set registry_, standardise them into a common representation, curate some basic metrics such as "confidence" (how much does the community managing the commons trust a specific mapping source?) and provenance (where exactly did this mapping come from before it was integrated?). There is no agreed-upon standard for mapping registries yet. SSSOM itself provides a [lightweight metadata model for mapping registries](https://mapping-commons.github.io/sssom/) which is, as of August 2023, under active development. ## Typical setup of a mapping commons We recommend basing your mapping commons on a combination of GitHub (or GitLab) collaborative workflows (issues and discussions for the community, access management etc) and a git repository based on the [Mapping Commons Cookiecutter Template](https://github.com/mapping-commons/mapping-commons-cookiecutter) for version control of the mappings. Using the template system above allows you to: 1. make use of basic CI and quality control for your mappings, 2. provides a standard way to document metadata about your mapping sets 3. provides a basic ETL system based on `gnu make` (which you don't have to use, it's just convenient) 4.
Provides a standardised registry format that can be reused/imported by others. Examples of Mapping Commons are: 1. https://github.com/mapping-commons/mh_mapping_initiative 1. https://gitlab.c-path.org/c-pathontology/mapping-commons ================================================ FILE: src/docs/mapping-justifications.md ================================================ # Guide to using Mapping Justifications The goal of this document is to provide the user with a few pointers into the art of mapping justification construction. As of Summer 2023, the SSSOM justification system is still evolving, and will likely benefit from your input. Where informative metadata properties or values are missing from the [SSSOM datamodel](https://mapping-commons.github.io/sssom/) or [SEMAPV](https://mapping-commons.github.io/semantic-mapping-vocabulary/), request them on the [SSSOM](https://github.com/mapping-commons/sssom/issues) or [SEMAPV issue tracker](https://github.com/mapping-commons/semantic-mapping-vocabulary/issues) respectively. ## Table of contents 1. [lexical matching](#lexical-matching) 1. [semantic similarity threshold-based matching](#semantic-similarity-threshold-based-matching) 1. [manual mapping curation](#manual-mapping-curation) 1. [mapping review](#mapping-review) 1. Other justifications 1. background knowledge-based matching 1. composite matching 1. instance-based matching 1. lexical similarity threshold-based matching 1. logical reasoning 1. mapping chaining-based matching 1. mapping inversion-based matching 1. semantic similarity threshold-based matching 1. structural matching 1.
unspecified matching ## Lexical matching There are two kinds of lexical matching justifications we try to distinguish: - [semapv:LexicalMatching](https://w3id.org/semapv/vocab/LexicalMatching): The match is exact (potentially after pre-processing) - [semapv:LexicalSimilarityThresholdMatching](https://w3id.org/semapv/vocab/LexicalSimilarityThresholdMatching): The match is fuzzy (for example, Levenshtein distance). Note: embedding similarity, even if constructed purely of a word embedding, is considered a form of _semantic_ similarity. #### Level 1: Track the fact that the match was based on a lexical process Whenever a mapping was established by a lexical matching process, track at least that fact: - [mapping_justification](https://mapping-commons.github.io/sssom/mapping_justification/)`: `[semapv:LexicalMatching](https://w3id.org/semapv/vocab/LexicalMatching). This indicates that the mapping was determined through some form of exact lexical matching. #### Level 2: Track the specific datamodel fields involved in the matching process Regardless of which specific lexical matching justification you are working on, it is often useful to document the source field of the values used to acquire the match. For example: - [subject_match_field](https://mapping-commons.github.io/sssom/subject_match_field/)`: rdfs:label` indicates that the value of the `rdfs:label` property on the subject entity was used to establish the match. - [object_match_field](https://mapping-commons.github.io/sssom/object_match_field/)`: skos:prefLabel` indicates that the value of the `skos:prefLabel` property on the object entity was used to establish the match. - [match_string](https://mapping-commons.github.io/sssom/match_string/)`: somestring` the exact string that was used to establish the match. This is especially useful if preprocessing methods are applied, see below (Level 3).
#### Level 3: Pre-processing There are many pre-processing techniques for text in the NLP literature, such as lower-casing or lemmatisation. To judge the fidelity of a match, it is often useful to document the exact techniques used. - [subject_preprocessing](https://mapping-commons.github.io/sssom/subject_preprocessing/)`: semapv:BlankNormalisation` indicates that before determining the match, blank characters (spaces etc) were standardised in some way. There are plenty of preprocessing techniques already recorded in [SEMAPV](https://mapping-commons.github.io/semantic-mapping-vocabulary/), including semapv:BlankNormalisation, semapv:CaseNormalization, semapv:DiacriticsSuppression, semapv:DigitSuppression, semapv:Lemmatization, semapv:LinkStripping, semapv:PunctuationElemination, semapv:RegexRemoval, semapv:RegexReplacement, semapv:Stemming, semapv:StopWordRemoval, semapv:TermExtraction, semapv:Tokenization, but feel free to add more. However, there is one aspect that makes this process quite difficult to implement: Most matchers will blindly apply a set of normalisation techniques prior to processing, but not document which exact technique **had an effect**. It is obviously less useful to say: we applied all these 20 techniques, if only one of them was actually effectual (i.e. caused the string to change). If there is no (easy) way to keep track of which technique was effectual for any given match, we believe that it is still better to document all techniques, but doing so on `mapping set` level rather than for each individual mapping (to keep the mapping sets smaller).
## Semantic similarity threshold-based matching The basic idea behind "Semantic similarity threshold-based matching" is that a process that is "semantics aware" (in the loose sense, either by being cognisant about the graph structure, the logical structure, or a contextual textual knowledge such as an embedded Wikipedia article) enabled computing a score between the subject and object entity that to some degree reflects the "similarity" between the two entities. There are many examples of this: 1. The (graph-)structure around the subject and object entities are projected into a common embedding space, and the similarity between the subject and object entities are expressed as cosine similarity between the two embeddings. 1. The jaccard similarity between a set of properties of the subject and object entities is calculated. 1. The Resnik score is calculated between the subject and object entities. **Important note on applicability of SSSOM for semantic similarity profiles**: SSSOM is not used for documenting semantic similarity profiles, i.e. cross-tables where some set of terms are compared with another set of terms and the semantic similarity is recorded as a score. SSSOM is used to document mappings, and only if a mapping decision is influenced by a semantic similarity based approach, especially in conjunction with a specific threshold, SSSOM is applicable. For pure semantic similarity tables use [OAK Semantic Similarity](https://incatools.github.io/ontology-access-kit/datamodels/similarity/index.html). **Semantic vs lexical similarity?**: Semantic similarity is different from lexical similarity intuitively because the context (the graph structure, the background information) is taken into account and provides an (often crude) model of the actual entity, rather than of the word describing it. However, the distinctions can become a bit hazy. 
Imagine learning a graph embedding on a graph without edges, or a word embedding purely on a single label - there is definitely a grey zone where lexical similarity finishes and semantic similarity begins. In practice though, it should be mostly clear. #### Level 1: Documenting semantic similarity matches The suggested metadata for semantic similarity threshold-based matching approach is: - [semantic_similarity_measure](https://mapping-commons.github.io/sssom/semantic_similarity_measure/) - [semantic_similarity_score](https://mapping-commons.github.io/sssom/semantic_similarity_score/) - ((authors note: Maybe we need a [value for similarity threshold](https://github.com/mapping-commons/sssom/issues/296)?)) ## Manual mapping curation [semapv:ManualMappingCuration](https://w3id.org/semapv/vocab/ManualMappingCuration) is a process conducted by a (usually human) agent to determine a mapping by virtue of domain expertise. The task usually involves the agent determining, for a given `subject_id`, a suitable `object_id` in the `object_source`. #### Level 1: Documenting manual mapping curation The suggested minimal metadata for manual mapping curation is: - [author_id](https://mapping-commons.github.io/sssom/author_id/): Documenting, using a unique identifier such as an ORCID, the identity of the author performing the expert curation. - [comment](https://mapping-commons.github.io/sssom/comment/): When no formal [curation_rule](https://mapping-commons.github.io/sssom/curation_rule/) is provided (see below), it is recommended to provide a short comment with the mapping justification, especially if there is some uncertainty or ambiguity about the mapping decision. #### Level 2: Documenting the confidence of expert curation [confidence](https://mapping-commons.github.io/sssom/confidence/) is an incredibly useful metric for downstream users, including ETL engineers and data analysts. In an ideal world, all mappings have some kind of confidence associated with them. 
`confidence` scores should be read as "the strength of evidence provided in this record/table row (i.e. mapping justification) leads us to believe the mapping (e.g. OMOP:44499396 --[skos:broadMatch]--> OMOP:4028717) is correct with 90% confidence". In manual curation, confidence expresses the domain expert's degree of conviction that the asserted mapping holds true. While manual mapping curation is still considered a gold standard, in practice human agents have (a) varying levels of expertise on the subject domain, (b) varying levels of understanding of the intuitions behind "semantic spaces" and associated concepts and (c) varying levels of metadata associated with a concept to be able to determine a match (definitions, labels, papers, synonyms, etc). Documenting confidence can be very useful both to increase the transparency of data science pipelines that involve entity mappings, and as a means to increase curation speed: rather than trying to achieve 100% confidence for a mapping, which can be extremely time-consuming, it is often better to first "wave through" a mapping with lower confidence to reach coverage, and later revisit low confidence mappings iteratively. #### Level 3: Documenting curation rules For manual matches, it is often unclear by what criteria a match was established. Documenting the `curation rule`s can help increase consistency for manual curation, and transparency for downstream users. For example `OHDSI_CURATION_RULE:19` could correspond to the following rule: OHDSI_CURATION_RULE:19 = If the subject concept does not have an exact match in the object source vocabulary, we select the nearest broad ("up-hill") concept applicable. Conceptually, if both terms would exist in the same terminology, the subject concept can be defined as a subconcept of the object concept. The determination for both criteria (nearest broad, conceptually subconcept) is performed through medical expert judgement. 
Curation rules are often very use case-specific and difficult to standardise. As of August 2023, SSSOM does not provide any standardised curation rules, but encourages the community to define them locally. ## Mapping review [semapv:MappingReview](https://w3id.org/semapv/vocab/MappingReview) is a process conducted by a (usually human) agent to determine the validity of a specific given mapping. It differs from [semapv:ManualMappingCuration](https://w3id.org/semapv/vocab/ManualMappingCuration) in that it does not involve looking for alternative mappings or indeed, necessarily determining if a mapping is the best possible mapping. It should be considered cheaper, less trustworthy evidence compared to [semapv:ManualMappingCuration](https://w3id.org/semapv/vocab/ManualMappingCuration). There are two kinds of mapping reviews in SSSOM: - Review as an independent justification: [semapv:MappingReview](https://w3id.org/semapv/vocab/MappingReview) is an independent process that determines the validity of a mapping. - Review _of_ an existing justification: Instead of evaluating an entire mapping, you can record the fact that someone has looked at a specific justification and deemed it acceptable. In this case, simply record the reviewer's identity using the [reviewer_id](https://mapping-commons.github.io/sssom/reviewer_id/) or [reviewer_label](https://mapping-commons.github.io/sssom/reviewer_label/) and the optional [review_date](https://mapping-commons.github.io/sssom/review_date/) and [reviewer_agreement](https://mapping-commons.github.io/sssom/reviewer_agreement/) fields. ================================================ FILE: src/docs/mapping-predicates.md ================================================ # How to pick the right mapping predicates A mapping predicate such as skos:exactMatch specifies the semantics of the mapping relation - in other words, it defines how a computer (and human!) should interpret the mapping when it is being used. 
For example, a computer program may be allowed to merge nodes in a knowledge graph _only when they are `skos:exactMatch`_, but not when they are, say, `skos:closeMatch`. Picking the right predicate to specify the meaning of your mapping is often a difficult process. The following guide should help you to understand the most widely used mapping predicates and when they are appropriate. ## Table of content - [The three primary concerns for selecting a mapping predicate](#primary) - [The 3 step process for selecting an appropriate mapping predicate](#tenstep) - [Frequently asked questions about mapping predicates](#faq) ## Glossary - `subject`: the entity that is being mapped - `object`: the entity that the `subject` is mapped to - `predicate`: the semantic mapping relationship used ## The three primary concerns for selecting a mapping predicate There are at least three things you need to decide before selecting an appropriate mapping predicate: 1. [Precision](#precision) 2. [Acceptable degree of noise](#noise) 3. [Intended use case](#use-case) ### What is the **precision** of the mapping? As a curator, you should try to investigate the **intended meaning** of both the subject and the object. This task usually involves trying to find out as much as possible about the mapped identifiers: What is their human readable definition? Are there any logical axioms that could help with understanding the intended meaning? Sometimes, this even involves asking the respective stewards of the database or ontology for clarification. **Important:** The key here is "intended meaning". For example, when you see `FOODON:Apple` (FOODON is an ontology), you do not try to figure out _what an apple is_, but what thing in the world (in your conceptual model of the world) the FOODON developers _intended the `FOODON:Apple` identifier to refer to_. This might be an apple that you can eat, or a [cultivar](https://en.wikipedia.org/wiki/List_of_apple_cultivars)! 
The **precision** is simply: is the mapping `exact`, `close`, `broad`, `narrow` or `related`? Here is a basic guide about how to think of each: - `exact`: The two terms are intended to refer to the same thing. For example, both the subject and the object identifiers refer to the concept of [Gala cultivar](https://en.wikipedia.org/wiki/Gala_(apple)). - `close`: The two terms are intended to refer to roughly the same thing, but not quite. This is a hazy category and should be avoided in practice, because when taken too literally, most mappings could be interpreted as close mappings. This is not the point of creating mappings, if their intention is to be useful (see "use case" considerations later in this document). An example of a `close` mapping is one between the "heart" concept in a database of anatomical entities for biological research on chimpanzees and the "human heart" in an electronic health record for humans. - `broad`: The object is conceptually broader than the subject. For example, "human heart" in an electronic health record refers to "heart" in a general anatomy ontology that covers all species, such as Uberon. Another example is "Gala (cultivar)" in one ontology or database to "Apple (cultivar)" in another: the Apple (cultivar) has a broader meaning than "Gala (cultivar)". For a good mapping, it is advisable that "broad" and "narrow" are applied a bit more strictly than is technically permitted by the SKOS specification: both the subject and the object should belong to the same **category**. For example, you should use broad (or narrow) only if both the subject and the object are "cultivars" (in the above example). - `narrow`: The object is conceptually narrower than the subject. For example "Apple (cultivar)" is a narrow match to "Gala (cultivar)". Think of it as the opposite of "broad". 
`broad` and `narrow` are so-called inverse categories: If "Gala (cultivar)" is a `broad` match to "Apple (cultivar)", then "Apple (cultivar)" is a `narrow` match to "Gala (cultivar)"! One **note of caution**: `narrow` matches generally have less useful applications than `broad` ones. For example, if we want to _group_ subject entities in a database under an ontology to make them queryable in a knowledge graph, only `broad` matches to the ontology can be retrieved. For example, if we map "Gala (cultivar)" in a database to "Apple (cultivar)" in an ontology, and we wish to write a semantic query to obtain all records that are about "Apple (cultivar)" according to the ontology, we obtain "Gala (cultivar)". This is not true the other way around: if the ontology term is _more_ specific than the database term, it can't be used to group the database data. - `related`: The subject refers to an analogous concept of a different category. For example "Apple" and "Apple tree" are considered `related` matches, but not `exact` matches, as "Apple" is of the "fruit" category, and "Apple tree" of the "tree" category. Other examples include: "disease" and "phenotype", "chemical" and "chemical exposure", "car" and "car manufacturing process". In general, `related` mappings should be reserved for "direct analogues". For example, we should not try to map to `related` and `broad` categories at the same time, like, for example, "Gala (cultivar)" to "Apple tree". This causes a huge amount of proliferation of very "low value" mappings (see use case section later). ### What is the **acceptable degree of noise** of the mapping? "Noise" is the permissible margin of error for some target use case. Depending on what you want to do with your mappings, different quality levels are acceptable. This section is _not exhaustive_. While reading through this section, you should keep one thing in mind: it is _never_ a good idea to think about mappings as "correct" or "wrong". 
Even the exact same identifier (for example in Wikidata, or even the biomedical data domain) can mean something very different depending on which database is using it or in which part of which datamodel (or value set) it is used. Mapping should therefore be perceived as an inexact art where the goal is not "correctness" but "fitness for purpose": can the mappings deliver the use case I am interested in? In the following, we will take a closer look at the varying levels of noise you may need to weigh against each other. - "zero-noise". Some mappings directly inform decision processes of downstream consumers, such as clinical decision support or manufacturing. For example, in an electronic health record (EHR) system we may want to know what the latest recommended drugs (or contra-indications) for a condition are, and the disease-drugs relationships may be curated using one terminology such as [OMOP](https://ohdsi.org/omop), and the EHR may be represented using [ICD10-CM](https://icd.codes/icd10cm) (a clinical terminology used widely by hospitals). In these cases, noise should be zero or close to zero, as patient lives depend on the correctness of these mappings. - "low-noise". Most mappings are used to augment/inform processes that are a bit upstream of the final consumer. For example, mappings are used to group data for analysis or make it easier to find related data during search (enhancing search indexing semantically). The final consumer does not immediately "see" the mappings, but just the consequences of applying the mappings. In these cases, a bit of noise may be acceptable, i.e. some mappings that are "not quite right". Practically, this is very often the case where data sources are aligned automatically to enable searches across, so a few bad mappings are better than having none. - "high-noise": Some use cases employ data processing approaches that are themselves highly resilient to noise, like Machine Learning. 
Here, even a larger number of mappings (in a knowledge graph for example) which are "not quite right", or noisy, may be acceptable (if the signal to noise ratio is still ok, i.e. there are "more good than bad" mappings). There is no easy formula by which you can decide what level of noise is acceptable. Your use case will determine this. What you, as the steward of your organisation's mapping data, should consider is that there is (roughly) an order of magnitude in cost involved between the three levels: - "high-noise": Very cheap to generate. Automated matching tools can be used to generate the mappings, with no human review required. Your system may implement a way for your consumers to flag up bad results which can be traced back to a bad mapping, and simply exclude them moving forward. - "low-noise": Moderately expensive. Most mappings are generated using automated matchers, but then confirmed by a human curator. The confirmation process can often be "hand-wavy" to weed out obviously bad mappings, but do not involve the same rigour as "zero-noise" mappings would require to maintain scalability to large volumes of mappings. Such a "hand-wavy" confirmative review can take 10 seconds to 100 seconds (if a quick lookup is required). - "zero-noise": Very expensive. Every mapping must be carefully reviewed by a human curator, sometimes by a group of curators. In our experience, reviewing or establishing a mapping like this (manually) can take anything between 10 and 30 minutes - occasionally more. You can use these estimated costs for mapping review to determine how much it would cost to apply the same level of rigour to your own mappings. ### What is the intended use case? This section is informative, not exhaustive, and will give you a sense of how use cases affect your choice of mapping predicate. We have covered some implications of use cases in the sections above: 1. Some use cases require lower _levels of noise_, others can live with higher levels of noise. 2. 
Mappings are rarely 100% exact when mapping across semantic spaces (different database, ontologies, terminologies). What matters is not "correctness" - what matters is that the mappings are "_fit for purpose_" (i.e. useful for your use case). 3. Some mappings may be of _more value_ for your use case than others (for example, `exact` mappings may be more valuable than `broad` mappings). You can find the right level of cost benefit by selecting optimising value and cost of generating/maintaining such mappings. `close` mappings may often have a very low value, but if your acceptable level of noise is high, just generate them, since they don't cost you anything! Other key considerations in the sections are: - [Semantic frameworks for analysis and querying](#uc-semantic) - [Instance vs concept-level mapping](#instance) - [Typical use cases](#uc-typical) #### Semantic frameworks for analysis and querying There are four semantic frameworks/formalisms that default SSSOM supports: (1) [SPARQL/RDF(S)](https://www.w3.org/TR/rdf-sparql-query/) (querying an integrated knowledge with basic SPARQL); (2) [Simple Knowledge organisation systems (SKOS)](https://www.w3.org/TR/skos-reference/); (3) [Web Ontology Language (OWL)](https://www.w3.org/TR/owl2-syntax/); (4) no formalism (property graphs, non-semantic use cases). We will briefly discuss the implications of each for your use cases. - SPARQL/RDF(S) is a very general semantic framework that allows query across [property paths](https://www.w3.org/TR/sparql11-property-paths/). Many SPARQL engines provide at least RDFS entailment regime, which allows for some (basic) semantic reasoning (subClassOf, property domains). This is the most likely semantic framework of choice if your use case involves semantic queries such as those involving sub-class groupings. 
- SKOS is a semantic framework that layers on top of RDF and specifies semantics for a handful of properties that are useful for building taxonomies that do not seek to follow the rigorous semantics of the class-level modelling constructs such as subClassOf. We have no experience with SKOS reasoners, and do not know if there are any out there. This means, in effect, that this "case" (semantic framework) has the same exact considerations as the SPARQL/RDF(S) one above. - OWL is a very powerful semantic framework that is based on formal logic. Ontologies represented in OWL offer support for complex expressions of knowledge, way beyond what RDFS and SKOS can do. OWL is the semantic framework of choice if the goal is to build **and reason** over an integrated (merged) ontology. An example use case where OWL is the appropriate framework is integration of species-specific anatomy ontologies under species-neutral ones, see for example [Uberon](https://github.com/obophenotype/uberon). A basic rule of thumb is: unless you know positively that you have to reason over the _merged_ graph, i.e. set of all ontologies you have mapped across, OWL is probably overkill and should be avoided. - Using no semantic framework does not mean semantic mappings are useless! Many extremely useful applications exist for mappings which do not involve a semantic framework, such as those related to [Labelled Property Graphs](https://www.oxfordsemantic.tech/fundamentals/what-is-a-labeled-property-graph) (for example [neo4j](https://neo4j.com/)). Even if you just want to translate your data into a graph, it is useful to know the semantics of your mappings as they can inform your graph queries. Other semantic frameworks exist such as rule-based systems (e.g. Datalog, SWRL), but they are not used as widely as the above in our domain. 
#### Instance vs Property vs Concept-level mapping To pick the correct mapping predicate, it is important to understand whether you are mapping concepts or instances: - Concept-level: the entity being mapped constitutes a class or a concept. A concept can be thought of a collection or set of individuals. For example, "Apple" could refer to the class of all apples. - Instance-level: the entity being mapped constitutes an individual or an instance. An instance is a single real-world entity, such as Barack Obama. Instances are members of classes/concepts. For example, Barack Obama belongs to the class of "Person", or "Former Presidents". Another example is an individual apple on a shelf in a supermarket ("Gala Apple 199999"), which is an instance of the "Apple" class. Note that notions like `broad` or `narrow` make no sense when mapping instances. We typically try to avoid the SKOS vocabulary for mapping instances, and make use of `owl:sameAs` instead. Note that `owl:sameAs` does have implications for reasoning, but it is also the preferred property when within the "RDF/SPARQL" semantic framework. If the mapping involves an instance _and_ a class, you have hit a corner case of the SSSOM use case. This case can still be represented, but instance-concept relationships are not widely thought of as "mappings". In much the same way as concepts and instances, you can also map properties or "relationships": - Property-level: the entities being mapped are both properties, like, for example, rdfs:label, skos:prefLabel, RO:0000050 (part of). Note that it does not make sense to try to map instances of concepts, or concepts, directly to properties. There are no relationships that would support such a mapping. #### Typical use cases Typical use cases for mappings include: 1. _Semantic data integration_. This often involves linking data to ontologies or semantic layers in knowledge graphs. Data from one source (such as an EHR) is translated to another (such as OMOP, see above). 
To analyse the data semantically, the most valuable links are `exact` and `broad` as these allow you to directly query the ontology to retrieve instance data. `close` and `narrow` matches are less useful for such a use case, but may be consulted as the "next best thing" to an exact mapping. Often, a low level of noise is acceptable. 2. _Data translation_. Similar to data integration, but we want to map as precisely as possible. Only `exact` matches really matter if we want to make sure that data annotated with one ontology means the exact same thing as data annotated with another. Noise in the mappings is often not acceptable. An example for this is if one source has annotated all its genes using the HUGO Gene Nomenclature Committee (HGNC) while another is using NCBI Gene Database identifiers. `broad`, `narrow` and even `close` matches are mostly meaningless - we need a 1:1 translation table with next to zero noise. 3. _Ontology and knowledge graph merging_. Here, the key issue is that `exact` matches have as little noise as possible. Some merging approaches use probabilistic algorithms to weed out potentially bad mappings (low levels of noise may be acceptable, see for example [boomer](https://github.com/INCATools/boomer)), but any naive merging approach, which is still prevalent in the knowledge graph world, will usually do the following: (1) Merge all `exact` matches into one "node" in the knowledge graph and (2) redirect all data against all these `exact` matches to that newly created node. ## The 3-step process for selecting an appropriate mapping predicate The following 3-step process condenses the sections above into a simple to follow algorithm. Given two terms A and B: 1. Target: semantic framework: Does your use case require OWL reasoning over the merged subject and object sources? - If yes, use OWL vocabulary for properties - If no, use RDF/SPARQL/SKOS vocabulary for properties 1. Are A and B instances, properties or concepts? 
- If A and B are instances, use only vocabulary suitable for instances - If A and B are concepts, use only vocabulary suitable for concepts - If A and B are properties, use only vocabulary suitable for properties - If either one of A or B is an instance and the other is a concept, use only vocabulary suitable for describing instance-class relationships 1. Is A roughly the same as B? - If yes, does the difference between "truly exact" and your understanding of `A` and `B` constitute "acceptable noise level"? - If yes: the mapping is `exact`. - If no: the mapping is `close`. - If no, determine the precision as described above. You can now select the mapping predicate based on the table below: | Mapping Predicate | Precision | Suitable for semantic framework | Suitable entity types? | Acceptable noise | |------------------------|-------------|---------------------------------|------------------------|------------------| | skos:exactMatch | exact | SKOS/RDF(S)/SPARQL/NO | Concept | low | | skos:relatedMatch | related | SKOS/RDF(S)/SPARQL/NO | Concept | low | | skos:broadMatch | broad | SKOS/RDF(S)/SPARQL/NO | Concept | low | | skos:narrowMatch | narrow | SKOS/RDF(S)/SPARQL/NO | Concept | low | | skos:closeMatch | close | SKOS/RDF(S)/SPARQL/NO | Concept | low | | owl:sameAs | exact | SKOS/RDF(S)/SPARQL/OWL/NO | Instance | low | | owl:equivalentClass | exact | OWL | Concept | no | | rdfs:subClassOf | broad | RDF(S)/SPARQL/OWL | Concept | no | | owl:equivalentProperty | exact | OWL | Property | no | | rdfs:subPropertyOf | broad | OWL | Property | no | | oboInOwl:hasDbXref | exact | SKOS/RDF(S)/SPARQL | Any | high | | rdfs:seeAlso | close | SKOS/RDF(S)/SPARQL | Any | high | | rdf:type | exact/broad | RDF(S)/SPARQL/OWL | Instance-Concept | no | Note that "acceptable noise" refers to "what is acceptable for the target semantic framework". 
When using OWL, even a bit of noise can have huge consequences for reasoning, so it is not advisable to use the OWL vocabulary in cases where there is a lot of noise. ## Frequently asked questions 1. None of the mapping predicates listed here seem to fit for my use case. Can I define my own? The SSSOM specification is currently open to specifying new mapping predicates. However, it is always advisable to open an [issue](https://github.com/mapping-commons/sssom/issues) to discuss such cases with the wider community - there may be some benefit in standardising predicates from the start! ================================================ FILE: src/docs/matching-tool-implementation-guide.md ================================================ # Matching Tools: Implementation Guide for SSSOM **Summary**: The goal of this document is to advise matching tool developers how to implement SSSOM-style matching justifications as part of their output. For example, if a mapping was determined by (or is supported by) a lexical matching process, we can document that, alongside metadata that further describes the details about that process. As of 17.07.2023, this guide is a _work in progress_. If you are a tool developer interested in implementing these recommendations, feel free to reach out on https://github.com/mapping-commons/sssom/issues for support and feel empowered to help us improve this guide! ## Basics 1. A (semantic) mapping in the sense of this guide is a tuple `<s, p, o, |j|>` that describes the correspondence of a subject `s` to an object `o` via a mapping predicate `p`. `|j|` is a non-empty set of mapping justifications that provide evidence towards the validity of the correspondence. 1. As stated above, but re-stated for clarity: **every mapping can be associated with 1 or more justifications**. 1. 
Carefully consider if a piece of metadata should be applied on [mapping](https://mapping-commons.github.io/sssom/Mapping/) or [mapping set](https://mapping-commons.github.io/sssom/MappingSet/) level. As a rule of thumb, if a piece of metadata applies to absolutely all mappings in the target set, then consider adding it as metadata to the mapping set, to save space. 1. Justifications in the sense of this guide comprise a *category* (documented in the [mapping_justification](https://mapping-commons.github.io/sssom/mapping_justification/) field), which is represented as a specific matching activity such as "lexical matching", "logical matching", "manual mapping curation", etc, a confidence value that represents the amount of confidence the justification contributes to the perceived truthfulness of a mapping, and additional metadata that provide additional provenance. 1. The basic vocabulary for the justification category is the [Semantic Mapping Vocabulary](https://github.com/mapping-commons/semantic-mapping-vocabulary). Feel free to use the [issue tracker](https://github.com/mapping-commons/semantic-mapping-vocabulary/issues) to request new categories to be added. There is a fast turnaround. 1. The goal of providing mapping justifications is to enable cross-purpose reuse of mappings, sharing of mappings and [mapping reconciliation](glossary.md). Mapping justifications make individual mapping decisions transparent. 1. Adding justifications is always valuable, even if **not all detailed metadata is provided**. 1. Many justifications are combinations of other justifications. For example, we may decide that a match is justified if (a) there is a lexical match and (b) the surrounding graph-structure is isomorphic or (c) the entities involved share the same properties. In this case, we should add individual justifications for each individual justification. 
The [confidence](https://mapping-commons.github.io/sssom/confidence/) value expresses how **confident the specific justification makes you feel about the truthfulness of the mapping**. If a joint probability is calculated from multiple justifications, add a separate justification for that, e.g. [semapv:CompositeMatching](https://w3id.org/semapv/vocab/CompositeMatching). 1. In the SSSOM TSV formats, every row corresponds to a justification, not a mapping. So the same mapping with three justifications will result in three rows in the SSSOM TSV file. ## Background Before reading on, please skim through the [technical documentation of SSSOM](index.md) to get a sense of what kind of properties exist, and read our primer on [mapping justifications](mapping-justifications.md) first, which explains how to design a number of frequently used mapping justifications. As the collection of justifications can impact the performance of the matching process (at least for huge matching tasks), it is advisable that the process can be switched off by the CLI. For most matching processes, we first construct a candidate mapping set from a combination of sources, for example: 1. Mappings provided by user as input to the matching process 1. Lexical exact matching 1. Lexical fuzzy matching (traditional and word embeddings) As a second step, we use often complex combinations of techniques to refine and expand the candidate mapping set: 1. Structural matching (graph-based approaches etc) 1. Semantic matching 1. Logical matching (for example by deconstructing complex terms into composites and then using logical reasoning). 1. Similarity based matching, including graph-embedding similarity (machine learning), old-school semantic similarity measures like Resnik or even Jaccard (over some part of the ontology/schema structure) As a rule of thumb, the more complex the rules by which a match is determined, the harder it is to provide a useful justification. 
To put it slightly differently: the more complex a justification, the less useful it is if the goal is to make matching decisions **transparent for human users**. A good example of this are decisions based on embedding (e.g. graph, node) similarity: while it is often useful to understand that a match has been determined by a threshold (e.g. >=0.9) of cosine similarity of a node embedding, it is less important to communicate exactly how the embedding space was constructed. This insight guides our implementation in two ways: 1. We start by focusing on the "easy" cases with clear mapping justifications (like the lexical ones used to construct the _candidate mapping set_), and incrementally work our way up towards harder ones. 1. We have a default justification for "complex" cases which we have not covered yet. This is necessary not only because it may be hard to construct complex justifications from within a matching tool, but also because SSSOM simply does not have a way to express the justification yet (in this case, request clarification on the [SSSOM issue tracker](https://github.com/mapping-commons/sssom/issues)). ## Basic thoughts about architecture The [MELT framework](https://github.com/dwslab/melt) offers a well designed architecture for matchers. While the interested reader is referred to [the MELT documentation](https://dwslab.github.io/melt/) for details, we want to use it here as an example of how a tool implementer, from a higher level perspective, could think about collecting SSSOM metadata as part of the matching process. Conceptually, a matching process (from the perspective of the MELT developers) has four inputs: 1. Source ontology: `O_s` 2. Target ontology: `O_t` 3. (potentially empty) input alignment: `Map_in` 4. Configuration (for the matching tool): `Conf` and returns one output: 1.
Output alignment: `Map_out` Note that any given implementation can take other inputs and produce other outputs, but for the sake of this guide we assume this basic architecture. Conceptually, four elements are important to the matching process: 1. The alignment 1. The individual correspondences that are part of the alignment 1. Evidence gathered towards the truthfulness of the alignment 1. A matcher that implements the "matching process" described above in terms of input/output In the MELT reference implementation, for example, there is an [Alignment](https://github.com/dwslab/melt/blob/master/yet-another-alignment-api/src/main/java/de/uni_mannheim/informatik/dws/melt/yet_another_alignment_api/Alignment.java) class. During the matching process, the alignment is passed through a series of [matchers](https://github.com/dwslab/melt/blob/master/matching-jena/src/main/java/de/uni_mannheim/informatik/dws/melt/matching_jena/MatcherYAAA.java#L16) to be augmented. For example, a [bounded path matcher](https://github.com/dwslab/melt/blob/master/matching-jena-matchers/src/main/java/de/uni_mannheim/informatik/dws/melt/matching_jena_matchers/structurelevel/BoundedPathMatching.java#L41). In essence, the matching process is a series of matchers strung together, where the alignment produced by one process is passed through to the next, then augmented, then passed on (potentially to other processes such as filtering, which we consider matching processes as well). During an individual matching process like the [bounded path matcher](https://github.com/dwslab/melt/blob/master/matching-jena-matchers/src/main/java/de/uni_mannheim/informatik/dws/melt/matching_jena_matchers/structurelevel/BoundedPathMatching.java#L41), correspondences are added to and removed from the alignment. The key to a meaningful SSSOM integration is this: when a new correspondence (mapping) is added to the alignment (or "mapping set" in SSSOM parlance) you _add a piece of evidence alongside the correspondence_.
This is usually done by extending the correspondence data model with a new field: justification, evidence, or similar. A piece of evidence includes three major things: 1. A justification. Usually, any `matcher` type will correspond to exactly one [justification in the SEMAPV vocabulary](https://www.ebi.ac.uk/ols4/ontologies/semapv/classes/https%253A%252F%252Fw3id.org%252Fsemapv%252Fvocab%252FMatching?lang=en). 2. A confidence level. This reflects how much confidence the process has induced in the mapping all by itself. 3. Any other metadata important for that specific justification, such as `subject_match_field` for a lexical matching process. Your matching process should collect this metadata, and, by the end of the process, the whole alignment, including correspondences and justifications for each correspondence, should be exported. _Important note_: In the final TSV file, every _justification_ will have its own row! So a correspondence (mapping) with several justifications will appear on multiple rows! ## Step-by-step guide for implementation This step-by-step guide roughly follows our own thinking of what should be done first, second, and so on. 1. Add an option to your matching tool to output legal SSSOM TSV (recommended format now), for example `--export-sssom` or similar. 1. OPTIONAL: Add an option to your matching tool to accept legal SSSOM TSV as user input as an alternative to the Alignment API (recommended format now). 1. Always provide basic provenance in the SSSOM output: - [mapping_tool](https://mapping-commons.github.io/sssom/mapping_tool/): The canonical reference to your tool, ideally a persistent identifier. - [mapping_tool_version](https://mapping-commons.github.io/sssom/mapping_tool_version/): The version of the tool used to compute the mapping set. - [mapping_set_id](https://mapping-commons.github.io/sssom/mapping_set_id/): An (often randomly generated) mapping set identifier.
- [mapping_date](https://mapping-commons.github.io/sssom/mapping_date/): The date the mapping was generated. - OPTIONAL: if available, add [subject_source](https://mapping-commons.github.io/sssom/subject_source/), [object_source](https://mapping-commons.github.io/sssom/object_source/) and [subject_source_version](https://mapping-commons.github.io/sssom/subject_source_version/), [object_source_version](https://mapping-commons.github.io/sssom/object_source_version/). 1. Document some basic entity metadata; this can help when reading the mapping set: - [subject_label](https://mapping-commons.github.io/sssom/subject_label/), [object_label](https://mapping-commons.github.io/sssom/object_label/): If available, add the label of the subject and of the object. 1. Add basic justification support 1. Track lexical matching-based mapping decisions. A good chunk of candidate mappings will be computed by some form of lexical matching. See [here](mapping-justifications.md#lexical-matching) for details. 1. If something more complex than a simple lexical matching has happened, try to find an appropriate justification in [SEMAPV](https://mapping-commons.github.io/semantic-mapping-vocabulary/). If none exists, or it's too much work to create one, use as a fall-through: - [semapv:CompositeMatching](https://w3id.org/semapv/vocab/CompositeMatching) in the case that the match was established through a combination of approaches, but you don't want to provide justifications for each individual one. - [semapv:UnspecifiedMatching](https://w3id.org/semapv/vocab/UnspecifiedMatching) in the case you don't know why the match happened. 1. All justifications should come with a [confidence](https://mapping-commons.github.io/sssom/confidence/) value that expresses how **confident the specific justification makes you feel about the truthfulness of the mapping**. 1. Track if a mapping was provided (as input) by a user.
Ideally, if the input to the matching process is SSSOM, simply adopt all of the mapping justifications provided by the user. If the provided mapping has no metadata, add a suitable [mapping_provider](https://mapping-commons.github.io/sssom/mapping_provider/) value (e.g. `MYTOOL:USER`, to indicate that the mapping was provided by the user). 1. Add advanced justification support. Add all metadata explained in [mapping justifications](mapping-justifications.md). Where suitable fields or values are missing from the [SSSOM datamodel](https://mapping-commons.github.io/sssom/) or [SEMAPV](https://mapping-commons.github.io/semantic-mapping-vocabulary/), request them on the [SSSOM](https://github.com/mapping-commons/sssom/issues) or [SEMAPV issue tracker](https://github.com/mapping-commons/semantic-mapping-vocabulary/issues) respectively. There are likely a lot of interesting details to be added, so don't be shy to request/suggest! 1. If you reject a user-provided mapping, it makes sense to include that in a negative mapping set in SSSOM. You could provide [predicate_modifier](https://mapping-commons.github.io/sssom/predicate_modifier/)`= NOT` to ensure the file is not interpreted wrongly. 1. HIGHLY OPTIONAL: In a few cases, it may be interesting to inform the user that not all mappings are 1:1. In this case, it could be advisable to use the `mapping_cardinality` field. 1. OPTIONAL: If relevant you can add the [subject_type](https://mapping-commons.github.io/sssom/subject_type/) and [object_type](https://mapping-commons.github.io/sssom/object_type/) fields to your output, if known. This can be interesting in some cases with mixed content (being able to separate `owl:Class` related mappings from those about `owl:ObjectProperty`). 1. You can always use the [comment](https://mapping-commons.github.io/sssom/comment/) or [other](https://mapping-commons.github.io/sssom/other/)* fields to deposit additional useful metadata that can later be turned into structured content.
## Examples - [MGI Mouse-Human mappings](https://github.com/mapping-commons/mh_mapping_initiative/blob/master/mappings/mp_hp_mgi_all.sssom.tsv) - [SSSOM examples](https://github.com/mapping-commons/sssom/tree/master/examples/embedded) ================================================ FILE: src/docs/presentations.md ================================================ # Presentations ## Building Bridges with FAIR Mappings: An RDA Perspective (GA4GH 13th Plenary) - October 6th, 2025, hybrid ([information](https://www.ga4gh.org/event/13th-plenary/)) - [Lightning talk](https://docs.google.com/document/d/1e3X9dzq84B3sX0eaLEjmLcYYDv_INmGUNZ6kbAh5MM8/edit?tab=t.0#heading=h.somgoowwkvaj) at GA4GH Connect session **The future is FAIR beyond genomics: Building bridges to the Research Data Alliance (RDA) and other FAIR research data initiatives**, representing SSSOM and FAIR Mappings WG. - [Slides](https://docs.google.com/presentation/d/18vf5gcHtGKzBpkzQRpHUZZyxjQ9GiYG45CYzhzQSNgE/edit) ## RDA FAIR Mappings WG: Highlights from Case Studies and Initial Taxonomy (RDA’s 24th plenary) - April 7th, 2025, virtual - Talk at RDA 24th plenary meeting about the progress of the [RDA FAIR Mappings Working Group](https://www.rd-alliance.org/groups/fair-mappings-wg/activity/), which uses SSSOM as the baseline model. - [Slides](https://docs.google.com/presentation/d/1A3Fgo9ExUGgrc0VX85tqG-_6_Vu6b-qtcfvLTmW9mIU/edit) ## A Simple Standard for Ontological Mappings 2024: A quick guide for getting started with publishing better entity mappings (RDA’s 23rd plenary) - November 13th, 2024, hybrid - Talk at [RDA plenary](https://www.rd-alliance.org/rdas-23rd-plenary-programme/), in particular [VSSIG WG](https://www.rd-alliance.org/session_entry/group-session-applications-03-07-2024-john-graybeal/) giving a brief update and encouraging SSSOM uptake. 
- [Slides](https://docs.google.com/presentation/d/1TKdXO-THSUf5PHZp2sYrzaTQhtunoB78A6_Yt8VEyoc/edit?usp=sharing) ## A Simple Standard for Ontological Mappings 2024: The case for prioritising 5-Star mappings (Every Cure, internal seminar) - August 1st, 2024, virtual - Talk to [Every Cure](https://everycure.org/) technical team (Knowledge Sharing Series) to make the case for prioritising the collection of mapping provenance. - [Slides](https://docs.google.com/presentation/d/1_KkSeZcKyzsQVlZe8qkl7iEvii3HX0QKR58uvbirTDM/edit) ## (Re-)bridging the anatomy ontologies with SSSOM - July 19th, 2024 - [15th International Conference on Biomedical Ontologies (ICBO)](https://icbo-conference.github.io/icbo2024/) - [Slides](https://github.com/gouttegd/sssomt-uberon/releases/download/v2-icbo-2024/screen.pdf), [paper](https://github.com/gouttegd/sssomt-uberon/releases/download/v2-icbo-2024/paper.pdf), [sources](https://github.com/gouttegd/sssomt-uberon) ## Workshop on Prefixes, CURIEs, and IRIs 2023 - November 27th, 2023, virtual - Lightning talk on the deeper integration of the Bioregistry and `curies` toolkit with the SSSOM Python package - [Workshop info](https://biopragmatics.github.io/workshops/WPCI2023) ## OM2023: A Simple Standard for Sharing Ontological Mappings 2023: Updates on data model, collaborations and tooling - 7th November 2023 - [http://om2023.ontologymatching.org/](http://om2023.ontologymatching.org/) - [Slides](https://docs.google.com/presentation/d/1d2t-VcseZ_oAgVTbrDHJOmwQTqyXB9ZHDrZ08OU87a0/edit) ## SSSOM Updates 2023 (FAIR Impact and FAIRCORE4EOSC Life Working Session) - 6th October 2023 - [Slides](https://docs.google.com/presentation/d/1RY0IKn5TWVqXhcJ5vyabXEPhCPXNLsx9IWiWzQCzXZg/edit) ## OHDSI 2023 Symposium: Synergizing Simple Standard for Sharing Ontology Mappings (SSSOM) and the Observational Health Data Sciences and Informatics (OHDSI) - 3rd July 2023, https://www.ohdsi-europe.org/index.php/symposium-2023 - [Seminar info](events/ohdsi2023.md) ## Elixir 
Data Interoperability Meeting: Introduction to SSSOM - 6th March 2023 - This talk is a variant of the CCB Seminar Series talk below - [Slides](https://docs.google.com/presentation/d/1w-rNLTprIbW8IUBu6YokDsPe98AKg4VwfR1gSsJrae8/edit#slide=id.g167f28e52df_0_22) ## Ontology Summit 2023: Open, FAIR and standardised mappings for ontologies, controlled vocabularies and database entities - 22 February 2023, virtual - This talk is a variant of the CCB Seminar Series talk below - [Seminar info](https://ontologforum.org/index.php/ConferenceCall_2023_02_22) - [Video recording (scroll to minute 46)](https://ontologforum.s3.amazonaws.com/OntologySummit2023/Part1/Ubergraph--JimBalhoff_20230222.mp4) - [Slides](https://docs.google.com/presentation/d/1_TuimFiJ_7VP0ZFkQrHYky_ktFZc981Vse0-_hZjKtc/edit#slide=id.g167f28e52df_0_22) ## CCB Seminar Series: Open SSSOM - Unlocking the wealth of biomedical data using shared standardized entity mappings - December 2022 talk, virtual - [Seminar info](events/ccb2022.md) - [Video recording](https://www.youtube.com/watch?v=4vqeRECuAKE) - [Slides](https://docs.google.com/presentation/d/1Gt6kLSTx_e1Al6eCvGp_hviezy5ySo4UA_ii8LGqqIw/edit?usp=drive_web&ouid=105278838581444356576) ## OHDSI Symposium 2022: There are no "good" mappings. 
- October 2022, hybrid symposium (https://www.ohdsi.org/ohdsi2022-workgroup-activities/) - [Seminar info](events/ohdsi2022.md) - [Slides](https://docs.google.com/presentation/d/1sGPh1b0keghxF4o7vMOQAlZ6QyBf97ZpaTXjmMY3UP0/edit#slide=id.SLIDES_API69505745_0) ## OM 2022: A Simple Standard for Ontological Mappings 2022 - Updates of data model and outlook - 23rd October 2022, Workshop for Ontology Matching, ISWC 2022, virtual - [Seminar info](events/om2022.md) - [Slides](https://docs.google.com/presentation/d/1L0LzXVPcfS9eW1KkN-BIYnxuh_CQ_8fl3QPvqw9BmUs/edit#slide=id.g16d02f01a3b_0_0) ## OBO Academy 2022: Introduction to manual mapping curation - 17th May 2022, Seminar, Monarch Seminar Series - [Video recording](https://www.youtube.com/watch?v=ZZeZcg-Vwjw) - [Seminar info](events/oboacademy2022.md) - [Slides](https://mapping-commons.github.io/sssom/tutorial/) ## Workshop on Prefixes, CURIEs, and IRIs 2021 - Fall 2021, Use Case Talk on the need for prefix maps for SSSOM - [Video](https://youtu.be/iOXZfLAF_X0?t=1100) - [Workshop info](https://biopragmatics.github.io/workshops/WPCI2021) ## Pistoia Seminar 2022: A Simple Standard for Sharing Ontological Mappings (SSSOM) - 28th April 2022, Team Meeting - [Seminar info](events/pistoia2022.md) - [Slides](https://docs.google.com/presentation/d/1gW-BN4yR1c8qxzL9uLeJm99zRancY3k0tcZlJRPu4Eg/edit#slide=id.g126201cd604_0_0) ## WSBO-2021: Workshop on Synergizing Biomedical Ontologies - 14th July 2021, Workshop - [Workshop info](events/wsbo2021.md) - [Slides](https://docs.google.com/presentation/d/1TlROX-JNeWvgrX57-CBa2qxTrRp92VGGZnrhJv3rLPM/edit#slide=id.p) ## MC-2021: 1st Mapping Commons Workshop on Simple Standard for Sharing Ontology Mappings - [Workshop info](events/mc2021.md) - [Video recording](https://www.youtube.com/watch?v=lgVqFeSxYbg) - [Slides: Introduction](https://docs.google.com/presentation/d/1T75TRkpKRGHk5FSeFS7mQe8vmo8rt7bE69kgPX6PZMs/edit?usp=sharing) - [Slides: 
OMOP2OBO](https://docs.google.com/presentation/d/1ItWLWnIlJeBgw5r4ZQ6mOVAFVQp-1uQ7vA9EI-1o5HY/edit?usp=sharing). ================================================ FILE: src/docs/record-identifiers.md ================================================ # Identifying mapping records Since version 1.1, the SSSOM specification allows to explicitly assign an identifier to every single mapping record within a given mapping set, through the [record_id](../record-id.md) slot. The specification is deliberately non-prescriptive about what record identifiers should look like or how they should be generated. The only constraints on the `record_id` slot are that: - the value must be a URI; - the URI must be representable in CURIE form in some serialisations (e.g. in SSSOM/TSV); - either all records within a set have an identifier, or none have one; - each identifier should be unique within the set that contains it. Beyond those constraints, it is left to the creators of a SSSOM mapping set to decide whether and how to mint identifiers for their records. This page is intended to provide some non-normative guidance. ## Uniqueness scope While the specification only mandates that record identifiers must be unique **within a set**, it is probably a good idea to use identifiers that are **globally** unique. An easy way to do that is to construct the identifiers on top of the `mapping_set_id` of the mapping set. 
For example, if you have the following set: ``` #curie_map: # FOODON: http://purl.obolibrary.org/obo/FOODON_ # KF_FOOD: https://kewl-foodie.inc/food/ #license: https://creativecommons.org/licenses/by/4.0/ #mapping_set_id: https://w3id.org/sssom/tutorial/example1 subject_id subject_label predicate_id object_id object_label mapping_justification KF_FOOD:F001 apple skos:exactMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration KF_FOOD:F002 gala skos:exactMatch FOODON:00003348 Gala apple (whole) semapv:ManualMappingCuration KF_FOOD:F003 pink skos:exactMatch FOODON:00004187 Pink apple (whole, raw) semapv:ManualMappingCuration KF_FOOD:F004 braeburn skos:broadMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration ``` then you could construct the following identifiers, all derived by appending a local part to the `mapping_set_id` URI: - `https://w3id.org/sssom/tutorial/example1#001` - `https://w3id.org/sssom/tutorial/example1#002` - `https://w3id.org/sssom/tutorial/example1#003` - `https://w3id.org/sssom/tutorial/example1#004` Assuming the `mapping_set_id` URI is globally unique (which it should be), then all record identifiers derived from it will necessarily be globally unique as well. 
The resulting set would then look as follows (keep in mind that record identifiers must be in CURIE form in the SSSOM/TSV format): ``` #curie_map: # FOODON: http://purl.obolibrary.org/obo/FOODON_ # KF_FOOD: https://kewl-foodie.inc/food/ # example1: https://w3id.org/sssom/tutorial/example1# #license: https://creativecommons.org/licenses/by/4.0/ #mapping_set_id: https://w3id.org/sssom/tutorial/example1 record_id subject_id subject_label predicate_id object_id object_label mapping_justification example1:001 KF_FOOD:F001 apple skos:exactMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration example1:002 KF_FOOD:F002 gala skos:exactMatch FOODON:00003348 Gala apple (whole) semapv:ManualMappingCuration example1:003 KF_FOOD:F003 pink skos:exactMatch FOODON:00004187 Pink apple (whole, raw) semapv:ManualMappingCuration example1:004 KF_FOOD:F004 braeburn skos:broadMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration ``` > Again, this is **guidance** only. There is _no obligation_ for record > identifiers to be derived from the `mapping_set_id`. It is simply a convenient > way to achieve global uniqueness, should it be desired. ## Identifier generation methods Here are some of the ways by which the local part of identifiers can be generated. ### Opaque identifiers #### Serially allocated numbers This is the method used in the example above. The local part of the identifier is a (typically fixed-width) number that is simply incremented whenever a new identifier is needed. This is arguably the simplest method, and one that is especially practical when creating/editing a mapping set using a generic, non-SSSOM-aware spreadsheet software. It requires keeping track of the last used number, but that should not be a big hurdle when editing a set in a spreadsheet software. #### Randomly allocated numbers The local part of the identifier can be made of numbers that are randomly picked rather than serially allocated. 
This dispenses with the need to keep track of the last used number. For this method to work, the random numbers must be picked (1) within a large enough space and (2) using an established and solidly implemented pseudo-random number generator (PRNG) software. In particular, they should _not_ be hand-picked by a human editor (or an LLM). Humans (and the LLMs that try to mimic them) are notoriously bad at producing random numbers. Of note, this method can easily produce **globally unique** identifiers on its own if the space in which the random numbers are picked is large enough. Typically, using 128-bit numbers (and assuming a proper PRNG), the probability of two random identifiers colliding is sufficiently low that for all purposes the identifiers can be considered globally unique. ### Non-opaque identifiers #### Manually crafted non-opaque strings A human editor could mint an identifier that meaningfully represents some “key” characteristics of the mapping record. For example, the first record in the example mapping set above: ``` subject_id subject_label predicate_id object_id object_label mapping_justification KF_FOOD:F001 apple skos:exactMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration ``` (not repeating the mapping set metadata _brevitatis causa_) could get assigned an identifier like `example1:F001_exact_FOODON2473` – constructed by some informal derivation of the subject ID (`F001`), the predicate ID (`exact`), and the object ID (`FOODON2473`). This might be perceived as useful to a human curator, as the record identifier immediately gives a sense of what the record is about. It must be noted, however, that embedding any kind of meaning into an identifier is generally a bad idea ([McMurry _et al_., 2017](https://doi.org/10.1371/journal.pbio.2001414)).
#### Content-derived identifiers An identifier can be automatically derived from the record by running the record through some kind of condensation (“hash”) function that returns a value calculated in such a way that the probability that two different records could yield the same value can be considered negligible. The SSSOM specification defines [such a function](spec-support-hashing.md). While the resulting value may appear meaningless, and not different from a randomly picked number, it represents a non-opaque identifier nonetheless because the value is still directly dependent on the content of the record. Of note, the [Mapping Sameness Identifier](https://ts4nfdi.github.io/mapping-sameness-identifier/), a proposed SSSOM-independent standard to compute a unique identifier for a mapping, is _not_ suitable as a SSSOM record identifier, because it may not be unique for any given record (on the contrary, it is explicitly designed to be identical for all records that have the same subject, the same predicate (incl. possible negation modifier), and the same object, regardless of all other metadata). ##### Content-derived identifiers considered harmful As noted at the very beginning of this page, the SSSOM specification is non-prescriptive about how identifiers should be minted. It neither mandates nor forbids any particular method, and all of the methods listed above (as well as other methods not listed here) _can_ be used in a SSSOM mapping set. However, the author of these lines strongly feels that content-derived identifiers are a particularly bad idea, for the reasons given in this section. **(A)** Content-derived identifiers make it impossible to write a mapping set entirely by hand. The hash of a mapping record cannot be realistically computed in someone’s head; whoever is editing the mapping will have to use a dedicated tool to produce it.
This breaks an important promise of SSSOM, which is that one can always manually craft a SSSOM set with no specialised tooling at all – just plain old spreadsheet software. **(B)** Content-derived identifiers are at risk of becoming “out-of-sync” with the records they supposedly identify. If an editor modifies the record in any way but forgets to re-run the ID-generating procedure, then they’ll end up with identifiers that are no longer really derived from the content of the record. **(C)** Content-derived identifiers deprive the set’s creators of the freedom to decide the difference between “updating an existing record” and “creating a new record”, because in fact there is no such thing as “updating a record” when using content-derived identifiers – any change to a record would cause the identifier to change, in effect always creating a _new_ independent record. **(D)** As a direct consequence of **C**, content-derived identifiers are not _stable_, because again any change to the record (even a semantically meaningless change like fixing a typo) would cause the identifier to change. This in turn means, for example, that consumers of records with content-derived identifiers cannot _reliably_ refer to them, because the identifiers may change at any time for even the slightest change made to the records. **(E)** The instability of content-derived identifiers is even worse in the specific case of SSSOM, because the content of a SSSOM record could change due to something that is out of the control of the record’s creator. Consider the following record: ``` subject_id subject_label predicate_id object_id object_label FBbt:00000015 thorax semapv:crossSpeciesExactMatch UBERON:6000015 insect thorax ``` and now let’s imagine that Uberon curators decide to rename UBERON:6000015 from “insect thorax” to “thorax sensu Insecta” (because they decided they prefer this way of mentioning the species within the label).
The next time the mapping is updated to ensure it is using the latest labels, the record thus becomes: ``` subject_id subject_label predicate_id object_id object_label FBbt:00000015 thorax semapv:crossSpeciesExactMatch UBERON:6000015 thorax sensu Insecta ``` In this scenario, the _meaning_ of the record has not changed at all. UBERON:6000015 still represents the same concept as before, so this record still represents the very same mapping between the very same entities. And yet, because the _label_ of UBERON:6000015 has changed (something that the creator of the set has no control upon), if records were identified using content-derived identifiers we would have to consider the second record as a _different entity_, identified with a different identifier, than the first. Overall, content-derived identifiers can only be viable if some very specific conditions are met: - the data store (be it a database, a file, or whatever) where records are stored must be **append-only**; that is, it must not be possible to delete or modify existing records, you can only _add_ new records; - whenever a new record is created by deriving from an existing record, there must be a way to get to the original record from the new record. _If_ you use SSSOM that way, then _maybe_ content-derived identifiers can be fine for your use case. Otherwise, you should stick to meaningless, opaque identifiers that are not tied to the content of the record. 
================================================ FILE: src/docs/related-documentation.md ================================================ ## Related documentation - [SSSOM Toolkit](https://mapping-commons.github.io/sssom-py/index.html#): A toolkit and library for processing SSSOM files in Python - [SSSOM Java](https://incenp.org/dvlpt/sssom-java/): A toolkit and library for processing SSSOM files in Java - [SSSOM Curator](https://github.com/cthoyt/sssom-curator/): A framework for curating SSSOM mapping sets - [Semantic Mapping Vocabulary (SEMAPV)](https://mapping-commons.github.io/semantic-mapping-vocabulary/): The mapping vocabulary used for mapping justifications and specialised mapping predicates - [LinkML](https://linkml.io/linkml/): The modelling framework used by SSSOM - [OBO Academy](https://oboacademy.github.io/obook/): Ontology and mapping related training materials - [Monarch Initiative](https://monarch-initiative.github.io/monarch-documentation/): Knowledge Graph related products supported by the Monarch Initiatives, including many tools using and producing SSSOM ================================================ FILE: src/docs/spec-formats-json.md ================================================ # The JSON serialisation format The JSON serialisation format is currently unspecified. It is intended as a more-or-less direct serialisation of the `MappingSet` class into the JSON format as specified by [RFC 8259](https://datatracker.ietf.org/doc/html/rfc8259), but many details of the serialisation are left unspecified for now. ================================================ FILE: src/docs/spec-formats-owl.md ================================================ # The OWL/RDF serialisation format This section defines a way to serialise SSSOM mappings as _reified OWL axioms_. This has the advantage that any mapping set can be simply merged with an ontology in the usual way, for example using [ROBOT merge](https://robot.obolibrary.org/merge). 
The OWL/RDF serialisation rules deal with three types of reified OWL axioms, and a few sub-types: 1. Predicate is an annotation property 2. Predicate is an object property and 1. Object/Subject are classes 2. Object/Subject are individuals 3. Predicate is a language relational construct of RDFS or OWL (`rdfs:subClassOf`, `owl:equivalentClass`) ## Predicate is an annotation property: If the predicate corresponds to an annotation property, the mapping `<S,P,O>` gets converted to an OWLAnnotationAssertion axiom: `OWLAnnotationAssertion(P,S,O)`. All mapping level metadata (`meta`) gets converted into OWLAnnotation objects which are materialised as axiom annotations on the mapping annotation assertion, see [OWL 2 Structural Specification](https://www.w3.org/TR/owl2-syntax/#Annotations): ``` AnnotationAssertion(meta P, S, O) ``` Where `meta` is a sequence of OWL Annotation objects like: ``` Annotation(Q1,V1) Annotation(Q2,V2) ... Annotation(Qn,Vn) ``` where `Qi` is a SSSOM metadata slot and `Vi` is an annotation value. Note that if a SSSOM metadata element value is a list `L` (i.e. can have multiple elements, such as creator and others), individual annotations are created for each of them: ``` Annotation(Q,V) for all V in L. ``` Example: ``` AnnotationAssertion(Annotation(sssom:creator_id ) Annotation(sssom:mapping_justification semapv:LexicalMatching) skos:exactMatch ) ``` Mapping set level annotations are manifested as ontology annotations in the usual way, according to the [OWL 2 Structural Specification](https://www.w3.org/TR/owl2-syntax/#Annotations). ## Predicate is an object property ### Case 1: Object and Subject are classes.
The mapping `<S,P,O>` gets translated into an existential restriction: ``` SubClassOf(S, P some O) ``` All metadata slots are converted into OWLAnnotation objects and added to the SubClassOf axiom as axiom annotations: ``` SubClassOf(meta, S, P some O) ``` Example: ``` SubClassOf(Annotation(sssom:creator_id ) Annotation(sssom:mapping_justification semapv:LexicalMatching) ObjectSomeValuesFrom( )) ``` ### Case 2: Object and Subject are individuals The mapping `<S,P,O>` gets translated into an object property assertion: ``` ObjectPropertyAssertion(P, S, O) ``` All metadata slots are converted into OWLAnnotation objects and added to the ObjectPropertyAssertion axiom as axiom annotations: ``` ObjectPropertyAssertion(meta, P, S, O) ``` Example: ``` ObjectPropertyAssertion(Annotation(sssom:creator_id ) Annotation(sssom:mapping_justification semapv:LexicalMatching) ) ``` ### Predicate is language relational construct of RDFS or OWL The mapping `<S,P,O>` gets translated into an annotated axiom using the following table: | Mapping predicate | Generated axiom | | ------------------- | --------------------------- | | owl:equivalentClass | EquivalentClass(meta, S, O) | | rdfs:subClassOf | SubClassOf(meta, S, O) | Example: ``` SubClassOf(Annotation(sssom:creator_id ) Annotation(sssom:mapping_justification semapv:LexicalMatching) ) ``` ================================================ FILE: src/docs/spec-formats-rdf.md ================================================ # The SSSOM/RDF serialisation format This section defines how to represent a SSSOM mapping set as a [RDF model](https://www.w3.org/TR/rdf11-concepts/). ## RDF formats The RDF model that represents a SSSOM mapping set is independent of the concrete format that may be used to serialise the model. It is RECOMMENDED that implementations support reading and writing a SSSOM set from and to the [RDF Turtle](https://www.w3.org/TR/turtle/) format at least. They MAY support any other RDF concrete format (e.g. RDF/XML, TriG, N-Triples, etc.).
This specification does not mandate how a concrete RDF syntax is to be used. For example, if the RDF syntax allows named resources and predicates to be serialised as either IRIs or CURIEs, it is left to the discretion of the implementations (or their users) to decide which form to use. ## Representation of slots A metadata slot on any given SSSOM object (such as a `Mapping` or a `MappingSet`) MUST be represented as a RDF triple where: - the subject is the resource representing the SSSOM object; - the predicate is either: - the property indicated by the `URI` field in the LinkML description of the slot, if such a field is present; - or a property constructed by concatenating the `https://w3id.org/sssom/` namespace and the name of the slot; - the object is the value of the slot. ### Representation of slot values The following rules determine how the value of a slot is represented as the object of a RDF triple. #### For slots typed as `sssom:EntityReference` (e.g. `subject_id`, `mapping_justification`, `subject_source`…) The value MUST be represented as a named RDF resource (IRI). #### For slots typed as `sssom:NonRelativeURI` (e.g. `license`, `mapping_provider`, `issue_tracker`…) The value MUST be represented as a named RDF resource (IRI). #### For slots typed as `linkml:date` (e.g. `mapping_date`, `publication_date`) The value MUST be represented as a `xsd:date` literal. #### For slots typed as `linkml:double` (e.g. `mapping_set_confidence`, `confidence`, `similarity_score`) The value MUST be represented as a `xsd:double` literal. #### For slots typed as an enumeration (e.g. `sssom_version`, `mapping_cardinality`, `subject_type`…) If the permissible values for the enumeration are defined in the LinkML model as having an associated `meaning` property, then the value MUST be represented as a named RDF resource with the indicated property. Otherwise, the value MUST be represented as a `xsd:string` literal.
> Examples: > > A `subject_type` slot with the value `owl class` is represented by: > > ```ttl > ?object sssom:subject_type <http://www.w3.org/2002/07/owl#Class> . > ``` > > while a `mapping_cardinality` slot with the value `1:1` is represented by: > > ```ttl > ?object sssom:mapping_cardinality "1:1"^^xsd:string . > ``` > > because the `owl class` value of the `EntityTypeEnum` enumeration has a > `meaning` property of `http://www.w3.org/2002/07/owl#Class`, while the `1:1` > value of the `MappingCardinalityEnum` enumeration has no `meaning` property. #### For slots typed as a SSSOM object (e.g. `mappings`, `extension_definitions`) The value MUST be represented as a RDF resource. Whether the resource is named (IRI) or not (blank node) will depend on the type of the object, see the [section on representing SSSOM objects](#sssom-objects) below for details. ### Representation of multi-valued slots (e.g. `creator_id`, `see_also`, `object_match_field`…) As an exception to the general principle that slots are represented by a single RDF triple, multi-valued slots MUST be represented by as many triples as there are values, each value being the object of one triple. > Non-normative notes: > > 1. This means, in particular, that RDF complex structures intended to > represent collections of values, such as `rdfs:Container` or `rdf:List`, > MUST NOT be used to represent multi-valued SSSOM slots. > 2. This also implies that values in multi-valued slots are _not_ ordered. The other rules above apply to determine how each single value is to be represented. > Example: > > A `creator_id` slot with the values `https://example.org/people/0001` and > `https://example.org/people/0002` is represented by the following two triples: > > ```ttl > ?object dcterms:creator <https://example.org/people/0001> . > ?object dcterms:creator <https://example.org/people/0002> . > ``` ### Representation of extension slots An [extension slot](spec-model.md#non-standard-slots) MUST be represented in a similar way to a standard slot, with the following specific rules.
The predicate is the property associated to the extension slot, as indicated by the `property` slot in the set’s [definition](ExtensionDefinition.md) of the extension. The value of the extension MUST be represented: - as a named RDF resource, if the `type_hint` of the extension definition is `linkml:uriOrCurie`; - otherwise, as a literal of the type indicated by the `type_hint`. ## Representation of SSSOM objects ### Representation of a `Mapping` object The RDF type of a `Mapping` object is `owl:Axiom`. If the `Mapping` object has a `record_id` slot, then the value of that slot MUST be used as the named RDF resource that represents the object (and consequently, that slot MUST NOT be represented using the [general rules](#sssom-slots) for the representation of slots as defined above). Otherwise, the `Mapping` object is represented as a blank node. ### Representation of a `MappingSet` object The RDF type of a `MappingSet` object is `sssom:MappingSet`. A `MappingSet` object MUST be represented by a named RDF resource corresponding to the value of the `mapping_set_id` slot (which consequently MUST NOT be represented using the [general rules](#sssom-slots) for the representation of slots as defined above). The `curie_map` slot MUST NOT be represented using the [general rules](#sssom-slots). Instead, if it is needed it MUST be represented using whatever mechanism is provided by the concrete RDF serialisation format (e.g. `@prefix` declarations in [RDF Turtle](https://www.w3.org/TR/turtle/) or [RDF TriG](https://www.w3.org/TR/trig/), or `xmlns` namespace declarations in [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/)). > Non-normative notes > > 1. The CURIE map may not be needed at all if all named resources and > predicates are always serialised as full-length IRIs. > 2.
If at least some named resources or predicates are serialised as CURIEs, > the RDF requirement that all used prefix names must be declared (using the > appropriate mechanism for the chosen concrete syntax) takes precedence over > the possibility of omitting the declarations of prefix names that are > considered [built-in](spec-intro.md#iri-prefixes) in the context of SSSOM. ### Representation of an `ExtensionDefinition` object The RDF type of an `ExtensionDefinition` object is `sssom:ExtensionDefinition`. An `ExtensionDefinition` object has no identifier of any kind and is always represented by a blank node. ## Special considerations for serialising to RDF When serialising a mapping set to SSSOM/RDF, implementations should consider how the resulting RDF file is intended to be used. In particular, they should ponder whether it is expected that the RDF serialisation can at any time be converted back to any other SSSOM format (e.g. SSSOM/TSV), or if it is only intended to be used by “generic”, non-SSSOM-aware RDF applications. Depending on that intended usage (if it is known), implementations may adopt slightly different behaviours as described in the following subsections. ### Serialisations of identifiers If the serialisation is intended to be convertible back to another SSSOM format (especially the SSSOM/TSV format), implementations MUST declare all the prefixes found in the CURIE map and SHOULD serialise all identifiers as CURIEs using said declared prefixes. > Non-normative explanation > > This is because, if all identifiers are serialised as full-length IRIs, then > even if the RDF file includes prefix declarations, they may be stripped away > by a RDF reader, since they are not needed. And without those prefix > declarations, it would not be possible to serialise the set back as a > SSSOM/TSV file (remember that the SSSOM/TSV format _requires_ that identifiers > be serialised as CURIEs). 
Conversely, if the ability to convert the RDF file back to another SSSOM format is not required, implementations can freely decide whether to serialise identifiers as IRIs or CURIEs (assuming the concrete RDF syntax allows that of course). ### Extension definitions Extension definitions MAY be omitted if the RDF file is only intended to be used by RDF applications. Conversely, they SHOULD be included if the set is intended to be convertible back to another SSSOM format. > Non-normative explanation > > The whole point of an extension definition in SSSOM is to provide (1) a > property that confers some meaning to the extension, and (2) the type of the > expected values. In RDF, as described [above](#extension-slots), those two > bits of information are already contained in the triple that represents the > extension slot, so there is no need for an additional definition. > > But the extension definition also provides the `slot_name` which is used to > represent the extension slot in other formats (especially SSSOM/TSV), so if > conversion back to other SSSOM formats is required, ensuring that the > extension definitions are present in the RDF serialisation is helpful. ### Propagation and condensation Propagatable slots can be represented in RDF indifferently in their propagated or condensed form, following the [normal rules](spec-model.md#propagation-of-mapping-set-slots) for propagation and condensation. But if the RDF file is intended to be used by generic, non-SSSOM-aware RDF applications, then implementations SHOULD serialise propagatable slots in their propagated form. > Non-normative explanation > > Propagation is a SSSOM-specific concept. If a RDF application is provided with > a RDF file representing a set with condensed slots, the application will not > know to propagate the condensed slots at the set level down to the level of > the individual mappings, which will result in the application having an > incomplete view of the mappings.
### Representation of mappings as “direct triples” For every single mapping record in a set, implementations MAY _additionally_ inject a single triple of the form: ```ttl ?subject_id ?predicate_id ?object_id . ``` If so, that behaviour MUST be optional. When that behaviour is enabled, implementations SHOULD NOT inject such triples in the following cases: - when the record represents a literal mapping (that is, `subject_type` or `object_type` – or both – is set to `rdfs literal`); - when the record represents a negated mapping (that is, `predicate_modifier` is set to `Not`); - when the record represents an absence of match (that is, `subject_id` or `object_id` – or both – is set to `sssom:NoTermFound`). In any case, a SSSOM/RDF reader MUST NOT expect the presence of such triples, and if they are present MUST NOT use them to construct mapping records. > Non-normative explanations > > Such “direct triples” are merely a convenience for downstream RDF > applications, allowing them to find a direct link (as a single triple) between > the subject and the object of a mapping, without having to construct such a > link by following the `owl:annotatedSource`, `owl:annotatedProperty`, and > `owl:annotatedTarget` triples. > > It is recommended not to inject such direct triples for literal mapping > records, even if they do have a `subject_id` and an `object_id`, because by > definition the subject and/or the object of such records is not an > identifiable semantic entity and has no business being represented in a RDF > graph. > > It is recommended not to inject such direct triples for negated mapping > records because they would seem to convey a meaning that is the exact opposite > of what the records mean. > > It is recommended not to inject such direct triples for no-match mapping > records since they do not represent a real mapping. 
## Compatibility with pre-standard RDF representations The present specification of the SSSOM/RDF format differs slightly from what several implementations of SSSOM have been producing before the format was formally specified. In the name of backward compatibility, implementations MAY support the alternative rules described in the following subsections when deserialising from RDF. Implementations MUST NOT follow these rules when serialising to RDF. ### Representation of slots typed as `sssom:NonRelativeURI` Implementations MAY accept a value represented as a `xsd:anyURI` literal. For example, implementations MAY accept ```ttl ?mapping sssom:mapping_provider "https://www.ohdsi.org/"^^xsd:anyURI . ``` as an alternative to ```ttl ?mapping sssom:mapping_provider <https://www.ohdsi.org/> . ``` ### Representation of slots typed as an enumeration Implementations MAY accept a value represented as a string literal, even if the value is defined in the LinkML model as having an associated `meaning` property. For example, implementations MAY accept ```ttl ?mapping sssom:predicate_modifier "Not"^^xsd:string . ``` as an alternative to ```ttl ?mapping sssom:predicate_modifier sssom:NegatedPredicate . ``` ### Representation of a `MappingSet` object Implementations MAY accept a `MappingSet` object represented as a blank node, with the `mapping_set_id` slot being represented as any other slot. For example, instead of ```ttl <https://example.org/myset> a sssom:MappingSet . ``` implementations MAY accept ```ttl [] a sssom:MappingSet ; sssom:mapping_set_id <https://example.org/myset> . ``` or even (by also applying the alternative rule regarding the representation of slots typed as `sssom:NonRelativeURI`) ```ttl [] a sssom:MappingSet ; sssom:mapping_set_id "https://example.org/myset"^^xsd:anyURI . ``` ## Examples > This section is non-normative.
Considering the following set in the SSSOM/TSV format: ``` #curie_map: # EXT: https://example.org/properties/ # FOODON: http://purl.obolibrary.org/obo/FOODON_ # KF_FOOD: https://kewl-foodie.inc/food/ # ORCID: https://orcid.org/ #mapping_set_id: https://example.org/sample-set #mapping_set_description: Manually curated alignment of KEWL FOODIE INC internal food and nutrition database with Food Ontology (FOODON). Intended to be used for ontological analysis and grouping of KEWL FOODIE INC related data. #license: https://creativecommons.org/licenses/by/4.0/ #mapping_date: 2025-07-14 #extension_definitions: # - slot_name: ext_fooable # property: EXT:isFooable # type_hint: xsd:boolean subject_id subject_label predicate_id object_id object_label mapping_justification author_id confidence ext_fooable KF_FOOD:F001 apple skos:exactMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration ORCID:0000-0002-7356-1779 0.95 true KF_FOOD:F002 gala skos:exactMatch FOODON:00003348 Gala apple (whole) semapv:ManualMappingCuration ORCID:0000-0002-7356-1779 1 false ``` A valid serialisation of that set in RDF/Turtle would be: ```ttl @prefix EXT: . @prefix FOODON: . @prefix KF_FOOD: . @prefix ORCID: . @prefix dcterms: . @prefix owl: . @prefix pav: . @prefix semapv: . @prefix skos: . @prefix sssom: . @prefix xsd: . a sssom:MappingSet; dcterms:description "Manually curated alignment of KEWL FOODIE INC internal food and nutrition database with Food Ontology (FOODON). 
Intended to be used for ontological analysis and grouping of KEWL FOODIE INC related data."; dcterms:license ; sssom:extension_definitions [ sssom:property EXT:isFooable; sssom:slot_name "ext_fooable"; sssom:type_hint xsd:boolean ]; sssom:mappings [ a owl:Axiom; pav:authoredBy ORCID:0000-0002-7356-1779; dcterms:created "2025-07-14"^^xsd:date; owl:annotatedProperty skos:exactMatch; owl:annotatedSource KF_FOOD:F001; owl:annotatedTarget FOODON:00002473; EXT:isFooable true; sssom:confidence 9.5E-1; sssom:mapping_justification semapv:ManualMappingCuration; sssom:object_label "apple (whole)"; sssom:subject_label "apple" ], [ a owl:Axiom; pav:authoredBy ORCID:0000-0002-7356-1779; dcterms:created "2025-07-14"^^xsd:date; owl:annotatedProperty skos:exactMatch; owl:annotatedSource KF_FOOD:F002; owl:annotatedTarget FOODON:00003348; EXT:isFooable false; sssom:confidence 1.0E0; sssom:mapping_justification semapv:ManualMappingCuration; sssom:object_label "Gala apple (whole)"; sssom:subject_label "gala" ] . ``` Note that the two `Mapping` objects are represented as blank nodes, since the original set does not contain any `record_id` slot. Note also that (1) identifiers are serialised as CURIEs whenever possible, and (2) the definition for the `EXT:isFooable` extension is included. This means that the set can be fully converted back to SSSOM/TSV without any loss of information. ================================================ FILE: src/docs/spec-formats-tsv.md ================================================ # The SSSOM/TSV serialisation format The SSSOM/TSV format is intended as the main format for exchanging SSSOM mapping set objects. The RECOMMENDED filename extension for a SSSOM/TSV file is `.sssom.tsv`, but SSSOM/TSV parsers MUST accept SSSOM/TSV files regardless of their extension. ## Structure A SSSOM/TSV file contains one, and only one, mapping set object. 
It is made of two different parts: * the _metadata block_, which contains essentially all the slots of the [`MappingSet` class](MappingSet.md) except the `mappings` slot; * the _mappings block_ (also called the _TSV section_), which contains the individual mappings. A SSSOM/TSV file MUST NOT contain anything other than those two blocks. ### Metadata block The metadata block is written as the [YAML 1.2](https://yaml.org/spec/1.2.2/) serialisation of the `MappingSet` object, except that the `mappings` slot is _not_ included (since it contains the mappings, that are serialised in the mappings block instead). The metadata block MUST appear at the beginning of the file. Every line of the block MUST be preceded by a `#` character; the `#` character MAY be followed by one or several space characters (U+0020) before the YAML content – if so, every line MUST have the same number of space characters. The metadata block ends with the first line that does not begin with a `#` character, which marks the beginning of the mappings block. The metadata block SHOULD only contain the slots that do have a value. SSSOM/TSV writers SHOULD skip slots with no value when serialising the mapping set object. #### Multi-valued slots with a single value As an exception to the standard YAML rules regarding the serialisation of sequences, a multi-valued slot that happens to contain a single value MAY be serialised as a scalar value rather than as a sequence containing only one item. For example, a `creator_id` slot with the single value `ORCID:1111-2222-3333-4444` MAY be serialised as ```yaml creator_id: "ORCID:1111-2222-3333-4444" ``` This is, strictly speaking, invalid according to the YAML specification; the correct serialisation would be either ```yaml creator_id: [ "ORCID:1111-2222-3333-4444" ] ``` or ```yaml creator_id: - "ORCID:1111-2222-3333-4444" ``` but the scalar form is frequently found in existing SSSOM/TSV files, so SSSOM/TSV parsers SHOULD accept it.
SSSOM/TSV writers SHOULD favour one of the correct YAML serialisations, however. #### Forbidden YAML features The following features of the YAML 1.2 specification MUST NOT be used within the metadata block: * YAML directives ([YAML 1.2 §6.8.1](https://yaml.org/spec/1.2.2/#681-yaml-directives)); * TAG directives ([YAML 1.2 §6.8.2](https://yaml.org/spec/1.2.2/#682-tag-directives)); * Node tags ([YAML 1.2 §6.9.1](https://yaml.org/spec/1.2.2/#691-node-tags)); * Node anchors ([YAML 1.2 §6.9.2](https://yaml.org/spec/1.2.2/#692-node-anchors)); * Alias nodes ([YAML 1.2 §7.1](https://yaml.org/spec/1.2.2/#71-alias-nodes)). SSSOM/TSV writers MUST NOT generate any of those when writing the metadata block. The expected behaviour of SSSOM/TSV parsers upon encountering them is unspecified. ### Mappings block The mappings block contains the mappings, serialised as a matrix where each line represents an individual mapping and each column (separated by tab characters, U+0009) represents one of the slots of the [`Mapping` class](Mapping.md). The mappings block MUST follow immediately the metadata block within a SSSOM/TSV file. It starts with a header line containing the column names, which are the names of the slots in the `Mapping` class. There SHOULD be no empty columns. If none of the mappings in a set has a value for a given slot, that slot SHOULD be skipped when writing the header line and the individual mappings. #### Quoting Within the mappings block, the following quoting rules, adapted from [RFC 4180](https://datatracker.ietf.org/doc/html/rfc4180), apply: 1. Any value MAY be enclosed in double quotes (`"`). 2. Values containing line breaks, double quotes, or tabs (U+0009) MUST be enclosed in double quotes. 3. When a value is enclosed in double quotes, a double quote appearing within the value MUST be escaped by preceding it with another double quote. 
SSSOM/TSV parsers MUST strip any enclosing double quotes and escaping double quotes before passing the parsed objects to the application code. #### Multi-valued slots Multi-valued slots MUST be serialised as a list of values separated by `|` characters. If a value within a multi-valued slot contains a `|` (“pipe”) character, that MUST be escaped by prepending a `\` (“backslash”) character in front of it. Likewise, if a value contains a `\` character, it MUST be escaped by prepending another `\` character in front of it. Conversely, SSSOM/TSV parsers MUST interpret a `\|` sequence as a `|` character that is part of the current value, not as a value separator. Likewise, a `\\` sequence MUST be interpreted as a single `\` character. Any other occurrence of a `\` character (i.e. an occurrence that is not followed by a `|` character or another `\` character) MUST be interpreted as a normal `\` character. Both `\|` and `\\` sequences MUST be interpreted in the order in which they appear in the value (for example, a `\\|` MUST be read as an escaped `\` character followed by a non-escaped `|` character). The quoting rules described in the previous section apply to the entire `|`-separated list of values. ## External metadata mode The metadata block MAY be stored in a separate file from the TSV section, instead of preceding it in the same file as described above. This is called the _external metadata mode_ (by contrast, when the two blocks are in the same file, this is called the _embedded metadata mode_). In external mode, the metadata block follows the same rules as described in the [Metadata block](#metadata-block) section above, except that lines MUST NOT start with a `#` character. It is RECOMMENDED that the file containing the metadata block has the same basename as the file containing the TSV section, with a `.sssom.yml` extension. When an external metadata file is used, the file containing the TSV section MUST NOT contain anything else than the TSV section. 
That is, the first line of that file MUST be the header line containing the column names. Implementations SHOULD support reading SSSOM/TSV files in external metadata mode; they MAY support writing SSSOM/TSV files in that mode. ## Encoding SSSOM/TSV files MUST be encoded in UTF-8 ([RFC 3629](https://datatracker.ietf.org/doc/html/rfc3629#section-13)). They MUST NOT start with a byte order mark (U+FEFF). This applies to external metadata files as well, when the [external metadata mode](#external-metadata-mode) is used. ## Identifiers All identifiers in a SSSOM/TSV file, that is, all the values of slots typed as [EntityReference](EntityReference.md), MUST be serialised in [CURIE syntax](https://www.w3.org/TR/curie/). SSSOM/TSV parsers SHOULD reject files containing identifiers serialised as IRIs. As stated in the description of the model ([Identifiers section](spec-model.md#identifiers)), all prefix names used in CURIEs MUST be declared in the `curie_map` slot of the mapping set object, unless the prefix is a “built-in” prefix (in which case it MAY be omitted). SSSOM/TSV parsers MUST reject a file with undeclared, non-built-in prefix names. A SSSOM/TSV writer SHOULD refuse to serialise a mapping set that contains IRIs that cannot be contracted into CURIEs because there is no suitable prefix declaration in its CURIE map. The use of a custom, ad-hoc logic to infer a possible prefix name where none has been provided (e.g., “if the IRI ends with a `ZZZ_NNNNNNN` pattern, turn it into a `ZZZ:NNNNNNN` CURIE”) is strongly discouraged. ## Non-standard slots If an implementation does not support [non-standard slots](spec-model.md#non-standard-slots), then: * a SSSOM/TSV reader MUST discard any unknown top-level YAML key in the metadata block, and any unknown TSV column in the TSV section; * a SSSOM/TSV writer MUST NOT write any unknown top-level YAML key in the metadata block, or any unknown TSV column in the TSV section. 
### Support for defined extensions This section applies to implementations that support defined extensions. A SSSOM/TSV reader MUST check the validity of the extension definitions listed in the `extension_definitions` slot in the YAML metadata block: * definitions with no `slot_name`, or with a `slot_name` that is not a XML non-colonized name, MUST be ignored; * definitions with any unexpected content (e.g. other keys than just `slot_name`, `property`, and `type_hint`) MUST be ignored; * the `property` and `type_hint` values for a given definition, if present, MUST be CURIEs and MUST be resolvable using the mapping set’s `curie_map`, otherwise the definition MUST be ignored. A SSSOM/TSV reader MUST, upon encountering a non-standard YAML key in the metadata block or an unknown TSV column, check that the name of the key or of the column matches the `slot_name` of one of the extension definitions listed in the mapping set’s `extension_definitions` slot. If there is no match, the non-standard slot MUST be discarded. Upon encountering a non-standard slot whose corresponding definition has a `type_hint` of `https://w3id.org/linkml/Uriorcurie`, the reader SHOULD check that the value is a CURIE and is resolvable using the mapping set’s `curie_map`. ## Compatibility with previous versions of the specification Implementations MUST support the current version of the specification. However, SSSOM/TSV parsers MAY additionally accept to parse files that were compliant to a previous version. This section provides advice for implementations willing to support older versions. ### Compatibility with pre-1.0 versions #### `match_type` slot Initial versions of this specification defined a `match_type` slot on the `Mapping` class. The slot was intended to describe the kind of match that led to the mapping, and accepted values from a specific enumeration.
In SSSOM 0.9.1, this slot was replaced by the `mapping_justification` slot, and the enumeration was replaced by terms from the [SEMAPV vocabulary](https://mapping-commons.github.io/semantic-mapping-vocabulary/). Upon encountering a `match_type` slot, implementations supporting pre-1.0 versions MUST silently transform it into a `mapping_justification` slot and convert the enumeration values using the following table: | `match_type` value | `mapping_justification` value | | ------------------ | ----------------------------- | | Lexical | semapv:LexicalMatching | | Logical | semapv:LogicalMatching | | HumanCurated | semapv:ManualMappingCuration | | Complex | semapv:CompositeMatching | | Unspecified | semapv:UnspecifiedMatching | | SemanticSimilarity | semapv:SemanticSimilarityThresholdMatching | Any other value in the `match_type` slot MUST be treated as an error. If the set contains both `match_type` and `mapping_justification` slots, it is advised to simply ignore the former. #### `match_term_type` slot Initial versions of this specification defined a `match_term_type` slot on the `Mapping` class. The slot was intended to describe what was being matched. In SSSOM 0.9.1, this slot was replaced by two distinct slots called `subject_type` and `object_type` (this notably allowed for the case where the subject and the object are of a different type, something the `match_term_type` slot did not support). 
Upon encountering a `match_term_type` slot, implementations supporting pre-1.0 versions MUST silently transform it into a pair of `subject_type` and `object_type` slots, both slots having the same value derived from the original value using the following table: | `match_term_type` value | `subject_type` and `object_type` value | | ----------------------- | -------------------------------------- | | ConceptMatch | skos concept | | ClassMatch | owl class | | ObjectPropertyMatch | owl object property | | IndividualMatch | owl named individual | | DataPropertyMatch | owl data property | | TermMatch | rdfs literal | Any other value in the `match_term_type` slot MUST be treated as an error. If the set already contains `subject_type` and `object_type` slots, any `match_term_type` slot can be silently ignored. #### semantic_similarity_score and semantic_similarity_measure Initial versions of this specification defined a `semantic_similarity_score` slot to store the semantic similarity, and a `semantic_similarity_measure` slot to describe how the semantic similarity is assessed. In SSSOM 1.0, those slots were replaced by more generic `similarity_score` and `similarity_measure` slots. Upon encountering a `semantic_similarity_score` (respectively `semantic_similarity_measure`) slot, implementations supporting pre-1.0 versions MUST silently transform it into a `similarity_score` (respectively `similarity_measure`) slot. No changes on the value of the slot are required. ## Canonical SSSOM/TSV format This section defines a “canonical” variant of the SSSOM/TSV format, which has stricter serialisation rules. The purpose of the canonical SSSOM/TSV format is to minimise differences across SSSOM/TSV files that would be induced by small diverging behaviours between different SSSOM/TSV implementations. The rules in this section apply to SSSOM/TSV writers only.
SSSOM/TSV writers SHOULD write files in the canonical format, but SSSOM/TSV readers MUST NOT reject a file solely because it does not follow the canonical rules. ### General rules A canonical SSSOM/TSV writer: * MUST use line breaks made of only the U+000A character (no U+000D, and no U+000D + U+000A sequences); * MUST condense the slots whenever possible, as described in the [Condensation](#condensation) section. ### Rules for the metadata block When writing the metadata block, a canonical SSSOM/TSV writer: * MUST embed the metadata block in the same file as the TSV section (no external metadata); * MUST NOT insert additional space characters between the initial `#` character and the YAML content; * MUST serialise multi-valued slots as YAML “block sequences” ([YAML Specification §8.2.1](https://yaml.org/spec/1.2.2/#821-block-sequences)) – even when the list of values contains only one item; * MUST serialise scalar values in YAML “plain style” ([YAML Specification §7.3.3](https://yaml.org/spec/1.2.2/#733-plain-style)) whenever possible, otherwise in “double-quoted style” ([YAML Specification §7.3.1](https://yaml.org/spec/1.2.2/#731-double-quoted-style)); * MUST serialise the slots in the order they appear in the [“Slots” table](MappingSet.md#slots), in the documentation for the `MappingSet` class; * MUST NOT include in the CURIE map the prefix names that are considered “built-in”; * MUST NOT include in the CURIE map any prefix name that is not used anywhere in the set; * MUST sort the prefix names in the CURIE map in lexicographical order. 
In addition, if [extension slots](spec-model.md#non-standard-slots) are supported, the writer: * MUST write any extension slot in the mapping set _after_ the standard slots; * MUST sort the extension slots lexicographically on the `property` of their corresponding extension definitions; * MUST sort extension definitions on their `property` value; * MUST NOT include an extension definition if the corresponding extension is not used anywhere in the set. ### Rules for the mappings block When writing the mappings block, a canonical SSSOM/TSV writer: * MUST quote values only when needed, as per the rules in the [Quoting](#quoting) section; * MUST serialise floating point values with up to three digits as needed after the decimal point, rounding the last digit to the nearest neighbour (rounding up if both neighbours are equidistant); * MUST write the columns in the order the slots appear in the [“Slots” table](Mapping.md#slots), in the documentation for the `Mapping` class; * MUST sort the mappings in lexicographical order on all their slots, in the order the slots appear in the [“Slots” table](Mapping.md#slots). In addition, if [extension slots](spec-model.md#non-standard-slots) are supported, the writer: * MUST write any non-standard column _after_ the standard columns; * MUST sort the non-standard columns lexicographically on the `property` of their corresponding extension definitions. ## Examples This section is _non-normative_. A SSSOM/TSV file in embedded metadata mode: ``` #curie_map: # FOODON: http://purl.obolibrary.org/obo/FOODON_ # KF_FOOD: https://kewl-foodie.inc/food/ # orcid: https://orcid.org/ #mapping_set_id: https://w3id.org/sssom/tutorial/example1.sssom.tsv #mapping_set_description: Manually curated alignment of KEWL FOODIE INC internal food and nutrition database with Food Ontology (FOODON). Intended to be used for ontological analysis and grouping of KEWL FOODIE INC related data.
#license: https://creativecommons.org/licenses/by/4.0/ #mapping_date: 2022-05-02 subject_id subject_label predicate_id object_id object_label mapping_justification author_id confidence comment KF_FOOD:F001 apple skos:exactMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 0.95 "We could map to FOODON:03310788 instead to cover sliced apples, but only ""whole"" apple types exist." KF_FOOD:F002 gala skos:exactMatch FOODON:00003348 Gala apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 1 KF_FOOD:F003 pink skos:exactMatch FOODON:00004186 Pink apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 0.9 "We could map to FOODON:00004187 instead which more specifically refers to ""raw"" Pink apples. Decided against to be consistent with other mapping choices." KF_FOOD:F004 braeburn skos:broadMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 1 ``` The same set in external metadata mode: first the file containing the metadata block: ```yaml curie_map: FOODON: http://purl.obolibrary.org/obo/FOODON_ KF_FOOD: https://kewl-foodie.inc/food/ orcid: https://orcid.org/ mapping_set_id: https://w3id.org/sssom/tutorial/example1.sssom.tsv mapping_set_description: Manually curated alignment of KEWL FOODIE INC internal food and nutrition database with Food Ontology (FOODON). Intended to be used for ontological analysis and grouping of KEWL FOODIE INC related data. license: https://creativecommons.org/licenses/by/4.0/ mapping_date: 2022-05-02 ``` then the file containing the mappings block: ``` subject_id subject_label predicate_id object_id object_label mapping_justification author_id confidence comment KF_FOOD:F001 apple skos:exactMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 0.95 "We could map to FOODON:03310788 instead to cover sliced apples, but only ""whole"" apple types exist." 
KF_FOOD:F002 gala skos:exactMatch FOODON:00003348 Gala apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 1 KF_FOOD:F003 pink skos:exactMatch FOODON:00004186 Pink apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 0.9 "We could map to FOODON:00004187 instead which more specifically refers to ""raw"" Pink apples. Decided against to be consistent with other mapping choices." KF_FOOD:F004 braeburn skos:broadMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 1 ``` ### Invalid examples Illegal case 1: the metadata block cannot contain comments that are not part of the metadata. ``` # This is a comment that does not belong here. #curie_map: # HP: "http://purl.obolibrary.org/obo/HP_" # MP: "http://purl.obolibrary.org/obo/MP_" # orcid: "https://orcid.org/" # This is another comment that also does not belong here. #creator_id: # - "orcid:0000-0002-7356-1779" ``` Illegal case 2: there should be no empty lines. ``` #curie_map: # HP: "http://purl.obolibrary.org/obo/HP_" # MP: "http://purl.obolibrary.org/obo/MP_" # orcid: "https://orcid.org/" #creator_id: # - "orcid:0000-0002-7356-1779" ``` ================================================ FILE: src/docs/spec-formats.md ================================================ # SSSOM serialisation formats The SSSOM standard defines the following serialisation formats for storing and exchanging mapping sets: * the [SSSOM/TSV](spec-formats-tsv.md) format; * the [SSSOM/JSON](spec-formats-json.md) format; * the [SSSOM/RDF](spec-formats-rdf.md) format; * and the [OWL/RDF](spec-formats-owl.md) format. Implementations MUST support the SSSOM/TSV format. They MAY support the other formats. ================================================ FILE: src/docs/spec-intro.md ================================================ # Specification of the SSSOM standard This document is the official specification for the SSSOM standard. 
It is divided in three sections covering the three different components of the standard: * the specification for the [data model](spec-model.md), to manipulate SSSOM mappings and mapping sets in your programs; * the specification for the [serialisation formats](spec-formats.md), to read, write, and exchange SSSOM mapping sets; * the specification for [supporting functions](spec-support.md) to help manipulating SSSOM mappings and mapping sets. All three sections are _normative_. ## Conventions used in this document ### Key words Throughout the specification, the key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, “SHOULD NOT”, “RECOMMENDED”, “NOT RECOMMENDED”, “MAY”, and “OPTIONAL” are to be interpreted as described in [BCP 14](https://datatracker.ietf.org/doc/html/bcp14) when, and only when, they appear in all capitals, as shown here. ### IRI prefixes Throughout the specification, the following IRI prefix names are used: | Prefix name | IRI prefix | | ----------- | ---------- | | owl | http://www.w3.org/2002/07/owl# | | rdf | http://www.w3.org/1999/02/22-rdf-syntax-ns# | | rdfs | http://www.w3.org/2000/01/rdf-schema# | | semapv | https://w3id.org/semapv/vocab/ | | skos | http://www.w3.org/2004/02/skos/core# | | sssom | https://w3id.org/sssom/ | | xsd | http://www.w3.org/2001/XMLSchema# | | linkml | https://w3id.org/linkml/ | ================================================ FILE: src/docs/spec-model.md ================================================ # The SSSOM data model The SSSOM data model (hereafter “the model”) defines the data structure to represent and manipulate SSSOM concepts. The model is formally described as a [LinkML](https://linkml.io/) schema, from which the [documentation](linkml-index.md) is derived. This section provides an overview of the model and supplementary information that may not be found in the schema (and its derived documentation) itself. 
Of note, the schema, not this section, is always the authoritative source of truth for all questions pertaining to the model. ## Overview The model consists of a handful of classes, the most important of them being the [`Mapping` class](Mapping.md) and the [`MappingSet` class](MappingSet.md). Any SSSOM implementation MUST support those two classes and all their slots; support for the other classes is OPTIONAL. The `Mapping` class represents an individual mapping. Fundamental slots in that class are: * `subject_id` and `object_id`, referring to the entities being mapped to each other; * `predicate_id`, referring to the relationship between the mapped entities; * `mapping_justification`, which should provide the justification for the mapping. Those slots are mandatory (including the `mapping_justification` slot: the SSSOM standard posits that there can be no mapping without some form of justification) and an implementation MUST NOT allow the creation of a mapping object that does not have a value for any one of them. Other slots are intended to provide further details about a mapping. Those “further details” are sometimes referred to as “mapping metadata”, though the SSSOM standard makes no formal distinction between “data” and “metadata” – there are only “data about a mapping”. The `MappingSet` class represents, well, a set of individual mappings, which are contained in the `mappings` slot (a list of `Mapping` instances). Other slots in that class are intended either to provide further details about the set itself (sometimes referred to as “mapping set metadata”, with the same caveat as above regarding the data/metadata distinction), or to provide common details for all the mappings in the set (see the [Propagatable slots](#propagatable-slots) section further below for details). 
Of note, within a set, a mapping may not necessarily be uniquely identified by the combination of its four mandatory slots (`subject_id`, `predicate_id`, `object_id`, and `mapping_justification`). A set may very well contain several mappings with the same subject, predicate, object, and justification, but that differ on some of the other, complementary slots. ## Identifiers Throughout the model, identifiers to external resources are represented using the custom type [`EntityReference`](EntityReference.md) (based on the LinkML type [`uriorcurie`](https://w3id.org/linkml/Uriorcurie)), which accepts both full-length IRIs and [CURIEs](https://www.w3.org/TR/curie/) as possible identifier formats. (Note however that serialisation formats may mandate the use of one identifier format over the other; for example, the [SSSOM/TSV](spec-formats-tsv.md) format requires the systematic use of CURIEs, whereas the [OWL/RDF](spec-formats-owl.md) format conversely requires the systematic use of IRIs). Whenever the CURIE syntax is used in a mapping set (whether this is by choice of the SSSOM producer, or because it is mandated by the serialisation format), all CURIEs MUST be unambiguously resolvable into corresponding full-length IRIs without requiring any external resources. This means that any prefix name used MUST be properly declared in the set’s `curie_map` slot, which is a dictionary associating a prefix name to an IRI prefix. By exception, prefix names listed in the table found in the [IRI prefixes](spec-intro.md#iri-prefixes) section are considered “built-in”. As such, they MAY be omitted from the `curie_map`. If they are not omitted, they MUST point to the same IRI prefixes as in the aforementioned table. ## Propagatable slots As mentioned briefly above, there are two different types of slots in the `MappingSet` class: * slots that provide information about the set itself; * slots that provide information about all the mappings in the set. 
The latter are called “propagatable slots”. In the LinkML model, they are marked with a `propagated` annotation whose value is set to `true`. For convenience, here is the current list of propagatable slots: * `curation_rule`, * `curation_rule_text`, * `cardinality_scope`, * `mapping_date`, * `mapping_provider`, * `mapping_tool`, * `mapping_tool_version`, * `object_match_field`, * `object_preprocessing`, * `object_source`, * `object_source_version`, * `object_type`, * `subject_match_field`, * `subject_preprocessing`, * `subject_source`, * `subject_source_version`, * `subject_type`, * `predicate_type`, * `similarity_measure`. When a mapping set object has a value in one of its propagatable slots, this MUST be interpreted as if all mappings within the set had that same value in their corresponding slot. For example, if a set has the value _foo_ in its `mapping_tool` slot, all the mappings in that set MUST be treated as if they had the value _foo_ in their `mapping_tool` slot. This mechanism is intended as a convenience, so that a slot which has the same value for all mappings in a set can be specified only once at the level of the set rather than for each individual mapping. Slots that are not in the above list (“non-propagatable slots”) describe the mapping set itself, not the mappings it contains, even if the slot also exists on the `Mapping` class. For example, the `creator_id` slot, when used in the `MappingSet` class, is intended to refer to the creators of the set, _not_ the creators of the individual mappings (which may be different, and which are listed in the `creator_id` slot of every mapping). ### Propagation “Propagation” is the operation of assigning to individual mapping records in a set the values from the propagatable slots of the set. For any given propagatable slot, propagation is only allowed if none of the individual mapping records already have their own value in that slot. 
If any record (even only one record) has a value in that slot, then the slot MUST be considered as non-propagatable. Otherwise, to propagate the slot an implementation MUST (1) copy over the value of the propagatable slot on the mapping set to the corresponding slot of every individual mapping records, and (2) remove the propagated value from the mapping set. ### Condensation “Condensation” is the opposite of “propagation”. It is the operation of assigning common values to the propagatable slots of the set, based on the values of these slots on individual mapping records. For any given propagatable slot, condensation is only allowed if (1) all mapping records in the set have the same value for that slot, and (2) the mapping set itself does not already have a value in the slot, unless that value happens to be the same as the value in all records. If those two conditions are met, then to condense the slot an implementation MUST (1) set the value of the slot on the mapping set to the common value of the slot in all mapping records, and (2) remove the condensed value from all the mapping records. ### When to perform propagation and condensation Implementations SHOULD support propagation and condensation. The two features MUST NOT be dissociated; that is, an implementation that supports propagation MUST also support condensation, and the other way round. Unless specified otherwise in the specification for the [SSSOM serialisation formats](spec-formats.md), if an implementation supports propagation and condensation, then: * propagation SHOULD be performed by a SSSOM parser before passing the parsed objects to the application code; * condensation SHOULD be performed by a SSSOM writer prior to writing the set into a file, however that behaviour MUST be deactivatable. ## Allowed and common mapping predicates Implementations MUST accept any arbitrary predicate in the `predicate_id` slot. 
The following mapping predicates are considered common, and implementations MAY encourage users to use them: | Predicate | Description | | --------- | ----------- | | owl:sameAs | The subject and the object are instances (OWL individuals), and the two instances are the same. | | owl:equivalentClass | The subject and the object are OWL classes, and the two classes are the same. | | owl:equivalentProperty | The subject and the object are OWL object, data, or annotation properties, and the two properties are the same. | | rdfs:subClassOf | The subject and the object are OWL classes, and the subject is a subclass of the object. | | rdfs:subPropertyOf | The subject and the object are OWL object, data, or annotation properties, and the subject is a subproperty of the object. | | skos:relatedMatch | The subject and the object are associated in some unspecified way. | | skos:closeMatch | The subject and the object are sufficiently similar that they can be used interchangeably in some information retrieval applications. | | skos:exactMatch | The subject and the object can, with a high degree of confidence, be used interchangeably across a wide range of information retrieval applications. | | skos:narrowMatch | The object is a narrower concept than the subject. | | skos:broadMatch | The object is a broader concept than the subject. | | oboInOwl:hasDbXref | Two terms are related in some way. The meaning is frequently consistent across a single set of mappings. Note this property is often overloaded even where the terms are of a different nature (e.g. interpro2go). | | rdfs:seeAlso | The subject and the object are associated in some unspecified way. The object IRI often resolves to a resource on the web that provides additional information. 
| In addition, predicates from the following sources MAY also be encouraged: * any relation from the [Relation Ontology (RO)](https://obofoundry.org/ontology/ro.html); * any relation under [skos:mappingRelation](http://www.w3.org/2004/02/skos/core#mappingRelation) in the [Semantic Mapping Vocabulary](https://mapping-commons.github.io/semantic-mapping-vocabulary/). ## Literal mappings The SSSOM model is primarily intended to represent mappings between semantic entities. However, it may also be used to represent mappings where at least one side is a literal string that does not have an identifier of its own. Any such mapping is henceforth called a _literal mapping_. To represent a mapping whose subject (resp. object) is a literal: * the `subject_type` (resp. `object_type`) slot MUST be set to `rdfs literal`; * the `subject_label` (resp. `object_label`) slot MUST be set to the literal itself; * the `subject_id` (resp. `object_id`) slot MAY be left empty. The last point is an exception to the normal rules about required slots, which state that a mapping must always have a `subject_id` and an `object_id`. Implementations MUST accept a mapping without a `subject_id` (resp. `object_id`) _if and only if_ the `subject_type` (resp. `object_type`) slot is set to `rdfs literal`. All other slots in the `Mapping` class may be used normally in a literal mapping, with the same meaning as for a non-literal mapping. When computing the cardinality of mappings in a set (e.g. to set the value of the `mapping_cardinality` slot), if the mapping has a literal subject (resp. object), then the `subject_label` (resp. `object_label`) slot must be used for determining the number of occurrences of the subject (resp. object) in the set. ## Representing unmapped entities The special value `sssom:NoTermFound` MAY be used as the `object_id` of a mapping to explicitly state that the subject of said mapping cannot be mapped to any entity in the domain represented by the `object_source` slot. 
Likewise, the `sssom:NoTermFound` value MAY be used as the `subject_id` of a mapping to state that the object of said mapping cannot be mapped to any entity in the domain represented by the `subject_source` slot. When that special value is used as the `subject_id` (respectively `object_id`), the `subject_source` (respectively `object_source`) slot SHOULD be defined. The `sssom:NoTermFound` value MUST NOT be used in any other slot than `subject_id` or `object_id`. The meaning of the NOT predicate modifier in a mapping that refers to `sssom:NoTermFound` is unspecified. When computing cardinality values (to fill the `mapping_cardinality` slot): (1) a mapping record with a `object_id` (respectively `subject_id`) of `sssom:NoTermFound` MUST be assigned a cardinality value of `1:0` (respectively `0:1`), regardless of any other record; (2) a mapping record with both the `subject_id` and the `object_id` set to `sssom:NoTermFound` MUST be assigned a cardinality value of `0:0`, regardless of any other record; (3) such records MUST be ignored when computing the cardinality of other records. ## Mapping cardinality and cardinality scope The `mapping_cardinality` slot is somewhat special in that its value is only meaningful within a given context, or “scope”: a mapping record in itself does not have any cardinality – it only has one when it is part of a larger set of records. Consider the following three records (set metadata, and in particular prefix declarations, have been omitted for brevity): | `subject_id` | `predicate_id` | `object_id` | `object_source` | | -------------- | ---------------- | ------------ | --------------- | | UBERON:0000011 | skos:broadMatch | VHOG:0000755 | obo:VHOG | | UBERON:0000011 | skos:narrowMatch | EHDAA:4655 | obo:EHDAA | | UBERON:0000011 | skos:narrowMatch | NCIT:C12764 | obo:NCIT | Within that particular set, all three records have a cardinality of `1:n` (one subject, UBERON:0000011, mapped to many objects). 
But cardinality can also be computed on smaller subsets. For example: * if we are only interested in records that have the same predicate, then the first record has a cardinality of `1:1` (UBERON:0000011 is mapped to only one object through a `skos:broadMatch` predicate), while the other two still have a cardinality of `1:n` (UBERON:0000011 is mapped to two different objects through a `skos:narrowMatch` predicate); * if we are only interested in records where the objects are from the same source, then all three records have a cardinality of `1:1` (UBERON:0000011 is mapped to only one object in each of the three vocabularies VHOG, EHDAA, and NCIT). It is left to users and downstream applications of SSSOM to decide which type of cardinality (relative to the entire set or relative to any of the many possible subsets) will be the most useful to them. The `cardinality_scope` slot is intended to allow them to specify which cardinality they use. When computing cardinality values: * if the cardinality is computed on the entire set, the `cardinality_scope` slot MUST be left empty (or absent); * if the cardinality is computed on a subset, the `cardinality_scope` slot MUST be filled with the list of slots that are used to define the subset. ## Non-standard slots Implementations are only REQUIRED to support the standard metadata slots defined in the SSSOM LinkML schema. However, implementations MAY support the use of supplementary, non-standard slots (hereafter called _extension slots_ or simply _extensions_). There are two types of extension slots: _defined_ extension slots and _undefined_ extension slots. ### Defined extensions Defined extensions are non-standard slots that are explicitly declared (or, _defined_) before being used. Implementations SHOULD support the use of defined extensions. Extensions are defined in the `extension_definition` slot of the `MappingSet` object. 
Each definition is comprised of three elements: * the name of the slot, as it will appear when used in a mapping set (`slot_name`); * a property intended to specify the meaning of the slot (`property`); * the type of values expected by the slot (`type_hint`). A definition MUST have at least a `slot_name`. The name MUST be a XML “non-colonized name” (“NCName”, see [Namespaces in XML, §2](https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-NCName)). The name MUST NOT match the name of an existing standard slot. To avoid any conflict with a future version of the SSSOM specification (which could introduce new standard slot names), implementations are strongly encouraged to craft extension slot names that start with the `ext_` prefix. No new standard slot with a name starting with `ext_` will ever be introduced in any future version of the standard. (This is advice for SSSOM producers only; SSSOM consumers MUST NOT reject an extension slot solely on the basis that its name does not start with `ext_`.) A definition SHOULD have a `property`. If it does not, implementations MUST automatically construct a default property by concatenating the prefix `http://sssom.invalid/` with the name of the extension. The slot name and the property MUST be unique to each definition. No two definitions can share the same name and/or the same property. A definition MAY have a `type_hint`. If it does not, a default type of `http://www.w3.org/2001/XMLSchema#string` is assumed. Once defined, an extension slot may be used as a supplementary slot in either the `Mapping` class or the `MappingSet` class (or both), as if it was a normal, standard slot. How those slots are represented internally and provided to client code is left at the discretion of the implementations. ### Undefined extensions Undefined extensions are non-standard slots that are not explicitly defined as described in the previous section. Implementations MAY support undefined extensions. 
Upon encountering a non-standard slot that is not a defined extension, an implementation that supports undefined extensions MUST behave as if the slot had been defined with: * a `property` constructed by catenating the prefix `http://sssom.invalid/` to the name of the slot; * a `type_hint` of `http://www.w3.org/2001/XMLSchema#string`. ### Restrictions on the values of extension slots #### General restrictions The following restrictions apply to all extension slots, regardless of whether they are defined or undefined. Each mapping set and each mapping can have at most _one_ value for each extension slot. The expected behaviour upon encountering a repeated extension slot is unspecified. An extension value MUST be either a string or an instance of a simple data type such as a numerical value (integer or floating point), a boolean value, or a date or datetime value. In particular, composite data structures (e.g. lists or dictionaries) MUST NOT be used as extension values. It is always possible to use arbitrarily complex values by encoding them as literal strings. However, how complex values would be encoded is out of scope of this specification; implementations MUST treat such values as opaque strings. 
#### Further restrictions for typed defined extensions If a defined extension slot has a `type_hint` other than `http://www.w3.org/2001/XMLSchema#string`, implementations MAY enforce further constraints on extension values based on the type hint, according to the following table: | Type hint | Constraints | | --------- | ----------- | | http://www.w3.org/2001/XMLSchema#integer | Implementations MAY check that the value is an integer | | http://www.w3.org/2001/XMLSchema#double | Implementations MAY check that the value is a floating-point number | | http://www.w3.org/2001/XMLSchema#boolean | Implementations MAY check that the value is either `true` or `false` | | http://www.w3.org/2001/XMLSchema#date | Implementations MAY check that the value is a date in the ISO 8601 format (`yyyy-mm-dd`) | | http://www.w3.org/2001/XMLSchema#datetime | Implementations MAY check that the value is a date and time value in the ISO 8601 format (`yyyy-mm-ddThh:mm:ssTZ`) | Implementations MAY decide to recognise more types and to enforce type-specific constraints. For example, an implementation could recognise the type `http://www.w3.org/2001/XMLSchema#negativeInteger` and check that the value starts with a minus sign. ## Versioning Starting from version 1.1 of the specification, the `MappingSet` class has an optional slot named `sssom_version` which indicates the version of the specification that the set declares itself to be compliant with. ### Versioning rules The SSSOM specification mostly follows the [Semantic Versioning principles](https://semver.org/), but uses only version numbers with two components: a _major_ number _X_ and a _minor_ number _Y_, expressed as `X.Y`. A set that is compliant with a minor version _X.Y_ is also compliant with any minor version _X.Y+n_, for any value of _n_. The opposite is not true: a set compliant with a minor version _X.Y_ may not necessarily be compliant with a minor version _X.Y-n_. 
A set that is compliant with a major version _X_ may not be compliant with any other major version _X+n_ or _X-n_. Therefore, an implementation that is itself compliant with version _X.Y_ SHOULD always accept a set compliant with any version _X.Y-n_. It MAY reject outright a set compliant with any version _X.Y+n_ (more recent minor version), _X-n_ (older major version), or _X+n_ (more recent major version). In other words, the SSSOM specification guarantees backwards compatibility between two versions (in that a set compliant with an older version can be used with an implementation compliant with a newer version) only insofar as only the _minor_ version has changed. ### Using the `sssom_version` slot When reading a SSSOM set: (A) If the set contains a `sssom_version` slot, implementations SHOULD check whether they recognize the indicated version as a supported version according to the rules in the previous section; if they don’t, they MAY reject the set outright. (B) If the set does not contain a `sssom_version` slot, it MUST be assumed to be compliant with version 1.0. When generating a SSSOM mapping set: (A) If the set uses slots or enum values that were added in more recent versions than 1.0, then the `sssom_version` slot MUST be set to the lowest version that defines all the slots effectively used. (B) If the set only uses slots or values that already existed in version 1.0, then the set is effectively compliant with said version 1.0 and the `sssom_version` slot MAY be omitted entirely. Note that, if the `sssom_version` slot is _not_ omitted, then it MUST be set to `1.1`, since that slot itself has been added in version 1.1. It follows that a `sssom_version=1.0` slot (a set that would declare itself to be compliant with version 1.0) is self-contradictory. 
### Model changes across versions For all slots that were added to the specification after version 1.0, the LinkML model contains an `added_in` annotation that indicates the exact version in which the slot was introduced. Not all changes can be annotated thusly in the LinkML model, though. For changes other than the complete addition of a new slot, implementations can refer to the following subsections. #### Model changes in version 1.1 * The `similarity_measure` slot, which previously only existed on the `Mapping` class, has been added to the `MappingSet` class. * The value `composed entity expression` has been added to the `EntityType` enumeration. * The type of the `see_also` slot has been changed to `sssom:NonRelativeURI`. When parsing a SSSOM 1.0 set, implementations SHOULD accept arbitrary string values in that slot. * All slots that were typed as `xsd:anyURI` have been re-typed as `sssom:NonRelativeURI`. When parsing a SSSOM 1.0 set, implementations SHOULD accept relative URI values in those slots. * The `curation_rule` and `curation_rule_text` slots which previously only existed on the `Mapping` class, have been added to the `MappingSet` class. Both slots have now been typed [propagatable](#propagatable-slots). * A new value `0:0` has been added to the `mapping_cardinality_enum`. ================================================ FILE: src/docs/spec-support-hashing.md ================================================ # Hashing a SSSOM mapping record SSSOM implementations SHOULD provide a function to compute a hash on a SSSOM mapping record. That function is hereafter called “the SSSOM hashing function” and defined below. ## Rationale and purpose The SSSOM hashing function defined here makes it possible to compute a value derived from a mapping record in such a way that, if two mapping records yield the same value, the records are highly likely to be identical. The function is intended for **interoperability** between SSSOM implementations. 
Its point is to ensure that one can always compute the same hash for the same mapping record regardless of which SSSOM implementation is used. When an implementation needs to compute a record hash **for its own internal purpose** (for example, to store records into a hash table), it may use whatever method is best suited without regard for the SSSOM hashing function. ## Hashing procedure The general principle of the SSSOM hashing function is to compute a hexadecimal-encoded FNV64 hash of a canonical S-expression representing the mapping record. ### Step 0: Propagate all condensed slots If the mapping set the mapping record to hash belongs to contains condensed slots, they MUST be propagated to the mapping record [as per the standard rules](spec-model.md#propagation). ### Step 1: Turn the mapping record into a canonical S-expression This step creates a representation of the mapping record into a canonical S-expression as per [RFC 9804](https://www.rfc-editor.org/rfc/rfc9804#name-canonical-representation). The S-expression MUST be assembled as follows: 1. Start with `(7:mapping(`. 2. Iterate over all slots of the `Mapping` class, in the order in which they are [listed](../Mapping/#slots) in the LinkML model. Exclude the `record_id` slot and the `mapping_cardinality` slot. For all other slots: 1. If the slot has no value for the mapping record to hash, skip to next slot. 2. Append to the S-expression `(N:SLOTNAME`, where _SLOTNAME_ is the LinkML name for the slot and _N_ is the length of the slot name (so, for example, `(10:subject_id`, `(9:author_id`, `(10:confidence`, etc.). 3. If the slot is defined as a multi-valued slot (and even if it has only one value in the mapping record to hash): 1. Append `(`. 2. Sort the list of values in lexicographical order and iterate over the sorted values. For each value _V_, append `N:V`, where _N_ is the length of _V_. 3. Append `)`. 4. If the slot is typed as a floating point number (e.g. 
`confidence`), convert the value into a string _V_ according to the rules set forth in the section [Formatting floating-point values](#formatting-floating-point-values). Then append `N:V`, where _N_ is the length of _V_. 5. If the slot is typed as an enumeration (e.g. `subject_type`), append `N:ENUMVALUE`, where _ENUMVALUE_ is the allowed value in the enumeration as specified in the LinkML model, and _N_ is the length of _ENUMVALUE_ (e.g. `9:owl class` for a possible value for the `subject_type` slot). 6. If the slot is typed as a date, append `10:YYYY-MM-DD`, where _YYYY-MM-DD_ is the representation of the value in ISO-8601 format. 7. If the slot is typed as an entity reference, ensure the value is expanded according to the mapping set’s prefix map and append `N:V`, where _V_ is the expanded reference and _N_ is the length of the expanded reference. 8. If the slot is of any other type, append `N:V`, where _V_ is the string value of the slot and _N_ is the length of the string value. 9. Append `)`. 3. If the implementation supports [extension slots](spec-model.md#non-standard-slots) and the mapping record does have such slots: 1. Append `(10:extensions(`. 2. Sort extension values by their properties in lexicographical order. 3. For each extension value: 1. Append `(N:PROP`, where _PROP_ is the property identifying the extension and _N_ is the length of the property. 2. Use the table below to transform the extension value into a string _V_ based on the declared type of the extension. 3. Append `N:V)`, where _N_ is the length of the string value _V_. 4. Append `))`. 4. Append `))`. 
Converting extension values to string: | Declared extension type | Conversion to string | | ----------------------- | ---------------------------------------------------------------------------------------------------------- | | `xsd:string` | No conversion needed, use the value directly | | `xsd:integer` | Base 10 representation of the integer value | | `xsd:double` | Apply the rules from the section [Formatting floating-point values](#formatting-floating-point-values) | | `xsd:boolean` | `true` or `false` | | `xsd:date` | ISO-8601 representation: `YYYY-MM-DD` | | `xsd:datetime` | ISO-8601 representation: `YYYY-MM-DDThh:mm:ssTZ` where `TZ` is the zone offset (e.g. `+01:00` or `-06:30`) | | `xsd:anyURI` | No conversion needed, use the value directly | | `linkml:uriOrCurie` | Expand the value according to the set’s prefix map | | any other type | unspecified | ### Step 2: Compute the FNV64 hash of the S-expression Encode the S-expression assembled in step 1 into UTF-8 (if it was not already assembled directly in UTF-8). Then hash the array of bytes containing the UTF-8 representation of the S-expression using the 64-bit variant of the FNV-1a hash function as defined in [RFC 9923](https://www.rfc-editor.org/rfc/rfc9923.html). ### Step 3: Encode the hash into a hexadecimal string Encode the hash computed in step 2 into uppercase hexadecimal, also known as Base16 encoding as defined in [RFC 4648](https://datatracker.ietf.org/doc/html/rfc4648#section-8). ### Formatting floating-point values Whenever a floating-point number needs to be appended to the canonical S-expression built in Step 2 above, the following specific rules apply: 1. The non-fractional part MUST NOT be omitted, even it is zero. For example, `0.7` MUST NOT be written as `.7`. 2. The fractional part MUST be truncated to _up to_ 3 digits _as needed_. If the fractional part can be written in less than 3 digits, then it MUST NOT be right-padded with zeros. For example, `0.7` MUST NOT be written as `0.700`. 
3. If the fractional part needs to be truncated, the value MUST be rounded to the nearest value representable with 3 digits, rounding half away from zero. This corresponds to the _roundTiesToAway_ mode as defined by [IEEE 754-2019 §4.3.1](https://doi.org/10.1109/IEEESTD.2019.8766229). The following table gives some examples of rounding after truncation: | Original value | Canonical representation | | -------------- | ------------------------ | | 0.7832... | 0.783 | | 0.7835... | 0.784 | | 0.7836... | 0.784 | | -0.7832... | -0.783 | | -0.7836... | -0.784 | | -0.7835... | -0.784 | ## Example > This section is not normative. It provides a step-by-step example of how to > apply the above procedure. Given the following mapping set in SSSOM/TSV format: ``` #curie_map: # FBbt: http://purl.obolibrary.org/obo/FBbt_ # UBERON: http://purl.obolibrary.org/obo/UBERON_ # orcid: https://orcid.org/ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# subject_id predicate_id object_id mapping_justification creator_id FBbt:00001234 skos:exactMatch UBERON:0005678 semapv:ManualMappingCuration orcid:0000-0000-5678-1234|orcid:0000-0000-1234-5678 ``` Applying step 1 of the above procedure to the only mapping record of that set would yield the following canonical S-expression (**whitespaces added for clarity**, they MUST NOT appear in the actual S-expression): ``` (7:mapping( (10:subject_id44:http://purl.obolibrary.org/obo/FBbt_00001234) (12:predicate_id46:http://www.w3.org/2004/02/skos/core#exactMatch) (9:object_id45:http://purl.obolibrary.org/obo/UBERON_0005678) (21:mapping_justification51:https://w3id.org/semapv/vocab/ManualMappingCuration) (10:creator_id( 37:https://orcid.org/0000-0000-1234-5678 37:https://orcid.org/0000-0000-5678-1234 )) )) ``` Applying the FNV64 hash function to the above S-expression and encoding the resulting bytes in hexadecimal would yield the following final value: `0A442FB005783031`. 
## Test vectors > This section is not normative. It provides examples of SSSOM mapping sets > along with the canonical S-expression and the Base16-encoded hash value of the > set’s only record. **Source set:** ``` #curie_map: # FOODON: http://purl.obolibrary.org/obo/FOODON_ # KF_FOOD: https://kewl-foodie.ince/food/ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # wikidata: https://www.wikidata.org/wiki/ #subject_source: KF_FOOD:DB #object_source: wikidata:Q55118395 #object_source_version: http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl subject_id predicate_id object_id mapping_justification confidence mapping_date KF_FOOD:F001 skos:exactMatch FOODON:00002473 semapv:ManualMappingCuration 0.95 2022-05-02 ``` S-expression: ``` (7:mapping((10:subject_id34:https://kewl-foodie.ince/food/F001)(12:predicate_id46:http://www.w3.org/2004/02/skos/core#exactMatch)(9:object_id46:http://purl.obolibrary.org/obo/FOODON_00002473)(21:mapping_justification51:https://w3id.org/semapv/vocab/ManualMappingCuration)(14:subject_source32:https://kewl-foodie.ince/food/DB)(13:object_source39:https://www.wikidata.org/wiki/Q55118395)(21:object_source_version68:http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl)(12:mapping_date10:2022-05-02)(10:confidence4:0.95))) ``` Hash value: ``` 97170EB542E9AE8F ``` **Source set:** ``` #curie_map: # FBbt: http://purl.obolibrary.org/obo/FBbt_ # UBERON: http://purl.obolibrary.org/obo/UBERON_ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# # example: https://example.org/sets/record-id# record_id subject_id predicate_id object_id mapping_justification example:0000001 FBbt:0009124 skos:exactMatch UBERON:0000003 semapv:LexicalMatching ``` S-expression: ``` 
(7:mapping((10:subject_id43:http://purl.obolibrary.org/obo/FBbt_0009124)(12:predicate_id46:http://www.w3.org/2004/02/skos/core#exactMatch)(9:object_id45:http://purl.obolibrary.org/obo/UBERON_0000003)(21:mapping_justification45:https://w3id.org/semapv/vocab/LexicalMatching))) ``` Hash value: ``` 18F3436E89AA1AA2 ``` **Source set:** ``` #curie_map: # HP: http://purl.obolibrary.org/obo/HP_ # MP: http://purl.obolibrary.org/obo/MP_ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# #mapping_provider: https://w3id.org/sssom/core_team subject_id predicate_id object_id mapping_justification similarity_score HP:0009124 skos:exactMatch MP:0000003 semapv:LexicalSimilarityThresholdMatching 0.8 ``` S-expression: ``` (7:mapping((10:subject_id41:http://purl.obolibrary.org/obo/HP_0009124)(12:predicate_id46:http://www.w3.org/2004/02/skos/core#exactMatch)(9:object_id41:http://purl.obolibrary.org/obo/MP_0000003)(21:mapping_justification64:https://w3id.org/semapv/vocab/LexicalSimilarityThresholdMatching)(16:mapping_provider32:https://w3id.org/sssom/core_team)(16:similarity_score3:0.8))) ``` Hash value: ``` 0D45A2E8C64EBD65 ``` **Source set:** ``` #curie_map: # COMENT: https://example.com/entities/ # EXPROP: https://example.org/properties/ # ORGENT: https://example.org/entities/ # semapv: https://w3id.org/semapv/vocab/ # skos: http://www.w3.org/2004/02/skos/core# #extension_definitions: # - slot_name: ext_bar # property: EXPROP:barProperty # type_hint: xsd:integer # - slot_name: ext_baz # property: EXPROP:bazProperty # type_hint: linkml:Uriorcurie subject_id subject_label predicate_id object_id object_label mapping_justification ext_bar ext_baz ORGENT:0001 alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration 111 ORGENT:BAZ_0001 ``` S-expression: ``` 
(7:mapping((10:subject_id33:https://example.org/entities/0001)(13:subject_label5:alice)(12:predicate_id46:http://www.w3.org/2004/02/skos/core#closeMatch)(9:object_id33:https://example.com/entities/0011)(12:object_label5:alpha)(21:mapping_justification51:https://w3id.org/semapv/vocab/ManualMappingCuration)(10:extensions((42:https://example.org/properties/barProperty3:111)(42:https://example.org/properties/bazProperty37:https://example.org/entities/BAZ_0001))))) ``` Hash value: ``` 66BD0A57A976A109 ``` ================================================ FILE: src/docs/spec-support.md ================================================ # SSSOM supporting functions This section defines functions and behaviours that SSSOM implementations should support to help users manipulate SSSOM mapping sets. - [Hashing mapping records](spec-support-hashing.md) - [Chaining rules](chaining-rules.md) In addition, SSSOM implementations SHOULD also support the proposed [Mapping Sameness Identifier](https://ts4nfdi.github.io/mapping-sameness-identifier/) standard; that is, implementations SHOULD offer a way to obtain the Mapping Sameness Identifier for a given SSSOM mapping record. ================================================ FILE: src/docs/toolkit.md ================================================ # The SSSOM Toolkit In the following we will give a brief introduction into the SSSOM toolkit. For more detailed documentation please refer to https://mapping-commons.github.io/sssom-py. ## Pre-requisites - Complete the [basic SSSOM tutorial](tutorial.md) - [Install SSSOM toolkit](https://mapping-commons.github.io/sssom-py/installation.html). Alternatively, you can install the [Ontology Development Kit (ODK)](https://github.com/INCATools/ontology-development-kit) and follow the tutorial using its [docker image](https://oboacademy.github.io/obook/howto/odk-setup/). - We are assuming a Unix shell for this tutorial, but most of the principles should apply to the Windows CMD as well. 
Windows users may prefer to install the ODK (see above). ## Overview SSSOM toolkit (STK), previously known as `sssom-py`, is a set of utility methods for processing SSSOM files, packaged as a Command Line Client (CLI) and a [python package](https://pypi.org/project/sssom/). In the following, we will extract mappings from an ontology an process them with the CLI. The goal is to give a sense of the functionality of the toolkit. Additional and more up-to-date information on usage can be found [here](https://mapping-commons.github.io/sssom-py). ## Table of Contents 1. `parse`: [Extracting mappings from an external source](#parse) 2. `merge`: [Combining mappings from several sources](#merge) 3. `convert`: [Converting an SSSOM mapping table into different formats](#convert) ## Extracting mappings from an external source One key issue developers are faced with is to convert various different mapping formats into a common representation (e.g. SSSOM). The SSSOM toolkit (STK) already implements a number of commonly use mapping formats: 1. [OWL Ontologies](https://en.wikipedia.org/wiki/Web_Ontology_Language) 2. [Alignment API](https://moex.gitlabpages.inria.fr/alignapi/) Format (format used by the Ontology Alignment Evaluation Initiative, OAEI) 3. Parsers for SNOMED mapping format and FHIR Concept Map are [in the making](https://github.com/mapping-commons/sssom-py/pull/207), June 2022. Here we use Uberon, an anatomy ontology in the biomedical domain. ``` wget http://purl.obolibrary.org/obo/uberon/uberon-base.json -O uberon-base.json ``` Feel free to download the file manually if you do not have `wget` installed. Now use `sssom parse` to extract all the mappings provided by the ontology. As there are multiple json based formats that can be parsed, you have to tell `sssom` which format you are using: `--input-format obographs-json`. 
``` sssom parse uberon-base.json --input-format obographs-json --output uberon.sssom.tsv ``` From a CLI design perspective we already notice a few things: - `uberon-base.json` is passed to the STK _as an argument_ (without an option like `-i`). This is the case for most _primary inputs_ (mapping tables, source files) throughout the SSSOM client. - The output generated by the above command is large. There seem to be a lot of messages where some URL `does not follow any known prefixes`: ``` WARNING:root:http://dbpedia.org/ontology/AnatomicalStructure does not follow any known prefixes WARNING:root:http://uri.neuinfo.org/nif/nifstd/nlx_subcell_100205 does not follow any known prefixes WARNING:root:http://neurolex.org/wiki/Category:Embryonic_organism does not follow any known prefixes WARNING:root:http://www.informatics.jax.org/cookbook/figures/figure20.shtml does not follow any known prefixes WARNING:root:http://mbe.oxfordjournals.org/content/26/3/613/F1.large.jpg does not follow any known prefixes WARNING:root:http://palaeos.com/vertebrates/glossary/images/450x218xEctocuneiform.gif.pagespeed.ic.kaiuLYQELL.png does not follow any known prefixes WARNING:root:http://palaeos.com/vertebrates/bones/dermal/images/289x311xPalatine1.gif.pagespeed.ic.tglmNBrF4D.png does not follow any known prefixes WARNING:root:http://uri.neuinfo.org/nif/nifstd/nifext_14 does not follow any known prefixes .... ``` Understanding this is important to understand a lot about how SSSOM treats entities in general. ### Why are there so many `does not follow any known prefixes` warnings? CURIEs are a key concept for the representation of SSSOM documents, in particular its table. All fields that constitute a reference to some entity, such as ids (`subject_id`, `object_id`, `predicate_id`), and other fields such as `mapping_justification` are represented in CURIE syntax. 
The [Semantic Web](https://www.w3.org/standards/semanticweb/) uses URIs (which look more like URLs rather than CURIEs) to refer to entities - there is, however, no standard protocol to translate a URI into a _Compact_ URI (or CURIE). Efforts such as https://bioregistry.io/, https://github.com/prefixcommons or https://identifiers.org/ try to bring a bit of an organisation to prefixes. In particular the former two curate maps between prefixes and URIs. - URI: `http://purl.obolibrary.org/obo/MONDO_0000001` - CURIE: `MONDO:0000001` - PREFIX: `MONDO` - URI expansion: `http://purl.obolibrary.org/obo/MONDO_` Now the problem is that over the years, many very idiosyncratic URIs where used to denote entities in ontologies. While the STK tries to figure out the correct prefixes using https://bioregistry.io/, many times it fails - in these cases, the user _must provide its own prefix map_. Lets create a simple one, and save it as `metadata.yml` (we call it "metadata", because we will add more metadata to it in this tutorial): ``` curie_map: dbpedia: http://dbpedia.org/ontology/ ``` We can now use this _in addition to the default prefix maps_: ``` sssom parse uberon-base.json --input-format obographs-json --metadata metadata.yml --prefix-map-mode merged --output uberon.sssom.tsv ``` ## Combining mappings from several sources ## Converting an SSSOM mapping table into different formats ## Other methods: - cliquesummary - correlations - crosstab - dedupe - diff - dosql - partition - ptable - reconcile-prefixes - rewire - sort - sparql - split - validate _Under construction_. 
================================================ FILE: src/docs/training.md ================================================ ## SSSOM Training materials - [Elevator pitch](#elevator) - [Tutorials and Guides](#guides) - [Related tutorials](#related) ### Elevator pitch ### Tutorials and Guides - [Mapping curation with SSSOM](https://oboacademy.github.io/obook/tutorial/sssom-tutorial/) - [Are these two entities the same? A guide.](https://oboacademy.github.io/obook/howto/are-two-entities-the-same/). An important tutorial that explains that it is not directly possible to determine if two things are the same across ontologies, but its still worth doing when explicitly recording the rationale. - [Linking across vocabularies: Semantic Entity Matching](https://oboacademy.github.io/obook/lesson/entity-matching/): Entity matching is the process of establishing a link between an identifier in one semantic space to an identifier in another. There are many cultures of thought around entity matching, including Ontology Matching, Entity Resolution and Entity Linking. ### Related tutorials - [Introduction to processing mappings with SSSOM and sssom-py CLI](https://oboacademy.github.io/obook/tutorial/sssom-toolkit/) - [Introduction to matching with OAK lexmatch](https://oboacademy.github.io/obook/tutorial/lexmatch-tutorial/) - [Curating Semantic Mappings with Biomappings](https://oboacademy.github.io/obook/tutorial/biomappings/) ================================================ FILE: src/docs/tutorial.md ================================================ # Introduction to mapping curation with SSSOM Mappings between entities from ontologies, terminologies and databases are created for many reasons (data integration, knowledge graphs) and maintained in many different ways (automated matching, manual curation). In the following tutorial, we will learn how to curate semantic mappings manually using SSSOM. 
Knowledge about manual mapping curation is important even in scenarios where most, if not all, of the mapping curation is performed automatically - the basic principles are still the same. ## Pre-requisites We expect the reader of this tutorial to have a basic understanding of the following: - What are ontology classes? What is a database? - What is an (ontology) mapping? - Why do we need to map across ontologies and between databases and ontologies? We do provide a few materials in the [Background](#background) section below that touch on the above concepts, but a detailed discussion is out of scope. ## Table of contents - [Background](#background) - [Ontology alignment](#align) - [What are we mapping?](#what) - [CURIEs, URIs and databases](#curie) - [How to create an SSSOM mapping set from scratch](#scratch) - [Manually curating mapping sets](#scratchstart) - [Automated processing 1: Creating an embedded SSSOM file](#automated1) ## Background As a reminder, a SSSOM mapping comprises three major components: 1. The **mapping** itself, that is, a triple `` that reflects a correspondence of a `subject` entity, for example a class in an ontology, to an `object` entity, for example an identifier in some database, via a semantic mapping `predicate`, such as `skos:exactMatch`. 2. A **mapping justification**, the process or activity that led us to consider the mapping to be correct or reasonable (typical examples: labels match exactly; two classes are logically equivalent; a domain expert determined that two terms reflect the same real world concept). 3. **Provenance metadata**, including information about `author` and `mapping_tool`. In the following, we will give pointers to some useful background materials before we describe how SSSOM mappings are created. ### Ontology alignment/matching Ontology alignment is the process of determining correspondences between ontological concepts. The usage of "alignment", "matching" and "mapping" is fuzzy in practice. 
From the perspective of SSSOM, alignment usually involves determining _all_ (or a more or less complete set of) correspondences between ontological concepts of two or more source ontologies. The most important resource on the subject is ["Ontology Matching"](https://link.springer.com/book/10.1007/978-3-642-38721-0) by Jérôme Euzenat and Pavel Shvaiko. If you are interested in really diving into the subject, there is no avoiding this book! This 25 minute course unit by the OpenHPI gives a nice overview over the area, which is relevant to all mapping activities: Another useful overview is this one by the Knowledge and Data VU Amsterdam. Especially after minute 12, we learn a bit about the differences of OWL and SKOS. A 10 minute deep-dive into Jerome Euzenat classification of ontology matching techniques can be seen here: ### What are we mapping? In SSSOM we are concerned with mapping _information entities_, i.e. representations of a real world entities. Examples of such entities are: - Classes, Individuals and Properties in an ontology. - Entities in Databases, such as a specific person in a "Person" table of a relational database. - A specific value in the slot of a data model, for example the "UNIVERSITY" constant in the `highest-degree` enumeration for a demographics survey data model. - A specific code from a code system or terminology such as ICD10CM. Information entities represent _real world objects_ such as diseases (e.g. Alzheimer's, Diabetes), kinds of vegetables (Asparagus, Broccoli), concrete instances of vegetables (a specific broccoli that was sold in your local supermarket yesterday). #### What kind of entities can we _not_ map with SSSOM? Some of the limitations of SSSOM are discussed in our [paper](https://arxiv.org/abs/2112.07051). A selection of the most important things that cannot be mapped at the moment: - Compound/complex entities, i.e. entities that are defined by more than one term. 
For example, we cannot currently map "Raw apple" (subject) to "Apple" and "Raw" (two objects). - Anything that is not an entity, e.g. unit conversion rules (1000mg maps to 1g * 1000) or functions. - Highly contextual entities like "PERSON:1" as they enter the hospital. As a rule of thumb, we can map any entity for which (1) we can provide a single identifier and (2) whose identifier establishes its context (i.e. no further information is needed to understand the meaning of the identifier). Note that _literal values_ are a special case - SSSOM is not designed for mapping literals to entity identifiers, but there are some discussions on how to do this anyways [here](https://github.com/mapping-commons/sssom/issues/81). ### CURIEs, URIs and databases A mapping involves three entities: 1. A `subject` (the entity which is mapped to some other entity) 1. An `object` (the entity the subject is mapped to) 1. A semantic `mapping predicate`, such as "skos:exactMatch" which defines how the subject entity is mapped to the object entity. All three _must_ be referred to by an **identifier in CURIE syntax** ([Compact URI](https://www.w3.org/TR/2010/NOTE-curie-20101216/)) when using the SSSOM table format or JSON, or an IRI (Internationalized Resource Identifier) when you are using the RDF representation of SSSOM. This is necessary to ensure that entities are globally unique and mapping sets are fully interoperable across an organisation and beyond. While these concepts are common practice in the Semantic Web world, they may be less well understood in the database world. In fact, they can be quite awkward: - Your database my use `p9787869` to identify a specific person in a "Person" table of a relational database. - Your data model for a demographics survey uses, among others, the `UNIVERSITY` constant in the `highest-degree` enumeration. To be compliant with SSSOM, such values must be "curified". 
While this process sounds daunting at first, it is essential: Both the `p9787869` identifier and the `UNIVERSITY` constant may be used in different contexts (different databases or data models) to refer to entirely different entities! While there is no 100% reliable guide for "curification", we usually recommend the following steps: * Choose a globally unique URI prefix which can unambiguously define the context of your entity. For example (1) `http://embl.org/ebi/person/p9787869` to refer to the person in your `Person` table and (2) `http://embl.org/demographics-survey-datamodel/demographics.highest_education#UNIVERSITY`. In an ideal world, these can be de-referenced (i.e. you can look them up in a web-browser), but the important thing is that they are globally unique (and persistent), so that they cannot be confused with, for example, the `UNIVERSITY` code in another data model. * We select a reasonable prefix for the code, for example (1) `embl.ebi.person` and (2) `demographics-survey-datamodel.demographics.highest_education`. Note these do not need to be globally unique anymore. Indeed, you could, if you wanted to, use (much) shorter prefixes. (NOTE: some people disagree with this and strive for globally unique prefixes. In the biomedical domain, for example, we try to coordinate prefixes at http://bioregistry.io/. This is not however, necessary when using SSSOM). * We record the prefixes and their URI prefixes (sometimes called URI expansions) in the `curie_map` of our SSSOM file: ``` curie_map: embl.ebi.person: "http://embl.org/ebi/person/" demographics-survey-datamodel.demographics.highest_education: "http://embl.org/demographics-survey-datamodel/demographics.highest_education#" ``` * Now we can refer to our entities in the SSSOM mapping table like this: (1) `embl.ebi.person:p9787869` and (2) `demographics-survey-datamodel.demographics.highest_education:UNIVERSITY`. 
This may strike some users as verbose - but the concept of unique identifiers for all information entities is _at the heart of SSSOM_. There is an initial cost to carefully defining namespaces for the various vocabularies and contexts (data model enums, value sets), but the ability to unambiguously refer to an entity will pay of as the organisation grows and data needs to be integrated from a wide variety of sources. _Tangent:_ See [here](https://hl7.org/fhir/conceptmap-example.ttl.html) for an example how [FHIR](http://hl7.org/fhir/), a standard for health care data exchange, published by HL7, deals with this: Rather than using a lot of prefixes, FHIR chooses to have one small namespace for `fhir`, and then having the path to the data model element all the way to its value as the local identifier. ## How to create an SSSOM mapping set from scratch SSSOM mapping sets can be created as part of automated processes, like ontology matchers, or manually by ontology curators. While there is overlap, it makes sense to look at both cases separately. To remind yourself why you should build SSSOM mapping sets in the first place, please refer to [the FAQ](faq.md#why). ### Manually curating mapping sets To gradually improve terminological mapping practices we are proposing a [5-star system for mappings](5star-mappings.md). For the sake of this tutorial, we will focus on producing a [solid 3-Star mapping set](5star-mappings.md) with the following metadata: **Core mapping metadata**: - `subject_id`: The ID of the subject of the mapping - `predicate_id`: The ID of the predicate of the mapping - `object_id`: The ID of the object of the mapping **Mapping justification metadata**: - `mapping_justification`: the process or activity that led us to believe the mapping to be correct or reasonable. **Basic provenance metadata**: - `mapping_date`: The date the mapping was asserted. This is different from the date the mapping was published or compiled in a SSSOM file. 
- `author_id`: Identifies the persons or groups responsible for asserting the mappings. Recommended to be a (pipe-separated) list of ORCIDs or otherwise identifying URLs, but any identifying string (such as name and affiliation) is permissible. - `mapping_set_description`: A description of the mapping set, providing context and motivation. - `license`: An identifier for a license description. - `mapping_set_id`: A unique identifier of the mapping set. - `mapping_set_version`: The version of a mapping set. - `subject_source`: URI of source the subject. - `subject_source_version`: The version of the source of the subject. - `object_source`: URI of source the subject. - `object_source_version`: The version of the source of the object. - `confidence`: the level of certainty you have for the mapping to be true (based on the process used to confirm or generate it). **Some convenience metadata** - `subject_label`: The human readable label of the subject. - `object_label`: The human readable label of the object. #### The tutorial scenario You are charged with aligning your organisations (KEWL FOODIE INC) internal database about food and nutrition with [Food Ontology (FOODON)](https://foodon.org/). In your database, you have a table with food items: | ID | LABEL | | --- | ---- | | F001 | apple | | F002 | gala | | F003 | pink | | F004 | braeburn | As a first pass, you are tasked to map the food items (kinds of apples) in your database to classes in the FOODON ontology. #### Getting the tools together To complete this tutorial, we need the following tools: 1. A table editor. In this tutorial we will use [Google Sheets](https://docs.google.com/spreadsheets/u/0/). Manually curating mappings is often done in a collaborative fashion. We like Google Sheets because it allows multiple people to edit the same mapping set at once. 1. OPTIONAL: The [SSSOM toolkit](https://mapping-commons.github.io/sssom-py) installed (requires python 3.9+). 
#### Creating a first draft of the mappings First create a google sheet with the following columns: | subject_id | subject_label | predicate_id | object_id | object_label | mapping_justification | mapping_date | author_id | subject_source | subject_source_version | object_source | object_source_version | confidence | |------------|---------------|--------------|-----------|--------------|-----------------------|--------------|-----------|----------------|------------------------|---------------|-----------------------|------------| As we are mapping database identifiers, our first step is _curiefy our database identifiers_. Read up in detail on why this is done [here](#curie). We chose to use the following URI prefix for our food database: http://kewl-foodie.com/foods/, with the `KF_FOODS:` prefix (for now, we just document this information in the side, but later, we will add this to our mapping table). Next, we will add all the entities we hope to align to the mapping table above (we removed some columns here for readability, we will get back to these later): | subject_id | subject_label | predicate_id | object_id | object_label | confidence | |--------------|---------------|--------------|-----------|--------------|-----------------------| | KF_FOOD:F001 | apple | | | | | | KF_FOOD:F002 | gala | | | | | | KF_FOOD:F003 | pink | | | | | | KF_FOOD:F004 | braeburn | | | | | While not necessary from a computational perspective, we recommend to document the labels of both the subject and the object to make the mapping table easier to process for human curators. The next step is now to try and identify suitable terms from FOODON to map to. 
In the biomedical domain, most curators will [search OLS](https://www.ebi.ac.uk/ols/search?q=apple&groupField=iri&start=0&ontology=foodon) or [Ontobee](https://www.ontobee.org/search?ontology=FOODON&keywords=apple&submit=Search+terms), but some more technically advanced users may choose to use [SPARQL over ontobee](https://api.triplydb.com/s/nq_xvl3JQ) or another endpoint: ``` PREFIX rdf: PREFIX rdfs: SELECT * WHERE { ?sub rdfs:label ?obj . FILTER(regex(str(?obj), "apple")) FILTER(STRSTARTS(str(?sub),"http://purl.obolibrary.org/obo/FOODON_")) } ``` A detailed discussion on mapping predicates can be found [here](mapping-predicates.md). ##### Mapping "apple", attempt 1 Our first attempt is to try and map `KF_FOOD:F001` (apple). At the time of writing, a search for the string "apple" just across the labels in FOODON reveals more than 300 results. There are no exact matches for the search string "apple", i.e. there is no entity in FOODON that has the label "apple" exactly. Rather than sifting through the large set of results, we move on to try to map a more specific element first. As FOODON is an ontology, having a mapping to a more specific element (e.g. `gala`) may help us to find an appropriate mapping for the more general concept (e.g. `apple`), which should be hierarchically related to the more specific term. ##### Mapping "gala" Indeed, a [search for "gala"](https://www.ebi.ac.uk/ols/search?q=gala&groupField=iri&start=0&ontology=foodon) reveals one single result: [Gala apple (whole)](https://www.ebi.ac.uk/ols/ontologies/foodon/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FFOODON_00003348). How do we know if this is a good mapping for our own database entity `gala`? This is a very difficult question, and there is no perfect answer. It is important to remember that mappings should not be judged in terms of "correct" or "wrong", but in terms of "fit for purpose", or, in the case of SSSOM, "fit for most purposes". 
The following thoughts should cross the curator's mind: - There does not seem to be another FOODON class concerned with "Gala". - From the description, "A pome fruit of a Gala apple tree cultivar." it seems like we are indeed talking about a kind of apple. ([The picture in the OLS Term information box also helps.](https://www.ebi.ac.uk/ols/ontologies/foodon/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FFOODON_00003348)) - A quick email to our product team at KEWL FOODIE INC confirms that indeed, our `gala` database entity and FOODON's `Gala apple (whole)` class seem to refer to the same entity. As apples in our database are usually considered "whole", we do not concern ourselves further with that slightly ambiguous part of the label. (Can I map my apple snack pack which has the "whole" apple cut in slices to `FOODON:00003348`?) We add the new mapping to our mapping table. Due to our domain expertise and consultation with the product team of our company, we are very confident (1.0 or 100%) that the mapping between `KF_FOOD:F002` and `FOODON:00003348` is exact (for exact matches, we use `skos:exactMatch` as per [SSSOM convention](https://mapping-commons.github.io/sssom/spec-model/#allowed-and-common-mapping-predicates)). | subject_id | subject_label | predicate_id | object_id | object_label | confidence | |--------------|---------------|-----------------|-----------------|--------------------|------------| | KF_FOOD:F001 | apple | | | | | | KF_FOOD:F002 | gala | skos:exactMatch | FOODON:00003348 | Gala apple (whole) | 1 | | KF_FOOD:F003 | pink | | | | | | KF_FOOD:F004 | braeburn | | | | | ##### Mapping "apple", attempt 2 Given our mapping of [Gala apple (whole)](https://www.ebi.ac.uk/ols/ontologies/foodon/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FFOODON_00003348) we take a better look at the class hierarchy around it. We notice three things: - There is indeed a class called "apple (whole)" which seems to fit our purpose.
This also seems to be consistent with our choice of "Gala apple (whole)". - What is, however, annoying is that there is also a ["apple (whole or parts)"](https://www.ebi.ac.uk/ols/ontologies/foodon/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FFOODON_03310788&viewMode=All&siblings=false) class. KEWL FOODS INC definitely has plans to introduce products involving sliced Gala apples! - FOODON does not have a concept of a sliced Gala apple. Again, our judgement as curators is asked here. There is no "correct" or "wrong". To keep things consistent, we decide to map to the "whole" apple, but we take a mental note that this might change in the future. We also take a physical note to _document this design decision_ as a comment. | subject_id | subject_label | predicate_id | object_id | object_label | confidence | comment | |--------------|---------------|-----------------|-----------------|--------------------|------------|-----------------------------------------------------------------------------------------------------| | KF_FOOD:F001 | apple | skos:exactMatch | FOODON:00002473 | apple (whole) | 0.95 | We could map to FOODON:03310788 instead to cover sliced apples, but only "whole" apple types exist. | | KF_FOOD:F002 | gala | skos:exactMatch | FOODON:00003348 | Gala apple (whole) | 1 | | | KF_FOOD:F003 | pink | | | | | | | KF_FOOD:F004 | braeburn | | | | | | ##### Mapping "pink" In the same hierarchy as `apple (whole)`, we find [Pink apple (whole)](https://www.ebi.ac.uk/ols/ontologies/foodon/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FFOODON_00004186). This is seems like an excellent match, consistent with our previous design decisions. 
However two observations leave us uncertain: - The [Pink apple (whole)](https://www.ebi.ac.uk/ols/ontologies/foodon/terms?iri=http%3A%2F%2Fpurl.obolibrary.org%2Fobo%2FFOODON_00004186) class has no definition (at the time of writing this tutorial at least) and no pictures, so we cannot be 100% certain that our notion of "pink" is the same as Foodon. A search on Wikipedia reveals different names, like "Pink Pearl" and "Pink Lady", which makes us a bit uncertain. - In contrast to "Gala apple (whole)", "Pink apple (whole)" has a further subclass, "Pink apple (whole, raw)". What does that mean? All data in our KEWL FOODS INC database pertains to raw apple, so is this now a better match? Raw as opposed to what? Cooked? Again, there is no great recipe to solve this dilemma. We chose our default recipe: 1. prefer consistent mapping rules over occasionally increased precision (not always a good idea) 2. document design decision | subject_id | subject_label | predicate_id | object_id | object_label | confidence | comment | |--------------|---------------|-----------------|-----------------|--------------------|------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------| | KF_FOOD:F001 | apple | skos:exactMatch | FOODON:00002473 | apple (whole) | 0.95 | We could map to FOODON:03310788 instead to cover sliced apples, but only "whole" apple types exist. | | KF_FOOD:F002 | gala | skos:exactMatch | FOODON:00003348 | Gala apple (whole) | 1 | | | KF_FOOD:F003 | pink | skos:exactMatch | FOODON:00004186 | Pink apple (whole) | 0.9 | We could map to FOODON:00004187 instead which more specifically refers to "raw" Pink apples. Decided against to be consistent with other mapping choices. | | KF_FOOD:F004 | braeburn | | | | | | ##### Mapping "braeburn" We now turn our attention to the last database entity: `KF_FOOD:F004` (braeburn). 
Unfortunately, our search for `braeburn`, `brae-burn` yields no results in Foodon. We search Wikipedia and Google for potential synonyms of Braeburn that might have been missed by the FOODON developers, but are unsuccessful. In the end, we give up and decide that there is no matching concept for `KF_FOOD:F004` (braeburn) in FOODON. Now we have to make a choice on how to reflect that in our mapping set: - We can document directly the fact that there is no `skos:exactMatch` in our SSSOM table. - We can map `KF_FOOD:F004` (braeburn) to a more general concept, i.e. `apple (whole)`. - We can do both. For our data integration efforts, it is generally useful to know if no exact match could be found. Here, again, we have two options: - we can convey this information by omission. By not including a mapping in the dataset, it does not exist. The downside is that we do not know further down the line whether (a) we have looked and there really was no suitable code or (b) we have not looked. - we can convey this information by using a special code `sssom:NoMapping`. (NOTE as of 2 May 2022, the final decision on how this is represented has not been made. Follow [this discussion](https://github.com/mapping-commons/sssom/issues/28)). In our case, we have plans to extend our manual mapping efforts with automated ones. We want to use manual non-mapping assertions to filter out false positive mappings with our automated approaches, so we decide to go with the second option and make the non-mapping explicit. The second question is whether to include a _less precise_ mapping. This depends heavily on the target use case. As a rule of thumb, if the target use case requires precise 1:1 mappings (for example, data transformation use cases often do), we do not include any broad mappings. If our use case is data aggregation, broad matches can still be very useful: At least, we will be able to use the hierarchical structure of FOODON to retrieve all kinds of apples in our FOOD database!
We are interested in data aggregation, so we decide to include the mapping. | subject_id | subject_label | predicate_id | object_id | object_label | confidence | comment | |--------------|---------------|-----------------|-----------------|--------------------|------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------| | KF_FOOD:F001 | apple | skos:exactMatch | FOODON:00002473 | apple (whole) | 0.95 | We could map to FOODON:03310788 instead to cover sliced apples, but only "whole" apple types exist. | | KF_FOOD:F002 | gala | skos:exactMatch | FOODON:00003348 | Gala apple (whole) | 1 | | | KF_FOOD:F003 | pink | skos:exactMatch | FOODON:00004186 | Pink apple (whole) | 0.9 | We could map to FOODON:00004187 instead which more specifically refers to "raw" Pink apples. Decided against to be consistent with other mapping choices. | | KF_FOOD:F004 | braeburn | skos:exactMatch | sssom:NoMapping | | 1 | | | KF_FOOD:F004 | braeburn | skos:broadMatch | FOODON:00002473 | apple (whole) | 1 | | #### Adding rich metadata We are done curating the basic mappings. Next, we will add some richer metadata for the mapping set. For this tutorial we will add the metadata introduced [here](#scratchstart). **Mapping justification metadata**: `mapping_justification`: the process or activity that led us to believe the mapping to be correct or reasonable. This is the most important piece of metadata and a pivotal concept for SSSOM curation in general. Let us think about all the various ways that can lead us to believe a mapping to be correct. The most crude thing to document would be: "a Human determined this mapping". We do that by documenting the mapping justification `semapv:HumanCuration`. This justification is a vague placeholder, but it instills some confidence in the mapping consumer (the user) that someone with at least some domain expertise determined the mapping to be ok.
We will discuss `mapping_justification`s in more detail in a later tutorial on automated matching, where we have many more fine-grained distinctions, like "the justification for asserting this mapping is that the label of the subject matches to an exact synonym of the object after applying 'stemming' during preprocessing". Nevertheless, modelling human curation better is one of the future goals of SSSOM. The key is to document "curation rules", which contain the conditions and assumptions made by the (human) mapping author when asserting the mapping. In the absence of a [formal element](https://github.com/mapping-commons/sssom/issues/166) (at least at the time of this writing, May 2022), you should try and document such curation rules in the `comment` field. **Basic provenance metadata**: `mapping_date`: The date the mapping was asserted. Why is this important? Time of an assertion is essential provenance. It allows us to prefer assertions (mapping decisions) that were done later, but it also gives us a hint how old a mapping is, in particular if the source versions are not, or cannot, be documented. It is a very easy element to document, and we should try to do that at all times. `author_id`: Identifies the persons or groups responsible for asserting the mappings. The author is a crucial bit of metadata, in particular in conjunction with the mapping justification `human curation`. A mapping consumer can look up the author of a mapping through their unique identifier (e.g. an [ORCiD](https://orcid.org/), which we use in the biomedical domain, but might be anything, including a unique database identifier). Again, we prefer PURLs here, that resolve to some useful information when you look them up. `mapping_set_id`: A unique identifier of the mapping set.
This is a pivotal concept in FAIR data and data management in general: every unit of data that is shared around within an organisation (or the whole world) [should have a unique identifier](https://www.go-fair.org/fair-principles/f1-meta-data-assigned-globally-unique-persistent-identifiers/). As per Semantic Web conventions, we recommend using persistent URLs, or PURLs, to identify your mappings set. For example: http://purl.obolibrary.org/obo/mondo.owl is a unique identifier to an ontology and http://purl.obolibrary.org/obo/mondo/mapping/mondo.sssom.tsv refers to the "Mondo disease mappings". `mapping_set_version`: The version of a mapping set. Versioning is absolutely crucial for mapping sets, much the same way as it is for ontologies. We recommend to use [semantic versioning](https://semver.org/) or simple ISO Date versioning, like "2022-05-01". The latter is recommended by some organisations like the [OBO foundry](https://obofoundry.org/principles/fp-004-versioning.html) (it is easier to see how new a mapping set is, and it is easier to sort as a string), but semantic versioning is much more widely used. We use date based versioning in the tutorial. `mapping_set_description`: A description of the mapping set, providing context and motivation. This is another underrated piece of metadata that allows humans to understand and build trust towards a mapping set. A good description of a mapping set - describes the scope and content of a mapping set - describes the purpose for the creation of the mapping set - is reasonably short, but not too short (3-4 sentences) `license`: An identifier for a license description. One of the most serious impediments to reuse on the web is the absence of clear and **standardised** licenses. We recommend the creative commons licenses for open data, either CC-0 (public domain, no license) or CC-BY 4.0. (Some people prefer CC-BY 4.0, because it ensures that attribution is taken more seriously.) 
Even when using a proprietary license, it is good to be transparent here, so that an "accidentally leaked" data file is not mistakenly assumed to be "open". `subject_source`: URI of the source of the subject. This is one of the most important pieces of metadata: an unambiguous reference to a source. It is notoriously hard to standardise source references ([see past debate](https://github.com/mapping-commons/sssom/issues/126)). We recommend to use the standard URIs used in your own domain, for example OBO (`obo:mondo`) or Wikidata (`wikidata:Q7876491`). `subject_source_version`: The version of the source of the subject. In order to interpret a mapping, it is not enough to know the source. Sources change all the time, whether they are databases and/or ontologies: classes are obsoleted, database records are deleted. What counts for an exact mapping may change through the evolution of a source. _Always_ document the source version, if you can. This can be very difficult for database systems that do not have a real notion of versioning. `object_source`: URI of the source of the object. See `subject_source`. `object_source_version`: The version of the source of the object. See `subject_source_version`. #### Mapping vs Mapping set metadata - where should it go? SSSOM distinguishes between `mapping` and `mapping_set` metadata, i.e. metadata that pertains to each individual mapping and metadata that pertains to the whole mapping set. To understand which is which, you can browse [the specification](https://mapping-commons.github.io/sssom/spec/). **Mapping metadata** is usually captured in the rows of the SSSOM mapping table. We have done this a lot so far during this tutorial: documenting our confidence in our mapping decision, and specifying the source of our subject id. However, in SSSOM we have the option to document some `mapping` metadata on the level of the `mapping_set`, which means that the `metadata` item applies to **all mappings in the mapping set**.
We will capture `subject` and `object_source` this way, see a bit further below. We capture `mapping` level metadata in the usual way using our table: | subject_id | subject_label | predicate_id | object_id | object_label | confidence | comment | mapping_justification | mapping_date | author_id | subject_source_version | object_source_version | |--------------|---------------|-----------------|-----------------|--------------------|------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------|--------------|---------------------------|------------------------|----------------------------------------------------------------------| | KF_FOOD:F001 | apple | skos:exactMatch | FOODON:00002473 | apple (whole) | 0.95 | We could map to FOODON:03310788 instead to cover sliced apples, but only "whole" apple types exist. | semapv:HumanCuration | 2022-05-02 | orcid:0000-0002-7356-1779 | | http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl | | KF_FOOD:F002 | gala | skos:exactMatch | FOODON:00003348 | Gala apple (whole) | 1 | | semapv:HumanCuration | 2022-05-02 | orcid:0000-0002-7356-1779 | | http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl | | KF_FOOD:F003 | pink | skos:exactMatch | FOODON:00004186 | Pink apple (whole) | 0.9 | We could map to FOODON:00004187 instead which more specifically refers to "raw" Pink apples. Decided against to be consistent with other mapping choices. 
| semapv:HumanCuration | 2022-05-02 | orcid:0000-0002-7356-1779 | | http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl | | KF_FOOD:F004 | braeburn | skos:exactMatch | sssom:NoMapping | | 1 | | semapv:HumanCuration | 2022-05-02 | orcid:0000-0002-7356-1779 | | http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl | | KF_FOOD:F004 | braeburn | skos:broadMatch | FOODON:00002473 | apple (whole) | 1 | | semapv:HumanCuration | 2022-05-02 | orcid:0000-0002-7356-1779 | | http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl | **Mapping set metadata**. In this tutorial, only `mapping_set_id`, `mapping_set_version`, `license` and `mapping_set_description` are purely `mapping_set` metadata. Everything else is considered `mapping` metadata. Mapping set metadata is captured in [YAML](https://yaml.org/) format. For this tutorial, we will capture the following: ``` mapping_set_id: https://w3id.org/sssom/tutorial/example1.sssom.tsv license: https://creativecommons.org/licenses/by/4.0/ mapping_set_version: "2022-06-01" mapping_set_description: "Manually curated alignment of KEWL FOODIE INC internal food and nutrition database with Food Ontology (FOODON). Intended to be used for ontological analysis and grouping of KEWL FOODIE INC related data." object_source: wikidata:Q55118395 subject_source: KF_FOOD:DB curie_map: KF_FOOD: https://kewl-foodie.inc/food/ wikidata: http://www.wikidata.org/entity/ FOODON: http://purl.obolibrary.org/obo/FOODON_ semapv: https://w3id.org/semapv/vocab/ skos: "http://www.w3.org/2004/02/skos/core#" sssom: https://w3id.org/sssom/ ``` Despite `object_source` and `subject_source` being _mapping_ metadata, we decided to capture them at mapping set level, as they are not likely to change throughout versions of the mapping set. Note that while the `object_source` resolves to an actual page on the web ([FOODON](https://www.wikidata.org/wiki/Q55118395)), `KF_FOOD:DB` does not. 
SSSOM requires a source to correspond to an IRI (see ongoing [debate](https://github.com/mapping-commons/sssom/issues/126)). This helps to ensure that it is unambiguously clear what the source was. Imagine someone documenting the string `INTERNAL_DB` or just `DB` - even in large organisations, but certainly on the web, this can cause clashes. The `curie_map` (better known as "prefix map") is another key concept in SSSOM (and most Semantic Web standards). It maps prefixes to URI expansions. This serves three main purposes. 1. Unambiguously identify the namespace of a prefix. The prefix `FOODON:`, all by itself, can be used by many different sources. `http://purl.obolibrary.org/obo/FOODON_` uniquely identifies the namespace of `FOODON`. This is important when merging different mapping sets together. 2. Expanding and resolving identifiers. Some identifier schemes like the one in the OBO Foundry, Wikidata and many others, resolve identifiers to a page on the web. This allows people (and sometimes machines) to look up additional information about an entity on the web. For example, when we expand FOODON:00002473 to http://purl.obolibrary.org/obo/FOODON_00002473, we can look this URI up in a browser. 3. Providing a recipe for creating RDF resources from CURIEs. RDF requires an entity to be represented by a full URI, e.g. `<http://purl.obolibrary.org/obo/FOODON_00002473>`. In this case, you can think of the `curie_map` in essence as a set of RDF [prefix declarations](https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-Prefix). This is only important if your use case requires serialisation into RDF. This concludes the manual curation tutorial. Next, we will process the two mapping sets using "SSSOM python toolkit" (aka sssom-py). ### Automated processing 1: Creating an embedded SSSOM file **Important note May 8 2022**: The SSSOM toolkit has not yet been updated to the most recent changes of the SSSOM data model. If you get an error `ValueError: match_type must be supplied`, you have to update your local installation.
#### Embedded vs external mode for SSSOM metadata One problem with table formats like TSV or CSV, in contrast to more flexible tree shaped formats like JSON or XML, is that it is notoriously hard to include metadata about the whole table (for example, mapping **set** metadata) in them. There are essentially three options: 1. All metadata is stored as values in columns. While this is definitely possible, it is not ideal for a few reasons: 1. It is highly redundant. If we have to store the `mapping_set_id`, for example, as a value in a mapping table with 1000 mappings, it is repeated 1000 times. 2. It is less immediately clear whether a piece of metadata pertains to the `mapping_set` or a `mapping` (you have to study the specification to understand that `author_id` pertains to an individual mapping rather than the whole mapping set). 2. Metadata about the mapping set is stored within the TSV file header. Basically, we introduce a number of rows at the top of the TSV file that we reserve for metadata. The disadvantage is that many parsers for such flat files do not know how to deal with a header like this. 3. We keep metadata about tables and mapping sets separate, i.e. we keep one TSV file that contains the data and one YAML file that contains the mapping set metadata. This is often a good option, but keeping the two separate may cause a problem: in environments where the data is shared around (emailed, copied) the connection can get lost. In SSSOM, we opted for option 2 as the default, which we call "embedded mode" (the metadata is embedded). Most commands in the [SSSOM toolkit](https://github.com/mapping-commons/sssom-py) expect SSSOM files to be in embedded mode. However, we support option 3 (external mode) indirectly by providing operations to simply merge the two before other processing steps. 
#### Converting an SSSOM file from external to embedded mode If you do not have the SSSOM toolkit installed, [do so now](https://mapping-commons.github.io/sssom-py/installation.html). Download the food mappings created before. If you feel confident with your own mappings, feel free to use these instead. - [Mappings](https://raw.githubusercontent.com/mapping-commons/sssom/master/examples/external/example1.sssom.tsv) - [Metadata](https://raw.githubusercontent.com/mapping-commons/sssom/master/examples/external/example1.sssom.yml) Now let's use the SSSOM toolkit to merge these two: ``` sssom parse example1.sssom.tsv -m example1.sssom.yml -o foodieinc-food.sssom.tsv ``` If you open `foodieinc-food.sssom.tsv`, you will see: ``` # comment: We could map to FOODON:00004187 instead which more specifically refers to # "raw" Pink apples. Decided against to be consistent with other mapping choices. # curie_map: # FOODON: http://purl.obolibrary.org/obo/FOODON_ # KF_FOOD: https://kewl-foodie.inc/food/ # skos: http://www.w3.org/2004/02/skos/core# # sssom: https://w3id.org/sssom/ # license: https://creativecommons.org/licenses/by/4.0/ # mapping_date: '2022-05-02' # mapping_set_description: Manually curated alignment of KEWL FOODIE INC internal food # and nutrition database with Food Ontology (FOODON). Intended to be used for ontological # analysis and grouping of KEWL FOODIE INC related data.
# mapping_set_id: https://w3id.org/sssom/tutorial/example1.sssom.tsv # mapping_set_version: '2022-06-01' # object_source: wikidata:Q55118395 # object_source_version: http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl # subject_source: KF_FOOD:DB subject_id subject_label predicate_id object_id object_label mapping_justification author_id object_source_version mapping_date confidence comment KF_FOOD:F001 apple skos:exactMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 0.95 "We could map to FOODON:03310788 instead to cover sliced apples, but only ""whole"" apple types exist." KF_FOOD:F002 gala skos:exactMatch FOODON:00003348 Gala apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 KF_FOOD:F003 pink skos:exactMatch FOODON:00004186 Pink apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 0.9 "We could map to FOODON:00004187 instead which more specifically refers to ""raw"" Pink apples. Decided against to be consistent with other mapping choices." 
KF_FOOD:F004 braeburn skos:exactMatch sssom:NoMapping semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 KF_FOOD:F004 braeburn skos:broadMatch FOODON:00002473 apple (whole) semapv:ManualMappingCuration orcid:0000-0002-7356-1779 http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl 2022-05-02 1.0 ``` #### Converting an SSSOM file to JSON We will now convert the embedded SSSOM file we created before into JSON: ``` sssom convert foodieinc-food.sssom.tsv --output-format json -o foodieinc-food.sssom.json ``` While the JSON format is [not yet stable](https://github.com/mapping-commons/sssom/issues/102), it is close to completion. #### Diff between two versions The last part of this tutorial concerns one of the main motivations of using a controlled metadata model for mappings: versioning. One key concern for data management, and mapping management in particular, is to be able to understand the evolution of mappings over time. While this command is not stable yet, we can use it to understand the difference between two mapping sets: `sssom diff`.
Let us try to look at the difference between an old version of our foodie-inc mapping set and our new one: ``` sssom diff foodieinc-food.sssom.tsv ../embedded/foodie-inc-2022-05-01.sssom.tsv -o diff.sssom.tsv ``` The outcome gives us the following information: | subject_id | subject_label | predicate_id | object_id | object_label | mapping_justification | author_id | object_source_version | mapping_date | confidence | comment | |--------------|---------------|-----------------|-----------------|-------------------------|--------------|---------------------------|----------------------------------------------------------------------|--------------|------------|----------------| | KF_FOOD:F003 | pink | skos:exactMatch | FOODON:00004186 | Pink apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl | 2022-05-02 | 0.9 | UNIQUE_1 | | KF_FOOD:F003 | pink | skos:exactMatch | FOODON:00004187 | Pink apple (whole, raw) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl | 2022-05-02 | 0.9 | UNIQUE_2 | | KF_FOOD:F002 | gala | skos:exactMatch | FOODON:00003348 | Gala apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl | 2022-05-02 | 1.0 | COMMON_TO_BOTH | | KF_FOOD:F004 | braeburn | skos:broadMatch | FOODON:00002473 | apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl | 2022-05-02 | 1.0 | COMMON_TO_BOTH | | KF_FOOD:F001 | apple | skos:exactMatch | FOODON:00002473 | apple (whole) | semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl | 2022-05-02 | 0.95 | COMMON_TO_BOTH | | KF_FOOD:F004 | braeburn | skos:exactMatch | sssom:NoMapping | | 
semapv:ManualMappingCuration | orcid:0000-0002-7356-1779 | http://purl.obolibrary.org/obo/foodon/releases/2022-02-01/foodon.owl | 2022-05-02 | 1.0 | COMMON_TO_BOTH | This can be used to understand that the first mapping is only present in the new mapping set, while the second mapping was present in the old mapping set - all the other ones are in common between the two. ================================================ FILE: src/docs/tutorials/omop-mappings.md ================================================ # How to gradually enrich OMOP mappings with SSSOM This document is a guide for OMOP ETL developers to think about gradually improving the (documentation of the) strength of evidence for their vocabulary mappings. ## Example table from OMOP Generated manually with Athena on the 20th July 2023. The start and end dates are invented. | concept_id_1 | concept_id_2 | relationship_id | valid_start_date | valid_end_date | invalid_reason | |--------------|--------------|-----------------|------------------|----------------|----------------| | 44499396 | 4028717 | Maps to | 19700101 | 20991231 | | | 45586281 | 4028717 | Maps to | 73754 | 20991231 | | ## Level 1, basic mapping table, basic provenance The SSSOM metadata provided is conceptually correct, but fictitious. The reader should imagine this being provided as a separate CONCEPT_MAPPINGS.CSV table that can be joined on `subject_id`->`concept_id_1`, `object_id`->`concept_id_2` for all rows with a `Maps to` `relationship_id` (this is assuming that the `concept_id_1`,`concept_id_2` tuple is unique for `Maps to`). 
| subject_id | object_id | predicate_id | mapping_provider | mapping_tool | mapping_tool_version | mapping_justification | reviewer_id | author_id | |---|---|---|---|---|---|---|---|---| | OMOP:44499396 | OMOP:4028717 | omoprel:mapsTo | OHDSI:Odysseus | | | semapv:ManualMappingCuration | | ORCID:0000-0003-4147-1485 | | OMOP:45586281 | OMOP:4028717 | omoprel:mapsTo | OHDSI:Odysseus | OHDSI_TOOLS:Usagi | 1.4.3 | semapv:LexicalMatching | ORCID:0000-0003-4147-1485 | | OMOP:45610575 | OMOP:441554 | omoprel:mapsTo | OHDSI:UMLS | | | semapv:UnspecifiedMatching | | | What we see here: 1. all identifiers are prefixed to make sure they are interpreted correctly when they are reused. This includes OMOP ids (e.g. `OMOP:44499396`) as well as ORCIDs (OPTIONAL) 1. "Maps to" is encoded using a proper identifier rather than a string (OPTIONAL) 1. All three mappings have a `mapping_justification` to distinguish for example if the mapping was determined by human manual curation (`semapv:ManualMappingCuration`) or lexical matching (`semapv:LexicalMatching`). Many other justifications exist and/or can be created. If the justification for the mapping is unknown, we can make our lack of knowledge transparent by using `semapv:UnspecifiedMatching`. 1. `author_id`, in the case of `semapv:ManualMappingCuration`, tells us who the person is that determined the mapping. This is basic provenance. If the identity of the author can be connected with a public record such as ORCID, this can help mapping users to increase trust in a mapping. `reviewer_id` tells us that some human looked at the mapping after it was proposed by a tool, and "signed off" on it. This can be valuable, again, to increase trust. `review_date` can also be given to add context to the review. 1. If the match was generated by the tool, some basic provenance is added (`mapping_tool`, `mapping_tool_version`). 
## Level 2: Curate semantic mapping predicate | subject_id | object_id | predicate_id | mapping_provider | mapping_tool | mapping_tool_version | mapping_justification | reviewer_id | review_date | author_id | |---------------|--------------|-----------------|------------------|-------------------|----------------------|--------------------------------|---------------------------|-------------|---------------------------| | OMOP:44499396 | OMOP:4028717 | skos:broadMatch | OHDSI:Odysseus | | | semapv:ManualMappingCuration | | | ORCID:0000-0003-4147-1485 | | OMOP:45586281 | OMOP:4028717 | skos:exactMatch | OHDSI:Odysseus | OHDSI_TOOLS:Usagi | 1.4.3 | semapv:LexicalMatching | ORCID:0000-0003-4147-1485 | 2021-01-01 | | | OMOP:45610575 | OMOP:441554 | skos:exactMatch | OHDSI:UMLS | | | semapv:UnspecifiedMatching | | | | What do we see here? 1. Rather than `Maps to`, the mapping predicate (e.g. `skos:exactMatch`) is a semantic mapping predicate from a standardised vocabulary ([SKOS](https://www.w3.org/TR/skos-reference)). Here, we distinguish between `skos:exactMatch` and `skos:broadMatch`, but there are other predicates, see for example in the [Semantic Mapping Vocabulary](https://github.com/mapping-commons/semantic-mapping-vocabulary/blob/main/semapv-properties.tsv). ## Level 3: Document confidence widely `confidence` is an incredibly useful metric for downstream users, including ETL engineers and data analysts. In an ideal world, all mappings have some kind of `confidence` associated with them. `confidence` scores should be read as "the strength of evidence provided in this record/table row (i.e mapping justification) leads us to believe the mapping (e.g. `OMOP:44499396 --[skos:broadMatch]--> OMOP:4028717`) is correct with 90% confidence. 
| subject_id | object_id | predicate_id | mapping_provider | mapping_tool | mapping_tool_version | mapping_justification | reviewer_id | author_id | confidence | |---|---|---|---|---|---|---|---|---|---| | OMOP:44499396 | OMOP:4028717 | skos:broadMatch | OHDSI:Odysseus | | | semapv:ManualMappingCuration | | ORCID:0000-0003-4147-1485 | 0.9 | | OMOP:45586281 | OMOP:4028717 | skos:exactMatch | OHDSI:Odysseus | OHDSI_TOOLS:Usagi | 1.4.3 | semapv:LexicalMatching | ORCID:0000-0003-4147-1485 | | 0.8 | | OMOP:45610575 | OMOP:441554 | skos:exactMatch | OHDSI:UMLS | | | semapv:UnspecifiedMatching | | | 0.6 | What do we see here? - For matching tools, confidence can be calculated by proxies such as "lexical similarity", "edit distance", "cosine similarity of node embedding" and other metrics. In the example above, Usagi has determined that the subject and objects match, but it was only 80% sure (we don't know why - this is [more advanced SSSOM](../mapping-justifications.md)) - For cases where an external mapping is reused using ETL, `confidence` describes the level of trust you as an ETL expert have in the fidelity of the mapping provided by the source. ## Level 4: Document curation rules | subject_id | object_id | predicate_id | mapping_provider | mapping_tool | mapping_tool_version | mapping_justification | reviewer_id | author_id | confidence | curation_rule | |---|---|---|---|---|---|---|---|---|---|---| | OMOP:44499396 | OMOP:4028717 | skos:broadMatch | OHDSI:Odysseus | | | semapv:ManualMappingCuration | | ORCID:0000-0003-4147-1485 | 0.9 | OHDSI_CURATION_RULE:19 | What do we see here? - For manual matches, it is often unclear by what criteria a match was established. Documenting the curation rules can help increase consistency for manual curation, and transparency for downstream users. - `OHDSI_CURATION_RULE:19` is a rule defined by your own curation rulebook. This can be _anything_. 
For example `OHDSI_CURATION_RULE:19` could correspond to the following rule: ``` OHDSI_CURATION_RULE:19 = If the subject concept does not have an exact match in the object source vocabulary, we select the nearest broad ("up-hill") concept applicable. Conceptually, if both terms would exist in the same terminology, the subject concept can be defined as a subconcept of the object concept. The determination for both criteria (nearest broad, conceptually subconcept) is performed through medical expert judgement. ``` ================================================ FILE: src/docs/usecases.md ================================================ # Use cases and case studies ## Mondo disease mappings - [Mondo mappings](https://github.com/monarch-initiative/mondo/tree/master/src/ontology/mappings) ## The National Microbiome Data Collaborative (NMDC, https://microbiomedata.org/): - [Various data model mappings](https://github.com/microbiomedata/nmdc-schema/tree/main/sssom), e.g. MIXS, GOLD, etc ## CCDH (mapping clinical data models, ontologies and value sets) - https://harmonization.datacommons.cancer.gov/ccdh-resources - SNOMED-NCIT [example](https://docs.google.com/spreadsheets/d/18luA05E9wLukOFamsRV3FWVnoCr57o8qAHe-aGYrPr8/edit#gid=509055704) ## Cross-species mapping efforts - [mouse-human mapping commons](https://github.com/mapping-commons/mh_mapping_initiative) ## Microbial traits - https://github.com/mapping-commons/microbial-trait-mappings ## Biomappings - https://github.com/biomappings/biomappings/tree/master/docs/_data/sssom - Decentralized curation of mappings, especially ones that aren't incident to ontology terms ## Clinical mappings [Gdocs](https://docs.google.com/document/d/1p7MVn0UGro6SMgnCfi70BOYgrDRoNkEjpXXAl8_hYXw/edit) for discussion. 
================================================ FILE: src/docs/workshops.md ================================================ # Workshops ## Mapping Commons Workshop Series Wikidata: https://www.wikidata.org/wiki/Q108394475 - [1st Mapping Commons Workshop on Simple Standard for Sharing Ontology Mappings](events/mc2021.md) - [2nd Mapping Commons Workshop on Simple Standard for Sharing Ontology Mappings](events/mc2023.md) ================================================ FILE: src/sssom_schema/__init__.py ================================================ from .datamodel.sssom_schema import * ================================================ FILE: src/sssom_schema/context/sssom_schema.context.jsonld ================================================ { "@context": { "xsd": "http://www.w3.org/2001/XMLSchema#", "dcterms": "http://purl.org/dc/terms/", "linkml": "https://w3id.org/linkml/", "oboInOwl": "http://www.geneontology.org/formats/oboInOwl#", "owl": "http://www.w3.org/2002/07/owl#", "pav": "http://purl.org/pav/", "prov": "http://www.w3.org/ns/prov#", "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdfs": "http://www.w3.org/2000/01/rdf-schema#", "semapv": "https://w3id.org/semapv/vocab/", "skos": "http://www.w3.org/2004/02/skos/core#", "sssom": "https://w3id.org/sssom/", "@vocab": "https://w3id.org/sssom/", "author_id": { "@type": "rdfs:Resource", "@id": "pav:authoredBy" }, "author_label": { "@id": "author_label" }, "cardinality_scope": { "@id": "cardinality_scope" }, "comment": { "@id": "rdfs:comment" }, "confidence": { "@type": "xsd:double", "@id": "confidence" }, "creator_id": { "@type": "rdfs:Resource", "@id": "dcterms:creator" }, "creator_label": { "@id": "creator_label" }, "curation_rule": { "@type": "rdfs:Resource", "@id": "curation_rule" }, "curation_rule_text": { "@id": "curation_rule_text" }, "curie_map": { "@type": "@id", "@id": "curie_map" }, "documentation": { "@type": "xsd:anyURI", "@id": "documentation" }, "extension_definitions": { "@type": "@id", 
"@id": "extension_definitions" }, "property": { "@type": "xsd:anyURI", "@id": "property" }, "slot_name": { "@id": "slot_name" }, "type_hint": { "@type": "xsd:anyURI", "@id": "type_hint" }, "homepage": { "@type": "xsd:anyURI", "@id": "homepage" }, "imports": { "@type": "xsd:anyURI", "@id": "imports" }, "issue_tracker": { "@type": "xsd:anyURI", "@id": "issue_tracker" }, "issue_tracker_item": { "@type": "rdfs:Resource", "@id": "issue_tracker_item" }, "last_updated": { "@type": "xsd:date", "@id": "last_updated" }, "license": { "@type": "xsd:anyURI", "@id": "dcterms:license" }, "local_name": { "@id": "local_name" }, "mapping_cardinality": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "mapping_cardinality" }, "mapping_date": { "@type": "xsd:date", "@id": "dcterms:created" }, "mapping_justification": { "@type": "rdfs:Resource", "@id": "mapping_justification" }, "mapping_provider": { "@type": "xsd:anyURI", "@id": "mapping_provider" }, "mapping_registry_description": { "@id": "mapping_registry_description" }, "mapping_registry_id": { "@type": "rdfs:Resource", "@id": "mapping_registry_id" }, "mapping_registry_title": { "@id": "mapping_registry_title" }, "mapping_set_confidence": { "@type": "xsd:double", "@id": "mapping_set_confidence" }, "mapping_set_description": { "@id": "dcterms:description" }, "mapping_set_group": { "@id": "mapping_set_group" }, "mapping_set_id": { "@type": "xsd:anyURI", "@id": "mapping_set_id" }, "mapping_set_references": { "@type": "@id", "@id": "mapping_set_references" }, "mapping_set_source": { "@type": "xsd:anyURI", "@id": "prov:wasDerivedFrom" }, "mapping_set_title": { "@id": "dcterms:title" }, "mapping_set_version": { "@id": "owl:versionInfo" }, "mapping_source": { "@type": "rdfs:Resource", "@id": "mapping_source" }, "mapping_tool": { "@id": "mapping_tool" }, "mapping_tool_id": { "@type": "rdfs:Resource", "@id": "mapping_tool_id" }, "mapping_tool_version": { "@id": "mapping_tool_version" }, 
"mappings": { "@type": "@id", "@id": "mappings" }, "match_string": { "@id": "match_string" }, "mirror_from": { "@type": "xsd:anyURI", "@id": "mirror_from" }, "object_category": { "@id": "object_category" }, "object_id": { "@type": "rdfs:Resource", "@id": "owl:annotatedTarget" }, "object_label": { "@id": "object_label" }, "object_match_field": { "@type": "rdfs:Resource", "@id": "object_match_field" }, "object_preprocessing": { "@type": "rdfs:Resource", "@id": "object_preprocessing" }, "object_source": { "@type": "rdfs:Resource", "@id": "object_source" }, "object_source_version": { "@id": "object_source_version" }, "object_type": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "object_type" }, "other": { "@id": "other" }, "predicate_id": { "@type": "rdfs:Resource", "@id": "owl:annotatedProperty" }, "predicate_label": { "@id": "predicate_label" }, "predicate_modifier": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "predicate_modifier" }, "predicate_type": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "predicate_type" }, "prefix_name": { "@id": "prefix_name" }, "prefix_url": { "@type": "xsd:anyURI", "@id": "prefix_url" }, "propagated": { "@type": "xsd:boolean", "@id": "propagated" }, "publication_date": { "@type": "xsd:date", "@id": "dcterms:issued" }, "record_id": { "@type": "rdfs:Resource", "@id": "record_id" }, "registry_confidence": { "@type": "xsd:double", "@id": "registry_confidence" }, "review_date": { "@type": "xsd:date", "@id": "review_date" }, "reviewer_agreement": { "@type": "xsd:double", "@id": "reviewer_agreement" }, "reviewer_id": { "@type": "rdfs:Resource", "@id": "reviewer_id" }, "reviewer_label": { "@id": "reviewer_label" }, "see_also": { "@type": "xsd:anyURI", "@id": "rdfs:seeAlso" }, "similarity_measure": { "@id": "similarity_measure" }, "similarity_score": { "@type": "xsd:double", "@id": 
"similarity_score" }, "sssom_version": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "sssom_version" }, "subject_category": { "@id": "subject_category" }, "subject_id": { "@type": "rdfs:Resource", "@id": "owl:annotatedSource" }, "subject_label": { "@id": "subject_label" }, "subject_match_field": { "@type": "rdfs:Resource", "@id": "subject_match_field" }, "subject_preprocessing": { "@type": "rdfs:Resource", "@id": "subject_preprocessing" }, "subject_source": { "@type": "rdfs:Resource", "@id": "subject_source" }, "subject_source_version": { "@id": "subject_source_version" }, "subject_type": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "subject_type" }, "added_in": { "@context": { "text": "skos:notation", "description": "skos:prefLabel", "meaning": "@id" }, "@id": "added_in" }, "ExtensionDefinition": { "@id": "ExtensionDefinition" }, "Mapping": { "@id": "owl:Axiom" }, "MappingRegistry": { "@id": "MappingRegistry" }, "MappingSet": { "@id": "MappingSet" }, "MappingSetReference": { "@id": "MappingSetReference" }, "NoTermFound": { "@id": "NoTermFound" }, "Prefix": { "@id": "Prefix" }, "Propagatable": { "@id": "Propagatable" }, "Versionable": { "@id": "Versionable" } } } ================================================ FILE: src/sssom_schema/context/sssom_schema.jsonld ================================================ { "name": "sssom", "description": "Datamodel for Simple Standard for Sharing Ontological Mappings (SSSOM)", "see_also": [ "https://github.com/mapping-commons/sssom", "https://mapping-commons.github.io/sssom/home/" ], "id": "https://w3id.org/sssom/schema/", "imports": [ "linkml:types" ], "license": "https://creativecommons.org/publicdomain/zero/1.0/", "prefixes": [ { "prefix_prefix": "dcterms", "prefix_reference": "http://purl.org/dc/terms/" }, { "prefix_prefix": "linkml", "prefix_reference": "https://w3id.org/linkml/" }, { "prefix_prefix": "sssom", 
"prefix_reference": "https://w3id.org/sssom/" }, { "prefix_prefix": "rdfs", "prefix_reference": "http://www.w3.org/2000/01/rdf-schema#" }, { "prefix_prefix": "rdf", "prefix_reference": "http://www.w3.org/1999/02/22-rdf-syntax-ns#" }, { "prefix_prefix": "oboInOwl", "prefix_reference": "http://www.geneontology.org/formats/oboInOwl#" }, { "prefix_prefix": "pav", "prefix_reference": "http://purl.org/pav/" }, { "prefix_prefix": "prov", "prefix_reference": "http://www.w3.org/ns/prov#" }, { "prefix_prefix": "skos", "prefix_reference": "http://www.w3.org/2004/02/skos/core#" }, { "prefix_prefix": "xsd", "prefix_reference": "http://www.w3.org/2001/XMLSchema#" }, { "prefix_prefix": "semapv", "prefix_reference": "https://w3id.org/semapv/vocab/" } ], "default_curi_maps": [ "semweb_context", "obo_context" ], "default_prefix": "sssom", "default_range": "string", "types": [ { "name": "EntityReference", "definition_uri": "https://w3id.org/sssom/EntityReference", "description": "A reference to an entity involved in the mapping.\n", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/spec/#tsv" ], "typeof": "uriorcurie", "base": "str", "uri": "http://www.w3.org/2000/01/rdf-schema#Resource", "repr": "str", "@type": "TypeDefinition" }, { "name": "NonRelativeURI", "definition_uri": "https://w3id.org/sssom/NonRelativeURI", "description": "A URI as per RFC 3986, that is a string that matches the production of the \"URI\" rule defined in Appendix A of that RFC. Contrary to the underlying LinkML type, this specifically excludes _relative URI references_, which do not start with a scheme component. 
Relative URI references are forbidden because SSSOM has no built-in mechanism to provide the base URI that would be needed to resolve relative URI references into non-relative ones.", "examples": [ { "value": "https://example.org/path/to/file.txt#L4", "description": "A URI that is URL to a HTTP resource.", "@type": "Example" }, { "value": "urn:oasis:names:tc:entity:xmlns:xml:catalog", "description": "A URI that is the URN of the namespace for the OASIS XML Catalogs specification.", "@type": "Example" }, { "value": "ldap://example.org/cn=Alice,dc=example,dc=org?mail", "description": "A URI that is a LDAP query URL.", "@type": "Example" }, { "value": "mailto:alice@example.org", "description": "A URI that is an email address.", "@type": "Example" }, { "value": "file.txt", "description": "An _invalid_ example, as it a relative URI (path only, no scheme).", "@type": "Example" }, { "value": "/path/to/file.txt", "description": "An _invalid_ example; though it appears to be an _absolute path_, it is a _relative URI_ because of the absence of a scheme.", "@type": "Example" }, { "value": "//example.org/path/to/file.txt", "description": "An _invalid_ example; though it includes an authority component (example.org), it has no scheme and is therefore a _relative URI_.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/448" ], "typeof": "uri", "base": "URI", "uri": "http://www.w3.org/2001/XMLSchema#anyURI", "repr": "str", "@type": "TypeDefinition" }, { "name": "string", "definition_uri": "https://w3id.org/linkml/String", "description": "A character string", "notes": [ "In RDF serializations, a slot with range of string is treated as a literal or type xsd:string. If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"string\"." 
], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Text" ], "base": "str", "uri": "http://www.w3.org/2001/XMLSchema#string", "@type": "TypeDefinition" }, { "name": "integer", "definition_uri": "https://w3id.org/linkml/Integer", "description": "An integer", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"integer\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Integer" ], "base": "int", "uri": "http://www.w3.org/2001/XMLSchema#integer", "@type": "TypeDefinition" }, { "name": "boolean", "definition_uri": "https://w3id.org/linkml/Boolean", "description": "A binary (true or false) value", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"boolean\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Boolean" ], "base": "Bool", "uri": "http://www.w3.org/2001/XMLSchema#boolean", "repr": "bool", "@type": "TypeDefinition" }, { "name": "float", "definition_uri": "https://w3id.org/linkml/Float", "description": "A real number that conforms to the xsd:float specification", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"float\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Float" ], "base": "float", "uri": "http://www.w3.org/2001/XMLSchema#float", "@type": "TypeDefinition" }, { "name": "double", "definition_uri": "https://w3id.org/linkml/Double", "description": "A real number that conforms to the xsd:double specification", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"double\"." 
], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "close_mappings": [ "schema:Float" ], "base": "float", "uri": "http://www.w3.org/2001/XMLSchema#double", "@type": "TypeDefinition" }, { "name": "decimal", "definition_uri": "https://w3id.org/linkml/Decimal", "description": "A real number with arbitrary precision that conforms to the xsd:decimal specification", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"decimal\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "broad_mappings": [ "schema:Number" ], "base": "Decimal", "uri": "http://www.w3.org/2001/XMLSchema#decimal", "@type": "TypeDefinition" }, { "name": "time", "definition_uri": "https://w3id.org/linkml/Time", "description": "A time object represents a (local) time of day, independent of any particular day", "notes": [ "URI is dateTime because OWL reasoners do not work with straight date or time", "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"time\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Time" ], "base": "XSDTime", "uri": "http://www.w3.org/2001/XMLSchema#time", "repr": "str", "@type": "TypeDefinition" }, { "name": "date", "definition_uri": "https://w3id.org/linkml/Date", "description": "a date (year, month and day) in an idealized calendar", "notes": [ "URI is dateTime because OWL reasoners don't work with straight date or time", "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"date\"." 
], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:Date" ], "base": "XSDDate", "uri": "http://www.w3.org/2001/XMLSchema#date", "repr": "str", "@type": "TypeDefinition" }, { "name": "datetime", "definition_uri": "https://w3id.org/linkml/Datetime", "description": "The combination of a date and time", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"datetime\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "exact_mappings": [ "schema:DateTime" ], "base": "XSDDateTime", "uri": "http://www.w3.org/2001/XMLSchema#dateTime", "repr": "str", "@type": "TypeDefinition" }, { "name": "date_or_datetime", "definition_uri": "https://w3id.org/linkml/DateOrDatetime", "description": "Either a date or a datetime", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"date_or_datetime\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "str", "uri": "https://w3id.org/linkml/DateOrDatetime", "repr": "str", "@type": "TypeDefinition" }, { "name": "uriorcurie", "definition_uri": "https://w3id.org/linkml/Uriorcurie", "description": "a URI or a CURIE", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"uriorcurie\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "URIorCURIE", "uri": "http://www.w3.org/2001/XMLSchema#anyURI", "repr": "str", "@type": "TypeDefinition" }, { "name": "curie", "definition_uri": "https://w3id.org/linkml/Curie", "conforms_to": "https://www.w3.org/TR/curie/", "description": "a compact URI", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"curie\"." 
], "comments": [ "in RDF serializations this MUST be expanded to a URI", "in non-RDF serializations MAY be serialized as the compact representation" ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "Curie", "uri": "http://www.w3.org/2001/XMLSchema#string", "repr": "str", "@type": "TypeDefinition" }, { "name": "uri", "definition_uri": "https://w3id.org/linkml/Uri", "conforms_to": "https://www.ietf.org/rfc/rfc3987.txt", "description": "a complete URI", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"uri\"." ], "comments": [ "in RDF serializations a slot with range of uri is treated as a literal or type xsd:anyURI unless it is an identifier or a reference to an identifier, in which case it is translated directly to a node" ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "close_mappings": [ "schema:URL" ], "base": "URI", "uri": "http://www.w3.org/2001/XMLSchema#anyURI", "repr": "str", "@type": "TypeDefinition" }, { "name": "ncname", "definition_uri": "https://w3id.org/linkml/Ncname", "description": "Prefix part of CURIE", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"ncname\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "NCName", "uri": "http://www.w3.org/2001/XMLSchema#string", "repr": "str", "@type": "TypeDefinition" }, { "name": "objectidentifier", "definition_uri": "https://w3id.org/linkml/Objectidentifier", "description": "A URI or CURIE that represents an object in the model.", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"objectidentifier\"." 
], "comments": [ "Used for inheritance and type checking" ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "ElementIdentifier", "uri": "http://www.w3.org/ns/shex#iri", "repr": "str", "@type": "TypeDefinition" }, { "name": "nodeidentifier", "definition_uri": "https://w3id.org/linkml/Nodeidentifier", "description": "A URI, CURIE or BNODE that represents a node in a model.", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"nodeidentifier\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "NodeIdentifier", "uri": "http://www.w3.org/ns/shex#nonLiteral", "repr": "str", "@type": "TypeDefinition" }, { "name": "jsonpointer", "definition_uri": "https://w3id.org/linkml/Jsonpointer", "conforms_to": "https://datatracker.ietf.org/doc/html/rfc6901", "description": "A string encoding a JSON Pointer. The value of the string MUST conform to JSON Point syntax and SHOULD dereference to a valid object within the current instance document when encoded in tree form.", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"jsonpointer\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "str", "uri": "http://www.w3.org/2001/XMLSchema#string", "repr": "str", "@type": "TypeDefinition" }, { "name": "jsonpath", "definition_uri": "https://w3id.org/linkml/Jsonpath", "conforms_to": "https://www.ietf.org/archive/id/draft-goessner-dispatch-jsonpath-00.html", "description": "A string encoding a JSON Path. The value of the string MUST conform to JSON Point syntax and SHOULD dereference to zero or more valid objects within the current instance document when encoded in tree form.", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"jsonpath\"." 
], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "str", "uri": "http://www.w3.org/2001/XMLSchema#string", "repr": "str", "@type": "TypeDefinition" }, { "name": "sparqlpath", "definition_uri": "https://w3id.org/linkml/Sparqlpath", "conforms_to": "https://www.w3.org/TR/sparql11-query/#propertypaths", "description": "A string encoding a SPARQL Property Path. The value of the string MUST conform to SPARQL syntax and SHOULD dereference to zero or more valid objects within the current instance document when encoded as RDF.", "notes": [ "If you are authoring schemas in LinkML YAML, the type is referenced with the lower case \"sparqlpath\"." ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", "base": "str", "uri": "http://www.w3.org/2001/XMLSchema#string", "repr": "str", "@type": "TypeDefinition" } ], "enums": [ { "name": "sssom_version_enum", "definition_uri": "https://w3id.org/sssom/SssomVersionEnum", "from_schema": "https://w3id.org/sssom/schema/", "permissible_values": [ { "text": "1.0", "description": "SSSOM specification version 1.0", "meaning": "sssom:version1.0" }, { "text": "1.1", "description": "SSSOM specification version 1.1", "meaning": "sssom:version1.1" } ] }, { "name": "entity_type_enum", "definition_uri": "https://w3id.org/sssom/EntityTypeEnum", "from_schema": "https://w3id.org/sssom/schema/", "permissible_values": [ { "text": "owl class", "meaning": "owl:Class" }, { "text": "owl object property", "meaning": "owl:ObjectProperty" }, { "text": "owl data property", "meaning": "owl:DataProperty" }, { "text": "owl annotation property", "meaning": "owl:AnnotationProperty" }, { "text": "owl named individual", "meaning": "owl:NamedIndividual" }, { "text": "skos concept", "meaning": "skos:Concept" }, { "text": "rdfs resource", "meaning": "rdfs:Resource" }, { "text": "rdfs class", "meaning": "rdfs:Class" }, { "text": "rdfs literal", "description": "This value indicates that the entity being 
mapped is not a semantic entity with a distinct identifier, but is instead represented entirely by its literal label. This value MUST NOT be used in the predicate_type slot.", "meaning": "rdfs:Literal", "see_also": [ "https://mapping-commons.github.io/sssom/spec-model/#literal-mappings", "https://github.com/mapping-commons/sssom/issues/234", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/literals.sssom.tsv" ] }, { "text": "rdfs datatype", "meaning": "rdfs:Datatype" }, { "text": "rdf property", "meaning": "rdf:Property" }, { "text": "composed entity expression", "description": "This value indicates that the entity ID does not represent a single entity, but a composite involving several individual entities. This value MUST NOT be used in the predicate_type slot. This specifications does not prescribe how an ID representing a composite entity should be interpreted; this is left at the discretion of applications.", "meaning": "sssom:ComposedEntityExpression", "see_also": [ "https://github.com/mapping-commons/sssom/issues/402", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/composite-entities.sssom.tsv" ] } ] }, { "name": "predicate_modifier_enum", "definition_uri": "https://w3id.org/sssom/PredicateModifierEnum", "from_schema": "https://w3id.org/sssom/schema/", "permissible_values": [ { "text": "Not", "description": "Negating the mapping predicate. The meaning of the triple becomes subject_id is not a predicate_id match to object_id.", "meaning": "sssom:NegatedPredicate" } ] }, { "name": "mapping_cardinality_enum", "definition_uri": "https://w3id.org/sssom/MappingCardinalityEnum", "from_schema": "https://w3id.org/sssom/schema/", "permissible_values": [ { "text": "1:1", "description": "Indicates the mapping record is about a one-to-one mapping, that is, the subject and the object are only mapped to each other, exclusive of any other subject or object." 
}, { "text": "1:n", "description": "Indicates the mapping record is about a one-to-many mapping, that is, the same subject is mapped to several different objects." }, { "text": "n:1", "description": "Indicates the mapping record is about a many-to-one mapping, that is, several different subjects are mapped to the same object." }, { "text": "n:n", "description": "Indicates the mapping record is about a many-to-many mapping, that is, the subject is mapped to several different objects and the object is mapped to several different subjects." }, { "text": "1:0", "description": "Indicates that the subject has no match in the object vocabulary. This value MUST only be used when the object_id is sssom:NoTermFound." }, { "text": "0:1", "description": "Indicates that the object has no match in the subject vocabulary. This value MUST only be used when the subject_id is sssom:NoTermFound." }, { "text": "0:0", "description": "Indicates that there is no match between the subject vocabulary and the object vocabulary. This value MUST only be used when both the subject_id and the object_id are sssom:NoTermFound." 
} ] } ], "slots": [ { "name": "prefix_name", "definition_uri": "https://w3id.org/sssom/prefix_name", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/prefix_name", "key": true, "owner": "Prefix", "domain_of": [ "Prefix" ], "range": "ncname", "required": true, "@type": "SlotDefinition" }, { "name": "prefix_url", "definition_uri": "https://w3id.org/sssom/prefix_url", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/prefix_url", "owner": "Prefix", "domain_of": [ "Prefix" ], "range": "uri", "@type": "SlotDefinition" }, { "name": "sssom_version", "definition_uri": "https://w3id.org/sssom/sssom_version", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "The version of the SSSOM specification a mapping set is compliant with.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/439", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/version.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/sssom_version", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "sssom_version_enum", "@type": "SlotDefinition" }, { "name": "curie_map", "definition_uri": "https://w3id.org/sssom/curie_map", "description": "A dictionary that contains prefixes as keys and their URI expansions as values.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/225", "https://github.com/mapping-commons/sssom/pull/349", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curie_map.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/curie_map", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "Prefix", "multivalued": true, "inlined": true, "@type": "SlotDefinition" }, { "name": "mirror_from", "definition_uri": "https://w3id.org/sssom/mirror_from", "description": "A URL 
location from which to obtain a resource, such as a mapping set.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mirror_from", "owner": "MappingSetReference", "domain_of": [ "MappingSetReference" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "registry_confidence", "definition_uri": "https://w3id.org/sssom/registry_confidence", "description": "This value is set by the creator/maintainer of the mapping registry and reflects the confidence the mapping registry has in the correctness (i.e., precision) of mappings in the mapping set.\nWhen not explicitly specified, confidence estimation algorithms should consider the registry confidence in a mapping set to be 1.0 by default.", "examples": [ { "value": "0.95", "description": "A confidence score of 0.95, indicating 95% confidence.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/confidence-model" ], "slot_uri": "https://w3id.org/sssom/registry_confidence", "owner": "MappingSetReference", "domain_of": [ "MappingSetReference" ], "range": "double", "minimum_value": 0.0, "maximum_value": 1.0, "@type": "SlotDefinition" }, { "name": "last_updated", "definition_uri": "https://w3id.org/sssom/last_updated", "description": "The date this reference was last updated.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/last_updated", "owner": "MappingSetReference", "domain_of": [ "MappingSetReference" ], "range": "date", "@type": "SlotDefinition" }, { "name": "local_name", "definition_uri": "https://w3id.org/sssom/local_name", "description": "The local name assigned to file that corresponds to the downloaded mapping set.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/local_name", "owner": "MappingSetReference", "domain_of": [ "MappingSetReference" ], "range": "string", "@type": "SlotDefinition" }, { "name": 
"mapping_set_references", "definition_uri": "https://w3id.org/sssom/mapping_set_references", "description": "A list of mapping set references.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_set_references", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "MappingSetReference", "recommended": true, "multivalued": true, "inlined": true, "inlined_as_list": true, "@type": "SlotDefinition" }, { "name": "mapping_registry_id", "definition_uri": "https://w3id.org/sssom/mapping_registry_id", "description": "The unique identifier of a mapping registry.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_registry_id", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "EntityReference", "required": true, "@type": "SlotDefinition" }, { "name": "mapping_registry_title", "definition_uri": "https://w3id.org/sssom/mapping_registry_title", "description": "The title of a mapping registry.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_registry_title", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_registry_description", "definition_uri": "https://w3id.org/sssom/mapping_registry_description", "description": "The description of a mapping registry.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_registry_description", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "string", "@type": "SlotDefinition" }, { "name": "imports", "definition_uri": "https://w3id.org/sssom/imports", "description": "A list of registries that should be imported into this one.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/imports", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "NonRelativeURI", "multivalued": 
true, "@type": "SlotDefinition" }, { "name": "documentation", "definition_uri": "https://w3id.org/sssom/documentation", "description": "A URL to the documentation of this mapping commons.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/documentation", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "homepage", "definition_uri": "https://w3id.org/sssom/homepage", "description": "A URL to a homepage of this mapping commons.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/homepage", "owner": "MappingRegistry", "domain_of": [ "MappingRegistry" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "mappings", "definition_uri": "https://w3id.org/sssom/mappings", "description": "Contains a list of mapping objects.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mappings", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "Mapping", "recommended": true, "multivalued": true, "inlined": true, "inlined_as_list": true, "@type": "SlotDefinition" }, { "name": "subject_id", "definition_uri": "https://w3id.org/sssom/subject_id", "description": "The ID of the subject of the mapping.", "examples": [ { "value": "HP:0009894", "description": "The CURIE denoting the Human Phenotype Ontology concept of 'Thickened ears'", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/2002/07/owl#annotatedSource", "http://www.w3.org/2002/07/owl#annotatedSource" ], "slot_uri": "http://www.w3.org/2002/07/owl#annotatedSource", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "subject_label", "definition_uri": "https://w3id.org/sssom/subject_label", "description": "The label of subject of the mapping.", "examples": [ { "value": "Thickened ears", "@type": "Example" 
} ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/subject_label", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "recommended": true, "@type": "SlotDefinition" }, { "name": "subject_category", "definition_uri": "https://w3id.org/sssom/subject_category", "description": "The conceptual category to which the subject belongs. This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases.", "examples": [ { "value": "UBERON:0001062", "description": "The CURIE of the Uberon term for \"anatomical entity\".", "@type": "Example" }, { "value": "anatomical entity", "description": "A string, rather than ID, describing the \"anatomical entity\" category. 
This is possible, but less preferred than using an ID.", "@type": "Example" }, { "value": "biolink:Gene", "description": "The CURIE of the biolink class for genes.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/13", "https://github.com/mapping-commons/sssom/issues/256" ], "slot_uri": "https://w3id.org/sssom/subject_category", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "subject_type", "definition_uri": "https://w3id.org/sssom/subject_type", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "The type of entity that is being mapped.", "examples": [ { "value": "owl:Class", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/subject_type", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "entity_type_enum", "@type": "SlotDefinition" }, { "name": "predicate_id", "definition_uri": "https://w3id.org/sssom/predicate_id", "description": "The ID of the predicate or relation that relates the subject and object of this match.", "examples": [ { "value": "owl:sameAs", "description": "The subject and the object are instances (owl individuals), and the two instances are the same.", "@type": "Example" }, { "value": "owl:equivalentClass", "description": "The subject and the object are classes (owl class), and the two classes are the same.", "@type": "Example" }, { "value": "owl:equivalentProperty", "description": "The subject and the object are properties (owl object, data, annotation properties), and the two properties are the same.", "@type": "Example" }, { "value": "rdfs:subClassOf", "description": "The subject and the object are classes (owl class), and the subject is a subclass of the object.", "@type": "Example" }, { "value": "rdfs:subPropertyOf", "description": 
"The subject and the object are properties (owl object, data, annotation properties), and the subject is a subproperty of the object.", "@type": "Example" }, { "value": "skos:relatedMatch", "description": "The subject and the object are associated in some unspecified way.", "@type": "Example" }, { "value": "skos:closeMatch", "description": "The subject and the object are sufficiently similar that they can be used interchangeably in some information retrieval applications.", "@type": "Example" }, { "value": "skos:exactMatch", "description": "The subject and the object can, with a high degree of confidence, be used interchangeably across a wide range of information retrieval applications.", "@type": "Example" }, { "value": "skos:narrowMatch", "description": "From the SKOS primer: A triple <A> skos:narrower <B> (and skos:narrowMatch) asserts that <B>, the object of the triple, is a narrower concept than <A>, the subject of the triple.", "@type": "Example" }, { "value": "skos:broadMatch", "description": "From the SKOS primer: A triple <A> skos:broader <B> (and skos:broadMatch) asserts that <B>, the object of the triple, is a broader concept than <A>, the subject of the triple.", "@type": "Example" }, { "value": "oboInOwl:hasDbXref", "description": "Two terms are related in some way. The meaning is frequently consistent across a single set of mappings. Note this property is often overloaded even where the terms are of a different nature (e.g. interpro2go).", "@type": "Example" }, { "value": "rdfs:seeAlso", "description": "The subject and the object are associated in some unspecified way. 
The object IRI often resolves to a resource on the web that provides additional information.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/2002/07/owl#annotatedProperty", "http://www.w3.org/2002/07/owl#annotatedProperty" ], "slot_uri": "http://www.w3.org/2002/07/owl#annotatedProperty", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "required": true, "@type": "SlotDefinition" }, { "name": "predicate_modifier", "definition_uri": "https://w3id.org/sssom/predicate_modifier", "description": "A modifier for negating the predicate. See https://github.com/mapping-commons/sssom/issues/40 for discussion", "examples": [ { "value": "Not", "description": "Negates the predicate, see documentation of predicate_modifier_enum", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/107" ], "slot_uri": "https://w3id.org/sssom/predicate_modifier", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "predicate_modifier_enum", "@type": "SlotDefinition" }, { "name": "predicate_label", "definition_uri": "https://w3id.org/sssom/predicate_label", "description": "The label of the predicate/relation of the mapping.", "examples": [ { "value": "has cross-reference", "description": "The label of the oboInOwl:hasDbXref property to represent cross-references.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/predicate_label", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "predicate_type", "definition_uri": "https://w3id.org/sssom/predicate_type", "instantiates": [ "sssom:Propagatable", "sssom:Versionable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" }, { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "The type of the predicate used to map 
the subject and object entities.", "examples": [ { "value": "owl:AnnotationProperty", "@type": "Example" }, { "value": "owl:ObjectProperty", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/143", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/predicate-types.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/predicate_type", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "entity_type_enum", "@type": "SlotDefinition" }, { "name": "object_id", "definition_uri": "https://w3id.org/sssom/object_id", "description": "The ID of the object of the mapping.", "examples": [ { "value": "HP:0009894", "description": "The CURIE denoting the Human Phenotype Ontology concept of 'Thickened ears'.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/2002/07/owl#annotatedTarget", "http://www.w3.org/2002/07/owl#annotatedTarget" ], "slot_uri": "http://www.w3.org/2002/07/owl#annotatedTarget", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "object_label", "definition_uri": "https://w3id.org/sssom/object_label", "description": "The label of object of the mapping.", "examples": [ { "value": "Thickened ears", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/object_label", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "recommended": true, "@type": "SlotDefinition" }, { "name": "object_category", "definition_uri": "https://w3id.org/sssom/object_category", "description": "The conceptual category to which the object belongs. This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. 
process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases.", "examples": [ { "value": "UBERON:0001062", "description": "The CURIE of the Uberon term for \"anatomical entity\".", "@type": "Example" }, { "value": "anatomical entity", "description": "A string, rather than ID, describing the \"anatomical entity\" category. This is possible, but less preferred than using an ID.", "@type": "Example" }, { "value": "biolink:Gene", "description": "The CURIE of the biolink class for genes.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/13", "https://github.com/mapping-commons/sssom/issues/256" ], "slot_uri": "https://w3id.org/sssom/object_category", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_justification", "definition_uri": "https://w3id.org/sssom/mapping_justification", "description": "A mapping justification is an action (or the written representation of that action) of showing a mapping to be right or reasonable.", "examples": [ { "value": "semapv:LexicalMatching", "@type": "Example" }, { "value": "semapv:ManualMappingCuration", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/semantic-mapping-vocabulary/", "https://www.ebi.ac.uk/ols4/ontologies/semapv" ], "slot_uri": "https://w3id.org/sssom/mapping_justification", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "required": true, "pattern": 
"^semapv:(MappingReview|ManualMappingCuration|LogicalReasoning|LexicalMatching|CompositeMatching|UnspecifiedMatching|SemanticSimilarityThresholdMatching|LexicalSimilarityThresholdMatching|MappingChaining|MappingInversion|StructuralMatching|InstanceBasedMatching|BackgroundKnowledgeBasedMatching)$", "any_of": [ { "equals_string": "semapv:LexicalMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:LogicalReasoning", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:CompositeMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:UnspecifiedMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:SemanticSimilarityThresholdMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:LexicalSimilarityThresholdMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:MappingChaining", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:MappingReview", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:ManualMappingCuration", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:MappingInversion", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:StructuralMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:InstanceBasedMatching", "@type": "AnonymousSlotExpression" }, { "equals_string": "semapv:BackgroundKnowledgeBasedMatching", "@type": "AnonymousSlotExpression" } ], "@type": "SlotDefinition" }, { "name": "object_type", "definition_uri": "https://w3id.org/sssom/object_type", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "The type of entity that is being mapped.", "examples": [ { "value": "owl:Class", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/object_type", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": 
"entity_type_enum", "@type": "SlotDefinition" }, { "name": "mapping_set_id", "definition_uri": "https://w3id.org/sssom/mapping_set_id", "description": "A globally unique identifier for the mapping set (not each individual mapping). Should ideally be resolvable.", "examples": [ { "value": "http://purl.obolibrary.org/obo/mondo/mappings/mondo_exactmatch_ncit.sssom.tsv", "description": "A persistent URI pointing to the latest version of the Mondo - NCIT mapping in the Mondo namespace.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_set_id", "owner": "MappingSetReference", "domain_of": [ "MappingSet", "MappingSetReference" ], "range": "NonRelativeURI", "required": true, "@type": "SlotDefinition" }, { "name": "mapping_set_version", "definition_uri": "https://w3id.org/sssom/mapping_set_version", "description": "A version string for the mapping.", "examples": [ { "value": "2020-01-01", "description": "A date-based version that indicates that the mapping was published on the 1st January in 2020.", "@type": "Example" }, { "value": "1.2.1", "description": "(A semantic version tag that indicates that this is the 1st major, 2nd minor version, patch 1 (https://semver.org/).)", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/2002/07/owl#versionInfo" ], "slot_uri": "http://www.w3.org/2002/07/owl#versionInfo", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_set_group", "definition_uri": "https://w3id.org/sssom/mapping_set_group", "description": "Set by the owners of the mapping registry. 
A way to group related mapping sets for example for UI purposes.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_set_group", "owner": "MappingSetReference", "domain_of": [ "MappingSetReference" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_set_title", "definition_uri": "https://w3id.org/sssom/mapping_set_title", "description": "The display name of a mapping set.", "examples": [ { "value": "The Mondo-OMIM mappings by Monarch Initiative.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/title" ], "slot_uri": "http://purl.org/dc/terms/title", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_set_description", "definition_uri": "https://w3id.org/sssom/mapping_set_description", "description": "A description of the mapping set.", "examples": [ { "value": "This mapping set was produced to integrate human and mouse phenotype data at the IMPC. It is primarily used for making mouse phenotypes searchable by human synonyms at https://mousephenotype.org/.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/description" ], "slot_uri": "http://purl.org/dc/terms/description", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_set_confidence", "definition_uri": "https://w3id.org/sssom/mapping_set_confidence", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "Mapping-set level confidence is assigned by the creator of the mapping set to indicate their overall confidence in the correctness (i.e., precision) of mappings in the mapping set. 
Mapping set confidence is intended to be used in cases where the creator wants to express an overall confidence into the agent that curated the individual mappings, for example a lexical matching tool, or a group of students.\nWhen not explicitly specified, confidence estimation algorithms should consider the mapping set confidence to be 1.0 by default.", "examples": [ { "value": "0.95", "description": "A confidence score of 0.95, indicating 95% confidence that the mappings in the mapping set are correct.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/confidence-model", "https://github.com/mapping-commons/sssom/issues/438", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/mapping_set_confidence.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/mapping_set_confidence", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "double", "minimum_value": 0.0, "maximum_value": 1.0, "@type": "SlotDefinition" }, { "name": "creator_id", "definition_uri": "https://w3id.org/sssom/creator_id", "description": "Identifies the persons or groups responsible for the creation of the mapping. The creator is the agent that put the mapping in its published form, which may be different from the author, which is a person that was actively involved in the assertion of the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs.", "examples": [ { "value": "orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165", "description": "The ORCID of the (multiple) creators of the mapping. 
Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `creator_id` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "orcid:0000-0002-7356-1779", "description": "The ORCID of the creator of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/creator" ], "slot_uri": "http://purl.org/dc/terms/creator", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "creator_label", "definition_uri": "https://w3id.org/sssom/creator_label", "description": "A string representing the creator of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in creator_id) for that creator. It is not expected that there should be any link between creator_id and creator_label; in particular, creator_label is not intended to provide a human-friendly version of an identifier in creator_id.", "examples": [ { "value": "Nicolas Matentzoglu|Chris Mungall", "description": "The human-readable names of the (multiple) creators of the mapping. 
Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `creator_label` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "Nicolas Matentzoglu", "description": "The human-readable name of the creator of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/creator_label", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "author_id", "definition_uri": "https://w3id.org/sssom/author_id", "description": "Identifies the persons or groups responsible for asserting the mappings. Recommended to be a list of ORCIDs or otherwise identifying URIs.", "examples": [ { "value": "orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165", "description": "The ORCID of the (multiple) authors of the mapping. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `author_id` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "orcid:0000-0002-7356-1779", "description": "The ORCID of the author of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/pav/authoredBy" ], "slot_uri": "http://purl.org/pav/authoredBy", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "author_label", "definition_uri": "https://w3id.org/sssom/author_label", "description": "A string representing the author of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in author_id) for that author. 
It is not expected that there should be any link between author_id and author_label; in particular, author_label is not intended to provide a human-friendly version of an identifier in author_id.", "examples": [ { "value": "Nicolas Matentzoglu|Chris Mungall", "description": "The human-readable names of the (multiple) authors of the mapping. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `author_label` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "Nicolas Matentzoglu", "description": "The human-readable name of the author of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/author_label", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "reviewer_id", "definition_uri": "https://w3id.org/sssom/reviewer_id", "description": "Identifies the persons or groups that reviewed and confirmed the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs.", "examples": [ { "value": "orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165", "description": "The ORCID of the (multiple) reviewers of the mapping. 
Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `reviewer_id` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "orcid:0000-0002-7356-1779", "description": "The ORCID of the reviewer of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/reviewer_id", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "reviewer_label", "definition_uri": "https://w3id.org/sssom/reviewer_label", "description": "A string representing the reviewer of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in reviewer_id) for that reviewer. It is not expected that there should be any link between reviewer_id and reviewer_label; in particular, reviewer_label is not intended to provide a human-friendly version of an identifier in reviewer_id.", "examples": [ { "value": "Nicolas Matentzoglu|Chris Mungall", "description": "The human-readable names of the (multiple) reviewers of the mapping. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `reviewer_label` are represented as single strings containing `|`-separated values.", "@type": "Example" }, { "value": "Nicolas Matentzoglu", "description": "The human-readable name of the reviewer of the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/reviewer_label", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "license", "definition_uri": "https://w3id.org/sssom/license", "description": "A url to the license of the mapping. 
In absence of a license we assume no license.", "examples": [ { "value": "https://creativecommons.org/licenses/by/4.0/", "description": "The URI of the Creative Commons Attribution 4.0 International license.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/license" ], "slot_uri": "http://purl.org/dc/terms/license", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "subject_source", "definition_uri": "https://w3id.org/sssom/subject_source", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "URI of vocabulary or identifier source for the subject.", "examples": [ { "value": "obo:mondo.owl", "description": "A persistent OBO CURIE pointing to the latest version of the Mondo ontology.", "@type": "Example" }, { "value": "wikidata:Q7876491", "description": "A Wikidata identifier for the Uberon ontology resource.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/subject_source", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "subject_source_version", "definition_uri": "https://w3id.org/sssom/subject_source_version", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "Version IRI or version string of the source of the subject term.", "examples": [ { "value": "http://purl.obolibrary.org/obo/mondo/releases/2021-01-30/mondo.owl", "description": "A persistent Version IRI pointing to the Mondo version '2021-01-30'", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/subject_source_version", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": 
"string", "@type": "SlotDefinition" }, { "name": "object_source", "definition_uri": "https://w3id.org/sssom/object_source", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "URI of vocabulary or identifier source for the object.", "examples": [ { "value": "obo:mondo.owl", "description": "A persistent OBO CURIE pointing to the latest version of the Mondo ontology.", "@type": "Example" }, { "value": "wikidata:Q7876491", "description": "A Wikidata identifier for the Uberon ontology resource.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/object_source", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "object_source_version", "definition_uri": "https://w3id.org/sssom/object_source_version", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "Version IRI or version string of the source of the object term.", "examples": [ { "value": "http://purl.obolibrary.org/obo/mondo/releases/2021-01-30/mondo.owl", "description": "A persistent Version IRI pointing to the Mondo version '2021-01-30'", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/object_source_version", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_provider", "definition_uri": "https://w3id.org/sssom/mapping_provider", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "URL pointing to the source that provided the mapping, for example an ontology that already contains the mappings, or a database from which it was derived.", "examples": [ { "value": "https://www.ohdsi.org/", 
"description": "A URL pointing to the Observational Health Data Sciences and Informatics initiative.", "@type": "Example" }, { "value": "https://monarchinitiative.org/", "description": "A URL pointing to the Monarch Initiative Resource.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_provider", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "mapping_set_source", "definition_uri": "https://w3id.org/sssom/mapping_set_source", "description": "A mapping set or set of mapping sets that was used to derive the mapping set.", "examples": [ { "value": "http://purl.obolibrary.org/obo/mondo/mappings/2022-05-20/mondo_exactmatch_ncit.sssom.tsv", "description": "A persistent, ideally versioned, link to the mapping set from which the current mapping set is derived.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/ns/prov#wasDerivedFrom" ], "slot_uri": "http://www.w3.org/ns/prov#wasDerivedFrom", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "NonRelativeURI", "multivalued": true, "@type": "SlotDefinition" }, { "name": "mapping_source", "definition_uri": "https://w3id.org/sssom/mapping_source", "description": "The mapping set this mapping was originally defined in. 
mapping_source is used for example when merging multiple mapping sets or deriving one mapping set from another.", "examples": [ { "value": "MONDO_MAPPINGS:mondo_exactmatch_ncit.sssom.tsv", "description": "A reference to the mapping set that originally contained this mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_source", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "mapping_cardinality", "definition_uri": "https://w3id.org/sssom/mapping_cardinality", "description": "A value indicating whether the subject (respectively object) of this mapping record is present in other records involving a different object (respectively subject), within the subset of records defined by the cardinality_scope slot (or within the entire mapping set if cardinality_scope is undefined). Note that this is a convenience field, whose values can always be derived from the mapping set.", "examples": [ { "value": "1:1", "description": "A one-to-one mapping. There are no other records in which the same subject is mapped to a different object, and no other records in which the same object is mapped to a different subject.", "@type": "Example" }, { "value": "1:n", "description": "A one-to-many mapping. 
There are other records in which the same subject is mapped to at least one different object than the object present in this record; there are no other records in which the object is mapped to a different subject.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-with-unmapped-entities.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-scope-empty.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/mapping_cardinality", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "mapping_cardinality_enum", "@type": "SlotDefinition" }, { "name": "cardinality_scope", "definition_uri": "https://w3id.org/sssom/cardinality_scope", "instantiates": [ "sssom:Propagatable", "sssom:Versionable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" }, { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "A list of mapping slots that define the scope for the value found in the mapping_cardinality slot. Mappings are considered to belong to the same scope if they have the same value for all slots listed in the scope. If no scope is defined, the default scope is empty, meaning that all mappings belong to a single scope that is identical to the entire mapping set. The behaviour if a value in the list does not correspond to a valid slot name is undefined.", "examples": [ { "value": "predicate_id", "description": "Indicates that mapping_cardinality is computed relatively to all mappings that have the same predicate.", "@type": "Example" }, { "value": "predicate_id|object_source", "description": "Indicates that mapping_cardinality is computed relatively to all mappings that have the same predicate and the same object source. 
Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots like `cardinality_scope` are represented as a single string containing `|`-separated values.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/467", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-scope-predicate.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-scope-predicate+object_source.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/cardinality_scope", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "mapping_tool", "definition_uri": "https://w3id.org/sssom/mapping_tool", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "A reference to the tool or algorithm that was used to generate the mapping. Should be a URL pointing to more info about it, but can be free text. 
Consider using the mapping_tool_id slot for a more standardised reference.", "examples": [ { "value": "https://github.com/AgreementMakerLight/AML-Project", "description": "A URL pointing to the AgreementMakerLight project.", "@type": "Example" }, { "value": "AgreementMakerLight", "description": "A string (name) denoting the AgreementMakerLight project.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/mapping_tool", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_tool_id", "definition_uri": "https://w3id.org/sssom/mapping_tool_id", "instantiates": [ "sssom:Propagatable", "sssom:Versionable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" }, { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "The ID (entity reference) of the tool or algorithm that was used to generate the mapping.", "examples": [ { "value": "wikidata:Q58057366", "description": "A wikidata PURL identifying the AgreementMakerLight project.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/blob/master/examples/schema/mapping_tool_id.sssom.tsv", "https://github.com/mapping-commons/sssom/issues/449" ], "slot_uri": "https://w3id.org/sssom/mapping_tool_id", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "mapping_tool_version", "definition_uri": "https://w3id.org/sssom/mapping_tool_version", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "Version string that denotes the version of the mapping tool used.", "examples": [ { "value": "v3.2", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": 
"https://w3id.org/sssom/mapping_tool_version", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_date", "definition_uri": "https://w3id.org/sssom/mapping_date", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "The date the mapping was asserted. This is different from the date the mapping was published or compiled in a SSSOM file.", "examples": [ { "value": "2021-01-01", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/created" ], "slot_uri": "http://purl.org/dc/terms/created", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "date", "@type": "SlotDefinition" }, { "name": "publication_date", "definition_uri": "https://w3id.org/sssom/publication_date", "description": "The date the mapping was published. This is different from the date the mapping was asserted.", "examples": [ { "value": "2021-01-01", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/issued" ], "slot_uri": "http://purl.org/dc/terms/issued", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "date", "@type": "SlotDefinition" }, { "name": "review_date", "definition_uri": "https://w3id.org/sssom/review_date", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "The date the mapping was reviewed. This is different from the date the mapping was asserted and published. 
If this field is used in a mapping, reviewer_id and/or reviewer_label MUST also be set.", "examples": [ { "value": "2021-01-01", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/511", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/review_date.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/review_date", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "date", "@type": "SlotDefinition" }, { "name": "confidence", "definition_uri": "https://w3id.org/sssom/confidence", "description": "A value assigned by the creator of the mapping to denote the creator's confidence or estimated probability that the mapping record is correct. A value of 1.0 means the creator has full confidence in the correctness of the mapping record, while a value of 0.0 means the creator is fully unsure whether the mapping record is correct or not.\nWhen not explicitly specified, confidence estimation algorithms should consider the mapping confidence to be 1.0 by default.", "examples": [ { "value": "0.95", "description": "A confidence score of 0.95, indicating 95% confidence.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/confidence-model" ], "slot_uri": "https://w3id.org/sssom/confidence", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "double", "minimum_value": 0.0, "maximum_value": 1.0, "@type": "SlotDefinition" }, { "name": "reviewer_agreement", "definition_uri": "https://w3id.org/sssom/reviewer_agreement", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "A value assigned by the reviewer of the mapping to denote their confidence that the mapping record is correct. A value of 1.0 means the reviewer fully agrees with the mapping record. 
A value of -1.0 means the reviewer fully disagrees with the mapping record. A value of 0.0 means the reviewer is not sure whether the mapping record is correct or not.\nWhen not explicitly specified, confidence estimation algorithms should consider the reviewer agreement to be 1.0 by default.", "examples": [ { "value": "1.0", "description": "A reviewer agreement of 1.0 denotes that the reviewer considers the mapping record to be correct with full confidence", "@type": "Example" }, { "value": "-1.0", "description": "A reviewer agreement of -1.0 denotes that the reviewer considers the mapping record to be incorrect with full confidence", "@type": "Example" }, { "value": "0.0", "description": "A reviewer agreement of 0.0 denotes that the reviewer is not sure whether the mapping record is correct or not.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/confidence-model", "https://github.com/mapping-commons/sssom/issues/510", "https://github.com/mapping-commons/sssom/pull/519" ], "slot_uri": "https://w3id.org/sssom/reviewer_agreement", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "double", "minimum_value": -1.0, "maximum_value": 1.0, "@type": "SlotDefinition" }, { "name": "subject_match_field", "definition_uri": "https://w3id.org/sssom/subject_match_field", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "A list of properties, annotations or attributes related to the subject that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. 
For additional information see the 'See Also' section.", "examples": [ { "value": "rdfs:label", "description": "The RDFS label property (rdfs:label) was used to match the subject.", "@type": "Example" }, { "value": "skos:prefLabel", "description": "The SKOS preferred label property (skos:prefLabel) was used to match the subject.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/mapping-justifications/#lexical-matching", "https://github.com/mapping-commons/sssom/issues/413" ], "slot_uri": "https://w3id.org/sssom/subject_match_field", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "object_match_field", "definition_uri": "https://w3id.org/sssom/object_match_field", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "A list of properties, annotations or attributes related to the object that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. 
For additional information see the 'See Also' section.", "examples": [ { "value": "rdfs:label", "description": "The RDFS label property (rdfs:label) was used to match the object.", "@type": "Example" }, { "value": "skos:prefLabel", "description": "The SKOS preferred label property (skos:prefLabel) was used to match the object.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://mapping-commons.github.io/sssom/mapping-justifications/#lexical-matching", "https://github.com/mapping-commons/sssom/issues/413" ], "slot_uri": "https://w3id.org/sssom/object_match_field", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "match_string", "definition_uri": "https://w3id.org/sssom/match_string", "description": "String that is shared by subj/obj. It is recommended to indicate the fields for the match using the object and subject_match_field slots.", "examples": [ { "value": "gala", "description": "The 'gala' string was matched for both subject and object.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/match_string", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "subject_preprocessing", "definition_uri": "https://w3id.org/sssom/subject_preprocessing", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "Method of preprocessing applied to the fields of the subject. 
If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows.", "examples": [ { "value": "semapv:Stemming", "@type": "Example" }, { "value": "semapv:StopWordRemoval", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/subject_preprocessing", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "object_preprocessing", "definition_uri": "https://w3id.org/sssom/object_preprocessing", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "Method of preprocessing applied to the fields of the object. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows.", "examples": [ { "value": "semapv:Stemming", "@type": "Example" }, { "value": "semapv:StopWordRemoval", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/object_preprocessing", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "curation_rule", "definition_uri": "https://w3id.org/sssom/curation_rule", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. 
The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule.", "examples": [ { "value": "DISEASE_MAPPING_COMMONS_RULES:MPR2", "description": "A reference to the Disease Mapping Commons rule with the ID MPR2.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/166", "https://github.com/mapping-commons/sssom/pull/258", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule-propagated.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/curation_rule", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "curation_rule_text", "definition_uri": "https://w3id.org/sssom/curation_rule_text", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. 
The textual representation of curation rule is intended to be used in cases where the creation of a resource is not practical from the perspective of the mapping_provider.", "examples": [ { "value": "The two phenotypes inhere in homologous structures and exhibit the same phenotypic quality.", "@type": "Example" }, { "value": "The two diseases are used synonymous in the medical literature.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/166", "https://github.com/mapping-commons/sssom/pull/258", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule_text.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule_text-propagated.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/curation_rule_text", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "multivalued": true, "@type": "SlotDefinition" }, { "name": "similarity_score", "definition_uri": "https://w3id.org/sssom/similarity_score", "description": "A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. 
The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm.", "examples": [ { "value": "0.95", "description": "A similarity score of 0.95, indicating 95% similarity.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/385", "https://github.com/mapping-commons/sssom/pull/386", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/similarity_score.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/similarity_score", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "double", "minimum_value": 0.0, "maximum_value": 1.0, "@type": "SlotDefinition" }, { "name": "similarity_measure", "definition_uri": "https://w3id.org/sssom/similarity_measure", "instantiates": [ "sssom:Propagatable" ], "annotations": [ { "tag": "propagated", "value": true, "@type": "Annotation" } ], "description": "The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. 
To make processing this field as unambiguous as possible, we recommend using wikidata CURIEs, but the type of this field is deliberately unspecified.", "examples": [ { "value": "https://www.wikidata.org/entity/Q865360", "description": "The Wikidata IRI for the Jaccard index measure.", "@type": "Example" }, { "value": "wikidata:Q865360", "description": "The Wikidata CURIE for the Jaccard index measure.", "@type": "Example" }, { "value": "Levenshtein distance", "description": "A score to measure the distance between two character sequences.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/385", "https://github.com/mapping-commons/sssom/pull/386", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/similarity_score.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/similarity_measure", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "issue_tracker_item", "definition_uri": "https://w3id.org/sssom/issue_tracker_item", "description": "The issue tracker item discussing this mapping.", "examples": [ { "value": "SSSOM_GITHUB_ISSUE:166", "description": "A URL resolving to an issue discussing a new SSSOM element request", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/78", "https://github.com/mapping-commons/sssom/pull/259", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/issue_tracker_item.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/issue_tracker_item", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "issue_tracker", "definition_uri": "https://w3id.org/sssom/issue_tracker", "description": "A URL location of the issue tracker for this entity.", "examples": [ { "value": 
"https://github.com/mapping-commons/mh_mapping_initiative/issues", "description": "A URL resolving to the issue tracker of the Mouse-Human mapping initiative", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/78", "https://github.com/mapping-commons/sssom/pull/259", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/issue_tracker.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/issue_tracker", "owner": "MappingRegistry", "domain_of": [ "MappingSet", "MappingRegistry" ], "range": "NonRelativeURI", "@type": "SlotDefinition" }, { "name": "see_also", "definition_uri": "https://w3id.org/sssom/see_also", "description": "A URL specific for the mapping instance. E.g. for kboom we have a per-mapping image that shows surrounding axioms that drive probability. Could also be a github issue URL that discussed a complicated alignment", "examples": [ { "value": "https://github.com/mapping-commons/mh_mapping_initiative/pull/41", "description": "A URL pointing to the pull request that introduced the mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/422" ], "mappings": [ "http://www.w3.org/2000/01/rdf-schema#seeAlso" ], "slot_uri": "http://www.w3.org/2000/01/rdf-schema#seeAlso", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "NonRelativeURI", "multivalued": true, "@type": "SlotDefinition" }, { "name": "other", "definition_uri": "https://w3id.org/sssom/other", "description": "Pipe separated list of key value pairs for properties not part of the SSSOM spec. Can be used to encode additional provenance data. NOTE. This field is not recommended for general use, and should be used sparingly. 
See https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv for an alternative approach based on extension slots.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/other", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "comment", "definition_uri": "https://w3id.org/sssom/comment", "description": "Free text field containing either curator notes or text generated by a tool providing additional information.", "examples": [ { "value": "This mapping is weird in that the hierarchical position of the two terms is very different.", "description": "A comment explaining a mapping author's reservation about a mapping.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://www.w3.org/2000/01/rdf-schema#comment" ], "slot_uri": "http://www.w3.org/2000/01/rdf-schema#comment", "owner": "Mapping", "domain_of": [ "MappingSet", "Mapping" ], "range": "string", "@type": "SlotDefinition" }, { "name": "extension_definitions", "definition_uri": "https://w3id.org/sssom/extension_definitions", "description": "A list that defines the extension slots used in the mapping set.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/328", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/extension_definitions", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "range": "ExtensionDefinition", "multivalued": true, "inlined": true, "inlined_as_list": true, "@type": "SlotDefinition" }, { "name": "record_id", "definition_uri": "https://w3id.org/sssom/record_id", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "A unique identifier for a mapping record, that is for an instance 
of the Mapping class (in the SSSOM/TSV serialisation, this corresponds to an individual row after propagation is applied). This slot is intended to uniquely identify one such record within a mapping set and may for example act as the resource identifier for the record when it is serialised into RDF. This slot MUST NOT be used to \u201cgroup\u201d several records together to indicate that they pertain to a single mapping (for example, that they represent different versions of the same mapping), by assigning the same ID to several records. When it is used, every record within a set MUST have a unique, non-empty value. The identifier MUST be a URI; beyond that, its format is unconstrained and the identifier MUST be treated as an opaque string.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/359", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/record-ids.sssom.tsv" ], "slot_uri": "https://w3id.org/sssom/record_id", "owner": "Mapping", "domain_of": [ "Mapping" ], "range": "EntityReference", "@type": "SlotDefinition" }, { "name": "extensionDefinition__slot_name", "description": "The name of the extension slot.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/slot_name", "alias": "slot_name", "owner": "ExtensionDefinition", "domain_of": [ "ExtensionDefinition" ], "range": "ncname", "required": true, "@type": "SlotDefinition" }, { "name": "extensionDefinition__property", "description": "The property associated with the extension slot. 
It is intended to provide a non-ambiguous meaning to the slot (contrary to the slot_name, which for brevity reasons may be ambiguous).", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/property", "alias": "property", "owner": "ExtensionDefinition", "domain_of": [ "ExtensionDefinition" ], "range": "uriorcurie", "@type": "SlotDefinition" }, { "name": "extensionDefinition__type_hint", "description": "Expected type of the values of the extension slot.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/type_hint", "alias": "type_hint", "owner": "ExtensionDefinition", "domain_of": [ "ExtensionDefinition" ], "range": "uriorcurie", "@type": "SlotDefinition" }, { "name": "propagatable__propagated", "description": "Indicates whether a slot can be propagated from a mapping set down to individual mappings.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/propagated", "alias": "propagated", "owner": "Propagatable", "domain_of": [ "Propagatable" ], "range": "boolean", "@type": "SlotDefinition" }, { "name": "versionable__added_in", "description": "The version of the specification in which the slot was added. If not specified, the slot must be assumed to have been added in version 1.0.", "from_schema": "https://w3id.org/sssom/schema/", "slot_uri": "https://w3id.org/sssom/added_in", "alias": "added_in", "owner": "Versionable", "domain_of": [ "Versionable" ], "range": "sssom_version_enum", "@type": "SlotDefinition" }, { "name": "mapping_set_license", "definition_uri": "https://w3id.org/sssom/license", "description": "A url to the license of the mapping. 
In absence of a license we assume no license.", "examples": [ { "value": "https://creativecommons.org/licenses/by/4.0/", "description": "The URI of the Creative Commons Attribution 4.0 International license.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "http://purl.org/dc/terms/license" ], "is_a": "license", "domain": "MappingSet", "slot_uri": "http://purl.org/dc/terms/license", "alias": "license", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "is_usage_slot": true, "usage_slot_name": "license", "range": "NonRelativeURI", "required": true, "@type": "SlotDefinition" }, { "name": "mapping_set_similarity_measure", "definition_uri": "https://w3id.org/sssom/similarity_measure", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. 
To make processing this field as unambiguous as possible, we recommend using Wikidata CURIEs, but the type of this field is deliberately unspecified.", "examples": [ { "value": "https://www.wikidata.org/entity/Q865360", "description": "the Wikidata IRI for the Jaccard index measure.", "@type": "Example" }, { "value": "wikidata:Q865360", "description": "the Wikidata CURIE for the Jaccard index measure.", "@type": "Example" }, { "value": "Levenshtein distance", "description": "a score to measure the distance between two character sequences.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/385", "https://github.com/mapping-commons/sssom/pull/386", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/similarity_score.sssom.tsv" ], "is_a": "similarity_measure", "domain": "MappingSet", "slot_uri": "https://w3id.org/sssom/similarity_measure", "alias": "similarity_measure", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "is_usage_slot": true, "usage_slot_name": "similarity_measure", "range": "string", "@type": "SlotDefinition" }, { "name": "mapping_set_curation_rule", "definition_uri": "https://w3id.org/sssom/curation_rule", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. 
The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule.", "examples": [ { "value": "DISEASE_MAPPING_COMMONS_RULES:MPR2", "description": "A reference to the Disease Mapping Commons rule with the ID MPR2.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/166", "https://github.com/mapping-commons/sssom/pull/258", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule-propagated.sssom.tsv" ], "is_a": "curation_rule", "domain": "MappingSet", "slot_uri": "https://w3id.org/sssom/curation_rule", "alias": "curation_rule", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "is_usage_slot": true, "usage_slot_name": "curation_rule", "range": "EntityReference", "multivalued": true, "@type": "SlotDefinition" }, { "name": "mapping_set_curation_rule_text", "definition_uri": "https://w3id.org/sssom/curation_rule_text", "instantiates": [ "sssom:Versionable" ], "annotations": [ { "tag": "added_in", "value": "1.1", "@type": "Annotation" } ], "description": "A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. 
The textual representation of curation rule is intended to be used in cases where the creation of a resource is not practical from the perspective of the mapping_provider.", "examples": [ { "value": "The two phenotypes inhere in homologous structures and exhibit the same phenotypic quality.", "@type": "Example" }, { "value": "The two diseases are used synonymous in the medical literature.", "@type": "Example" } ], "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/166", "https://github.com/mapping-commons/sssom/pull/258", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule_text.sssom.tsv", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule_text-propagated.sssom.tsv" ], "is_a": "curation_rule_text", "domain": "MappingSet", "slot_uri": "https://w3id.org/sssom/curation_rule_text", "alias": "curation_rule_text", "owner": "MappingSet", "domain_of": [ "MappingSet" ], "is_usage_slot": true, "usage_slot_name": "curation_rule_text", "range": "string", "multivalued": true, "@type": "SlotDefinition" } ], "classes": [ { "name": "MappingSet", "definition_uri": "https://w3id.org/sssom/MappingSet", "description": "Represents a set of mappings.", "from_schema": "https://w3id.org/sssom/schema/", "slots": [ "sssom_version", "curie_map", "mappings", "mapping_set_id", "mapping_set_version", "mapping_set_source", "mapping_set_title", "mapping_set_description", "mapping_set_confidence", "creator_id", "creator_label", "mapping_set_license", "subject_type", "subject_source", "subject_source_version", "object_type", "object_source", "object_source_version", "predicate_type", "mapping_provider", "cardinality_scope", "mapping_tool", "mapping_tool_id", "mapping_tool_version", "mapping_date", "publication_date", "subject_match_field", "object_match_field", "subject_preprocessing", "object_preprocessing", "mapping_set_similarity_measure", "mapping_set_curation_rule", 
"mapping_set_curation_rule_text", "see_also", "issue_tracker", "other", "comment", "extension_definitions" ], "slot_usage": {}, "class_uri": "https://w3id.org/sssom/MappingSet", "@type": "ClassDefinition" }, { "name": "Mapping", "definition_uri": "https://w3id.org/sssom/Mapping", "description": "Represents an individual mapping between a pair of entities.", "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "owl:Axiom" ], "slots": [ "record_id", "subject_id", "subject_label", "subject_category", "predicate_id", "predicate_label", "predicate_modifier", "object_id", "object_label", "object_category", "mapping_justification", "author_id", "author_label", "reviewer_id", "reviewer_label", "creator_id", "creator_label", "license", "subject_type", "subject_source", "subject_source_version", "object_type", "object_source", "object_source_version", "predicate_type", "mapping_provider", "mapping_source", "mapping_cardinality", "cardinality_scope", "mapping_tool", "mapping_tool_id", "mapping_tool_version", "mapping_date", "publication_date", "review_date", "confidence", "reviewer_agreement", "curation_rule", "curation_rule_text", "subject_match_field", "object_match_field", "match_string", "subject_preprocessing", "object_preprocessing", "similarity_score", "similarity_measure", "see_also", "issue_tracker_item", "other", "comment" ], "slot_usage": {}, "class_uri": "http://www.w3.org/2002/07/owl#Axiom", "unique_keys": [ { "unique_key_name": "record_identifier", "unique_key_slots": [ "record_id" ], "description": "Each mapping within a mapping set MAY be identified by a unique, opaque record identifier. This slot MUST be used consistently, in that either all mappings in the set have a such a record identifier, or none of them have one. The behaviour when a set contains both mappings with a record identifier and mappings without a record identifier is unspecified. 
The behaviour when two mappings have the same record identifier is unspecified.", "@type": "UniqueKey" } ], "rules": [ { "preconditions": { "slot_conditions": [ { "name": "subject_type", "equals_string": "rdfs literal", "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "slot_conditions": [ { "name": "subject_label", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "@type": "ClassRule" }, { "preconditions": { "slot_conditions": [ { "name": "subject_type", "none_of": [ { "equals_string": "rdfs literal", "@type": "AnonymousSlotExpression" } ], "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "slot_conditions": [ { "name": "subject_id", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "@type": "ClassRule" }, { "preconditions": { "slot_conditions": [ { "name": "object_type", "equals_string": "rdfs literal", "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "slot_conditions": [ { "name": "object_label", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "@type": "ClassRule" }, { "preconditions": { "slot_conditions": [ { "name": "object_type", "none_of": [ { "equals_string": "rdfs literal", "@type": "AnonymousSlotExpression" } ], "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "slot_conditions": [ { "name": "object_id", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "@type": "ClassRule" }, { "preconditions": { "slot_conditions": [ { "name": "review_date", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "any_of": [ { "slot_conditions": [ { "name": "reviewer_id", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, { "slot_conditions": [ { "name": "reviewer_label", 
"required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" } ], "@type": "AnonymousClassExpression" }, "description": "If a review date is provided, then at least one of reviewer_id or reviewer_label must also be provided", "@type": "ClassRule" }, { "preconditions": { "slot_conditions": [ { "name": "reviewer_agreement", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, "postconditions": { "any_of": [ { "slot_conditions": [ { "name": "reviewer_id", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" }, { "slot_conditions": [ { "name": "reviewer_label", "required": true, "@type": "SlotDefinition" } ], "@type": "AnonymousClassExpression" } ], "@type": "AnonymousClassExpression" }, "description": "If a reviewer agreement value is provided, then at least one of reviewer_id or reviewer_label must also be provided", "@type": "ClassRule" } ], "@type": "ClassDefinition" }, { "name": "MappingRegistry", "definition_uri": "https://w3id.org/sssom/MappingRegistry", "description": "A registry for managing mapping sets. It holds a set of mapping set references, and can import other registries.", "from_schema": "https://w3id.org/sssom/schema/", "slots": [ "mapping_registry_id", "mapping_registry_title", "mapping_registry_description", "imports", "mapping_set_references", "documentation", "homepage", "issue_tracker" ], "slot_usage": {}, "class_uri": "https://w3id.org/sssom/MappingRegistry", "@type": "ClassDefinition" }, { "name": "MappingSetReference", "definition_uri": "https://w3id.org/sssom/MappingSetReference", "description": "A reference to a mapping set. 
It allows to augment mapping set metadata from the perspective of the registry, for example, providing confidence, or a local filename or a grouping.", "from_schema": "https://w3id.org/sssom/schema/", "slots": [ "mapping_set_id", "mirror_from", "registry_confidence", "mapping_set_group", "last_updated", "local_name" ], "slot_usage": {}, "class_uri": "https://w3id.org/sssom/MappingSetReference", "@type": "ClassDefinition" }, { "name": "Prefix", "definition_uri": "https://w3id.org/sssom/Prefix", "from_schema": "https://w3id.org/sssom/schema/", "slots": [ "prefix_name", "prefix_url" ], "slot_usage": {}, "class_uri": "https://w3id.org/sssom/Prefix", "@type": "ClassDefinition" }, { "name": "ExtensionDefinition", "definition_uri": "https://w3id.org/sssom/ExtensionDefinition", "description": "A definition of an extension (non-standard) slot.", "from_schema": "https://w3id.org/sssom/schema/", "slots": [ "extensionDefinition__slot_name", "extensionDefinition__property", "extensionDefinition__type_hint" ], "slot_usage": {}, "attributes": [ { "name": "slot_name", "description": "The name of the extension slot.", "range": "ncname", "required": true, "@type": "SlotDefinition" }, { "name": "property", "description": "The property associated with the extension slot. 
It is intended to provide a non-ambiguous meaning to the slot (contrary to the slot_name, which for brevity reasons may be ambiguous).", "range": "uriorcurie", "@type": "SlotDefinition" }, { "name": "type_hint", "description": "Expected type of the values of the extension slot.", "range": "uriorcurie", "@type": "SlotDefinition" } ], "class_uri": "https://w3id.org/sssom/ExtensionDefinition", "@type": "ClassDefinition" }, { "name": "Propagatable", "definition_uri": "https://w3id.org/sssom/Propagatable", "description": "Metamodel extension class to describe slots whose value can be propagated down from the MappingSet class to the Mapping class.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/305" ], "mappings": [ "sssom:Propagatable" ], "slots": [ "propagatable__propagated" ], "slot_usage": {}, "attributes": [ { "name": "propagated", "description": "Indicates whether a slot can be propagated from a mapping down to individual mappings.", "range": "boolean", "@type": "SlotDefinition" } ], "class_uri": "https://w3id.org/sssom/Propagatable", "@type": "ClassDefinition" }, { "name": "Versionable", "definition_uri": "https://w3id.org/sssom/Versionable", "description": "Metamodel extension class to manage slots that may not exist in all versions of the model.", "from_schema": "https://w3id.org/sssom/schema/", "mappings": [ "sssom:Versionable" ], "slots": [ "versionable__added_in" ], "slot_usage": {}, "attributes": [ { "name": "added_in", "description": "The version of the specification in which the slot was added. 
If not specified, the slot must be assumed to have been added in version 1.0.", "range": "sssom_version_enum", "@type": "SlotDefinition" } ], "class_uri": "https://w3id.org/sssom/Versionable", "@type": "ClassDefinition" }, { "name": "NoTermFound", "definition_uri": "https://w3id.org/sssom/NoTermFound", "description": "sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjunction with a corresponding subject_source or object_source to signify where the term was not found.", "from_schema": "https://w3id.org/sssom/schema/", "see_also": [ "https://github.com/mapping-commons/sssom/issues/28", "https://github.com/mapping-commons/sssom/blob/master/examples/schema/no_term_found.sssom.tsv" ], "mappings": [ "sssom:NoTermFound" ], "slot_usage": {}, "class_uri": "https://w3id.org/sssom/NoTermFound", "@type": "ClassDefinition" } ], "metamodel_version": "1.7.0", "source_file": "sssom_schema.yaml", "source_file_date": "2026-04-04T08:06:51", "source_file_size": 58930, "generation_date": "2026-04-08T14:02:56", "@type": "SchemaDefinition", "@context": [ "project/jsonld/sssom_schema.context.jsonld", "https://w3id.org/linkml/types.context.jsonld", { "@base": "https://w3id.org/sssom/" } ] } ================================================ FILE: src/sssom_schema/datamodel/__init__.py ================================================ ================================================ FILE: src/sssom_schema/datamodel/sssom_schema.py ================================================ # Auto generated from sssom_schema.yaml by pythongen.py version: 0.0.1 # Generation date: 2026-04-14T16:00:32 # Schema: sssom # # id: https://w3id.org/sssom/schema/ # description: Datamodel for Simple Standard for Sharing Ontological Mappings (SSSOM) # license: https://creativecommons.org/publicdomain/zero/1.0/ import dataclasses import re from dataclasses import dataclass from datetime import ( date, datetime, time ) from 
metamodel_version = "1.7.0"
version = None

# Namespaces
DCTERMS = CurieNamespace('dcterms', 'http://purl.org/dc/terms/')
LINKML = CurieNamespace('linkml', 'https://w3id.org/linkml/')
OBOINOWL = CurieNamespace('oboInOwl', 'http://www.geneontology.org/formats/oboInOwl#')
OWL = CurieNamespace('owl', 'http://www.w3.org/2002/07/owl#')
PAV = CurieNamespace('pav', 'http://purl.org/pav/')
PROV = CurieNamespace('prov', 'http://www.w3.org/ns/prov#')
RDF = CurieNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#')
RDFS = CurieNamespace('rdfs', 'http://www.w3.org/2000/01/rdf-schema#')
SEMAPV = CurieNamespace('semapv', 'https://w3id.org/semapv/vocab/')
SKOS = CurieNamespace('skos', 'http://www.w3.org/2004/02/skos/core#')
SSSOM = CurieNamespace('sssom', 'https://w3id.org/sssom/')
XSD = CurieNamespace('xsd', 'http://www.w3.org/2001/XMLSchema#')
DEFAULT_ = SSSOM


# Types
class EntityReference(Uriorcurie):
    """Reference (URI or CURIE) to an entity involved in the mapping."""

    type_class_uri = RDFS["Resource"]
    type_class_curie = "rdfs:Resource"
    type_name = "EntityReference"
    type_model_uri = SSSOM.EntityReference


class NonRelativeURI(Uri):
    """URI as per RFC 3986 that is never a relative reference.

    The value is a string matching the production of the "URI" rule defined
    in Appendix A of RFC 3986. Contrary to the underlying LinkML type, this
    specifically excludes _relative URI references_ (those that do not start
    with a scheme component): SSSOM has no built-in mechanism to provide the
    base URI that would be needed to resolve them into non-relative ones.
    """

    type_class_uri = XSD["anyURI"]
    type_class_curie = "xsd:anyURI"
    type_name = "NonRelativeURI"
    type_model_uri = SSSOM.NonRelativeURI


# Class references
class PrefixPrefixName(NCName):
    pass


@dataclass(repr=False)
class MappingSet(YAMLRoot):
    """Represents a set of mappings."""

    _inherited_slots: ClassVar[list[str]] = []

    class_class_uri: ClassVar[URIRef] = SSSOM["MappingSet"]
    class_class_curie: ClassVar[str] = "sssom:MappingSet"
    class_name: ClassVar[str] = "mapping set"
    class_model_uri: ClassVar[URIRef] = SSSOM.MappingSet

    # Required slots (checked first in __post_init__).
    mapping_set_id: Union[str, NonRelativeURI] = None
    license: Union[str, NonRelativeURI] = None

    # Optional slots; multivalued ones default to an empty container.
    sssom_version: Optional[Union[str, "SssomVersionEnum"]] = None
    curie_map: Optional[Union[dict[Union[str, PrefixPrefixName], Union[dict, "Prefix"]], list[Union[dict, "Prefix"]]]] = empty_dict()
    mappings: Optional[Union[Union[dict, "Mapping"], list[Union[dict, "Mapping"]]]] = empty_list()
    mapping_set_version: Optional[str] = None
    mapping_set_source: Optional[Union[Union[str, NonRelativeURI], list[Union[str, NonRelativeURI]]]] = empty_list()
    mapping_set_title: Optional[str] = None
    mapping_set_description: Optional[str] = None
    mapping_set_confidence: Optional[float] = None
    creator_id: Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]] = empty_list()
    creator_label: Optional[Union[str, list[str]]] = empty_list()
    subject_type: Optional[Union[str, "EntityTypeEnum"]] = None
    subject_source: Optional[Union[str, EntityReference]] = None
    subject_source_version: Optional[str] = None
    object_type: Optional[Union[str, "EntityTypeEnum"]] = None
    object_source: Optional[Union[str, EntityReference]] = None
    object_source_version: Optional[str] = None
    predicate_type: Optional[Union[str, "EntityTypeEnum"]] = None
    mapping_provider: Optional[Union[str, NonRelativeURI]] = None
    cardinality_scope: Optional[Union[str, list[str]]] = empty_list()
    mapping_tool: Optional[str] = None
    mapping_tool_id: Optional[Union[str, EntityReference]] = None
    mapping_tool_version: Optional[str] = None
    mapping_date: Optional[Union[str, XSDDate]] = None
    publication_date: Optional[Union[str, XSDDate]] = None
    subject_match_field: Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]] = empty_list()
    object_match_field: Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]] = empty_list()
    subject_preprocessing: Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]] = empty_list()
    object_preprocessing: Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]] = empty_list()
    similarity_measure: Optional[str] = None
    curation_rule: Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]] = empty_list()
    curation_rule_text: Optional[Union[str, list[str]]] = empty_list()
    see_also: Optional[Union[Union[str, NonRelativeURI], list[Union[str, NonRelativeURI]]]] = empty_list()
    issue_tracker: Optional[Union[str, NonRelativeURI]] = None
    other: Optional[str] = None
    comment: Optional[str] = None
    extension_definitions: Optional[Union[Union[dict, "ExtensionDefinition"], list[Union[dict, "ExtensionDefinition"]]]] = empty_list()

    def __post_init__(self, *_: str, **kwargs: Any):
        """Check the required slots and coerce every slot to its range type.

        Slots are processed one after the other in declaration order, so the
        first offending slot is the one whose error surfaces.
        """

        def require(slot: str, range_type: type) -> None:
            # Required single-valued slot: report it when empty, then coerce.
            if self._is_empty(getattr(self, slot)):
                self.MissingRequiredField(slot)
            if not isinstance(getattr(self, slot), range_type):
                setattr(self, slot, range_type(getattr(self, slot)))

        def coerce(slot: str, range_type: type) -> None:
            # Optional single-valued slot: None is left untouched.
            current = getattr(self, slot)
            if current is None or isinstance(current, range_type):
                return
            setattr(self, slot, range_type(current))

        def coerce_each(slot: str, range_type: type) -> None:
            # Multivalued slot: wrap a lone value (None becomes an empty
            # list), then coerce the members one by one.
            if not isinstance(getattr(self, slot), list):
                lone = getattr(self, slot)
                setattr(self, slot, [lone] if lone is not None else [])
            setattr(
                self,
                slot,
                [
                    member if isinstance(member, range_type) else range_type(member)
                    for member in getattr(self, slot)
                ],
            )

        require("mapping_set_id", NonRelativeURI)
        require("license", NonRelativeURI)

        coerce("sssom_version", SssomVersionEnum)

        self._normalize_inlined_as_dict(slot_name="curie_map", slot_type=Prefix, key_name="prefix_name", keyed=True)

        self._normalize_inlined_as_list(slot_name="mappings", slot_type=Mapping, key_name="predicate_id", keyed=False)

        coerce("mapping_set_version", str)
        coerce_each("mapping_set_source", NonRelativeURI)
        coerce("mapping_set_title", str)
        coerce("mapping_set_description", str)
        coerce("mapping_set_confidence", float)
        coerce_each("creator_id", EntityReference)
        coerce_each("creator_label", str)
        coerce("subject_type", EntityTypeEnum)
        coerce("subject_source", EntityReference)
        coerce("subject_source_version", str)
        coerce("object_type", EntityTypeEnum)
        coerce("object_source", EntityReference)
        coerce("object_source_version", str)
        coerce("predicate_type", EntityTypeEnum)
        coerce("mapping_provider", NonRelativeURI)
        coerce_each("cardinality_scope", str)
        coerce("mapping_tool", str)
        coerce("mapping_tool_id", EntityReference)
        coerce("mapping_tool_version", str)
        coerce("mapping_date", XSDDate)
        coerce("publication_date", XSDDate)
        coerce_each("subject_match_field", EntityReference)
        coerce_each("object_match_field", EntityReference)
        coerce_each("subject_preprocessing", EntityReference)
        coerce_each("object_preprocessing", EntityReference)
        coerce("similarity_measure", str)
        coerce_each("curation_rule", EntityReference)
        coerce_each("curation_rule_text", str)
        coerce_each("see_also", NonRelativeURI)
        coerce("issue_tracker", NonRelativeURI)
        coerce("other", str)
        coerce("comment", str)

        self._normalize_inlined_as_list(slot_name="extension_definitions", slot_type=ExtensionDefinition, key_name="slot_name", keyed=False)

        super().__post_init__(**kwargs)
"EntityTypeEnum"]] = None subject_source: Optional[Union[str, EntityReference]] = None subject_source_version: Optional[str] = None object_type: Optional[Union[str, "EntityTypeEnum"]] = None object_source: Optional[Union[str, EntityReference]] = None object_source_version: Optional[str] = None predicate_type: Optional[Union[str, "EntityTypeEnum"]] = None mapping_provider: Optional[Union[str, NonRelativeURI]] = None mapping_source: Optional[Union[str, EntityReference]] = None mapping_cardinality: Optional[Union[str, "MappingCardinalityEnum"]] = None cardinality_scope: Optional[Union[str, list[str]]] = empty_list() mapping_tool: Optional[str] = None mapping_tool_id: Optional[Union[str, EntityReference]] = None mapping_tool_version: Optional[str] = None mapping_date: Optional[Union[str, XSDDate]] = None publication_date: Optional[Union[str, XSDDate]] = None review_date: Optional[Union[str, XSDDate]] = None confidence: Optional[float] = None reviewer_agreement: Optional[float] = None curation_rule: Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]] = empty_list() curation_rule_text: Optional[Union[str, list[str]]] = empty_list() subject_match_field: Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]] = empty_list() object_match_field: Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]] = empty_list() match_string: Optional[Union[str, list[str]]] = empty_list() subject_preprocessing: Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]] = empty_list() object_preprocessing: Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]] = empty_list() similarity_score: Optional[float] = None similarity_measure: Optional[str] = None see_also: Optional[Union[Union[str, NonRelativeURI], list[Union[str, NonRelativeURI]]]] = empty_list() issue_tracker_item: Optional[Union[str, EntityReference]] = None other: Optional[str] = None comment: Optional[str] 
def __post_init__(self, *_: str, **kwargs: Any):
    """
    Check the two required slots and coerce every slot to its declared range type.

    Scalar slots are converted only when they are set and not already of the target type.
    Multivalued slots are first wrapped into a list (``None`` becomes an empty list) and
    then converted element by element. Slots are processed in declaration order before
    control is handed to the parent ``__post_init__``.
    """
    # Required slots: an empty value is reported through MissingRequiredField.
    for required_slot in ("predicate_id", "mapping_justification"):
        if self._is_empty(getattr(self, required_slot)):
            self.MissingRequiredField(required_slot)
        if not isinstance(getattr(self, required_slot), EntityReference):
            setattr(self, required_slot, EntityReference(getattr(self, required_slot)))

    # (slot name, range type, multivalued) -- the order mirrors the slot declarations.
    # Built at call time because the enum classes are defined further down the module.
    optional_slots = (
        ("record_id", EntityReference, False),
        ("subject_id", EntityReference, False),
        ("subject_label", str, False),
        ("subject_category", str, False),
        ("predicate_label", str, False),
        ("predicate_modifier", PredicateModifierEnum, False),
        ("object_id", EntityReference, False),
        ("object_label", str, False),
        ("object_category", str, False),
        ("author_id", EntityReference, True),
        ("author_label", str, True),
        ("reviewer_id", EntityReference, True),
        ("reviewer_label", str, True),
        ("creator_id", EntityReference, True),
        ("creator_label", str, True),
        ("license", NonRelativeURI, False),
        ("subject_type", EntityTypeEnum, False),
        ("subject_source", EntityReference, False),
        ("subject_source_version", str, False),
        ("object_type", EntityTypeEnum, False),
        ("object_source", EntityReference, False),
        ("object_source_version", str, False),
        ("predicate_type", EntityTypeEnum, False),
        ("mapping_provider", NonRelativeURI, False),
        ("mapping_source", EntityReference, False),
        ("mapping_cardinality", MappingCardinalityEnum, False),
        ("cardinality_scope", str, True),
        ("mapping_tool", str, False),
        ("mapping_tool_id", EntityReference, False),
        ("mapping_tool_version", str, False),
        ("mapping_date", XSDDate, False),
        ("publication_date", XSDDate, False),
        ("review_date", XSDDate, False),
        ("confidence", float, False),
        ("reviewer_agreement", float, False),
        ("curation_rule", EntityReference, True),
        ("curation_rule_text", str, True),
        ("subject_match_field", EntityReference, True),
        ("object_match_field", EntityReference, True),
        ("match_string", str, True),
        ("subject_preprocessing", EntityReference, True),
        ("object_preprocessing", EntityReference, True),
        ("similarity_score", float, False),
        ("similarity_measure", str, False),
        ("see_also", NonRelativeURI, True),
        ("issue_tracker_item", EntityReference, False),
        ("other", str, False),
        ("comment", str, False),
    )

    for slot_name, slot_range, multivalued in optional_slots:
        if multivalued:
            if not isinstance(getattr(self, slot_name), list):
                single = getattr(self, slot_name)
                setattr(self, slot_name, [] if single is None else [single])
            setattr(
                self,
                slot_name,
                [
                    item if isinstance(item, slot_range) else slot_range(item)
                    for item in getattr(self, slot_name)
                ],
            )
        else:
            current = getattr(self, slot_name)
            if not (current is None or isinstance(current, slot_range)):
                setattr(self, slot_name, slot_range(current))

    super().__post_init__(**kwargs)
@dataclass(repr=False)
class MappingRegistry(YAMLRoot):
    """
    A registry for managing mapping sets. It holds a list of mapping set references and may import
    other registries.
    """
    _inherited_slots: ClassVar[list[str]] = []

    class_class_uri: ClassVar[URIRef] = SSSOM["MappingRegistry"]
    class_class_curie: ClassVar[str] = "sssom:MappingRegistry"
    class_name: ClassVar[str] = "mapping registry"
    class_model_uri: ClassVar[URIRef] = SSSOM.MappingRegistry

    mapping_registry_id: Union[str, EntityReference] = None
    mapping_registry_title: Optional[str] = None
    mapping_registry_description: Optional[str] = None
    imports: Optional[Union[Union[str, NonRelativeURI], list[Union[str, NonRelativeURI]]]] = empty_list()
    mapping_set_references: Optional[Union[Union[dict, "MappingSetReference"], list[Union[dict, "MappingSetReference"]]]] = empty_list()
    documentation: Optional[Union[str, NonRelativeURI]] = None
    homepage: Optional[Union[str, NonRelativeURI]] = None
    issue_tracker: Optional[Union[str, NonRelativeURI]] = None

    def __post_init__(self, *_: str, **kwargs: Any):
        # Required identifier: reported through MissingRequiredField when empty.
        registry_id = self.mapping_registry_id
        if self._is_empty(registry_id):
            self.MissingRequiredField("mapping_registry_id")
        if not isinstance(registry_id, EntityReference):
            self.mapping_registry_id = EntityReference(registry_id)

        # Free-text slots are stringified when set to a non-string value.
        for text_slot in ("mapping_registry_title", "mapping_registry_description"):
            text_value = getattr(self, text_slot)
            if not (text_value is None or isinstance(text_value, str)):
                setattr(self, text_slot, str(text_value))

        # `imports` is multivalued: wrap a scalar (or None) first, then coerce each entry.
        if not isinstance(self.imports, list):
            self.imports = [] if self.imports is None else [self.imports]
        self.imports = [
            target if isinstance(target, NonRelativeURI) else NonRelativeURI(target)
            for target in self.imports
        ]

        self._normalize_inlined_as_list(
            slot_name="mapping_set_references",
            slot_type=MappingSetReference,
            key_name="mapping_set_id",
            keyed=False,
        )

        # Remaining optional slots all share the NonRelativeURI range.
        for uri_slot in ("documentation", "homepage", "issue_tracker"):
            uri_value = getattr(self, uri_slot)
            if not (uri_value is None or isinstance(uri_value, NonRelativeURI)):
                setattr(self, uri_slot, NonRelativeURI(uri_value))

        super().__post_init__(**kwargs)
@dataclass(repr=False)
class MappingSetReference(YAMLRoot):
    """
    A reference to a mapping set. It lets a registry augment the mapping set's metadata from its own
    perspective, for example by providing a confidence, a local filename or a grouping.
    """
    _inherited_slots: ClassVar[list[str]] = []

    class_class_uri: ClassVar[URIRef] = SSSOM["MappingSetReference"]
    class_class_curie: ClassVar[str] = "sssom:MappingSetReference"
    class_name: ClassVar[str] = "mapping set reference"
    class_model_uri: ClassVar[URIRef] = SSSOM.MappingSetReference

    mapping_set_id: Union[str, NonRelativeURI] = None
    mirror_from: Optional[Union[str, NonRelativeURI]] = None
    registry_confidence: Optional[float] = None
    mapping_set_group: Optional[str] = None
    last_updated: Optional[Union[str, XSDDate]] = None
    local_name: Optional[str] = None

    def __post_init__(self, *_: str, **kwargs: Any):
        # Required identifier: reported through MissingRequiredField when empty.
        set_id = self.mapping_set_id
        if self._is_empty(set_id):
            self.MissingRequiredField("mapping_set_id")
        if not isinstance(set_id, NonRelativeURI):
            self.mapping_set_id = NonRelativeURI(set_id)

        # Optional scalar slots, coerced in declaration order.
        scalar_ranges = (
            ("mirror_from", NonRelativeURI),
            ("registry_confidence", float),
            ("mapping_set_group", str),
            ("last_updated", XSDDate),
            ("local_name", str),
        )
        for slot_name, slot_range in scalar_ranges:
            current = getattr(self, slot_name)
            if not (current is None or isinstance(current, slot_range)):
                setattr(self, slot_name, slot_range(current))

        super().__post_init__(**kwargs)


@dataclass(repr=False)
class Prefix(YAMLRoot):
    """
    A prefix declaration: a required prefix name together with an optional prefix URL.
    """
    _inherited_slots: ClassVar[list[str]] = []

    class_class_uri: ClassVar[URIRef] = SSSOM["Prefix"]
    class_class_curie: ClassVar[str] = "sssom:Prefix"
    class_name: ClassVar[str] = "prefix"
    class_model_uri: ClassVar[URIRef] = SSSOM.Prefix

    prefix_name: Union[str, PrefixPrefixName] = None
    prefix_url: Optional[Union[str, URI]] = None

    def __post_init__(self, *_: str, **kwargs: Any):
        # The prefix name is the required key of the record.
        name = self.prefix_name
        if self._is_empty(name):
            self.MissingRequiredField("prefix_name")
        if not isinstance(name, PrefixPrefixName):
            self.prefix_name = PrefixPrefixName(name)

        url = self.prefix_url
        if not (url is None or isinstance(url, URI)):
            self.prefix_url = URI(url)

        super().__post_init__(**kwargs)
@dataclass(repr=False)
class ExtensionDefinition(YAMLRoot):
    """
    The definition of an extension (i.e. non-standard) slot.
    """
    _inherited_slots: ClassVar[list[str]] = []

    class_class_uri: ClassVar[URIRef] = SSSOM["ExtensionDefinition"]
    class_class_curie: ClassVar[str] = "sssom:ExtensionDefinition"
    class_name: ClassVar[str] = "extension definition"
    class_model_uri: ClassVar[URIRef] = SSSOM.ExtensionDefinition

    slot_name: Union[str, NCName] = None
    property: Optional[Union[str, URIorCURIE]] = None
    type_hint: Optional[Union[str, URIorCURIE]] = None

    def __post_init__(self, *_: str, **kwargs: Any):
        # The slot name is required; an empty value is reported through MissingRequiredField.
        declared_name = self.slot_name
        if self._is_empty(declared_name):
            self.MissingRequiredField("slot_name")
        if not isinstance(declared_name, NCName):
            self.slot_name = NCName(declared_name)

        # Both optional slots share the URIorCURIE range.
        for optional_slot in ("property", "type_hint"):
            current = getattr(self, optional_slot)
            if not (current is None or isinstance(current, URIorCURIE)):
                setattr(self, optional_slot, URIorCURIE(current))

        super().__post_init__(**kwargs)


@dataclass(repr=False)
class Propagatable(YAMLRoot):
    """
    Metamodel extension class describing slots whose value can be propagated down from the MappingSet
    class to the Mapping class.
    """
    _inherited_slots: ClassVar[list[str]] = []

    class_class_uri: ClassVar[URIRef] = SSSOM["Propagatable"]
    class_class_curie: ClassVar[str] = "sssom:Propagatable"
    class_name: ClassVar[str] = "Propagatable"
    class_model_uri: ClassVar[URIRef] = SSSOM.Propagatable

    propagated: Optional[Union[bool, Bool]] = None

    def __post_init__(self, *_: str, **kwargs: Any):
        flag = self.propagated
        if not (flag is None or isinstance(flag, Bool)):
            self.propagated = Bool(flag)

        super().__post_init__(**kwargs)


@dataclass(repr=False)
class Versionable(YAMLRoot):
    """
    Metamodel extension class for managing slots that may not exist in all versions of the model.
    """
    _inherited_slots: ClassVar[list[str]] = []

    class_class_uri: ClassVar[URIRef] = SSSOM["Versionable"]
    class_class_curie: ClassVar[str] = "sssom:Versionable"
    class_name: ClassVar[str] = "Versionable"
    class_model_uri: ClassVar[URIRef] = SSSOM.Versionable

    added_in: Optional[Union[str, "SssomVersionEnum"]] = None

    def __post_init__(self, *_: str, **kwargs: Any):
        version = self.added_in
        if not (version is None or isinstance(version, SssomVersionEnum)):
            self.added_in = SssomVersionEnum(version)

        super().__post_init__(**kwargs)


class NoTermFound(YAMLRoot):
    """
    sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity
    could not be found. It SHOULD be used together with the corresponding subject_source or
    object_source to signify where the term was not found.
    """
    _inherited_slots: ClassVar[list[str]] = []

    class_class_uri: ClassVar[URIRef] = SSSOM["NoTermFound"]
    class_class_curie: ClassVar[str] = "sssom:NoTermFound"
    class_name: ClassVar[str] = "NoTermFound"
    class_model_uri: ClassVar[URIRef] = SSSOM.NoTermFound


# Enumerations
class SssomVersionEnum(EnumDefinitionImpl):

    _defn = EnumDefinition(
        name="SssomVersionEnum",
    )

    @classmethod
    def _addvals(cls):
        # The value texts are not valid Python identifiers, so they are attached with setattr.
        versions = (
            ("1.0", "SSSOM specification version 1.0", SSSOM["version1.0"]),
            ("1.1", "SSSOM specification version 1.1", SSSOM["version1.1"]),
        )
        for text, description, meaning in versions:
            setattr(cls, text, PermissibleValue(text=text, description=description, meaning=meaning))


class EntityTypeEnum(EnumDefinitionImpl):

    _defn = EnumDefinition(
        name="EntityTypeEnum",
    )

    @classmethod
    def _addvals(cls):
        # (text, meaning, description) in registration order; description is None when the value
        # carries none. The value texts contain spaces, hence setattr.
        entity_types = (
            ("owl class", OWL["Class"], None),
            ("owl object property", OWL["ObjectProperty"], None),
            ("owl data property", OWL["DataProperty"], None),
            ("owl annotation property", OWL["AnnotationProperty"], None),
            ("owl named individual", OWL["NamedIndividual"], None),
            ("skos concept", SKOS["Concept"], None),
            ("rdfs resource", RDFS["Resource"], None),
            ("rdfs class", RDFS["Class"], None),
            ("rdfs literal", RDFS["Literal"],
             """This value indicates that the entity being mapped is not a semantic entity with a distinct identifier, but is instead represented entirely by its literal label. This value MUST NOT be used in the predicate_type slot."""),
            ("rdfs datatype", RDFS["Datatype"], None),
            ("rdf property", RDF["Property"], None),
            ("composed entity expression", SSSOM["ComposedEntityExpression"],
             """This value indicates that the entity ID does not represent a single entity, but a composite involving several individual entities. This value MUST NOT be used in the predicate_type slot. This specifications does not prescribe how an ID representing a composite entity should be interpreted; this is left at the discretion of applications."""),
        )
        for text, meaning, description in entity_types:
            if description is None:
                value = PermissibleValue(text=text, meaning=meaning)
            else:
                value = PermissibleValue(text=text, description=description, meaning=meaning)
            setattr(cls, text, value)
class PredicateModifierEnum(EnumDefinitionImpl):

    # Single permissible value; its text is a valid identifier, so it is a plain class attribute.
    Not = PermissibleValue(
        text="Not",
        description="""Negating the mapping predicate. The meaning of the triple becomes subject_id is not a predicate_id match to object_id.""",
        meaning=SSSOM["NegatedPredicate"],
    )

    _defn = EnumDefinition(
        name="PredicateModifierEnum",
    )


class MappingCardinalityEnum(EnumDefinitionImpl):

    _defn = EnumDefinition(
        name="MappingCardinalityEnum",
    )

    @classmethod
    def _addvals(cls):
        # (text, description) in registration order. The texts contain ':' and therefore cannot be
        # written as class attributes; they are attached with setattr instead.
        cardinalities = (
            ("1:1",
             """Indicates the mapping record is about a one-to-one mapping, that is, the subject and the object are only mapped to each other, exclusive of any other subject or object."""),
            ("1:n",
             """Indicates the mapping record is about a one-to-many mapping, that is, the same subject is mapped to several different objects."""),
            ("n:1",
             """Indicates the mapping record is about a many-to-one mapping, that is, several different subjects are mapped to the same object."""),
            ("n:n",
             """Indicates the mapping record is about a many-to-many mapping, that is, the subject is mapped to several different objects and the object is mapped to several different subjects."""),
            ("1:0",
             """Indicates that the subject has no match in the object vocabulary. This value MUST only be used when the object_id is sssom:NoTermFound."""),
            ("0:1",
             """Indicates that the object has no match in the subject vocabulary. This value MUST only be used when the subject_id is sssom:NoTermFound."""),
            ("0:0",
             """Indicates that there is no match between the subject vocabulary and the object vocabulary. This value MUST only be used when both the subject_id and the object_id are sssom:NoTermFound."""),
        )
        for text, description in cardinalities:
            setattr(cls, text, PermissibleValue(text=text, description=description))
the subject is mapped to several different objects and the object is mapped to several different subjects.""")) setattr(cls, "1:0", PermissibleValue( text="1:0", description="""Indicates that the subject has no match in the object vocabulary. This value MUST only be used when the object_id is sssom:NoTermFound.""")) setattr(cls, "0:1", PermissibleValue( text="0:1", description="""Indicates that the object has no match in the subject vocabulary. This value MUST only be used when the subject_id is sssom:NoTermFound.""")) setattr(cls, "0:0", PermissibleValue( text="0:0", description="""Indicates that there is no match between the subject vocabulary and the object vocabulary. This value MUST only be used when both the subject_id and the object_id are sssom:NoTermFound.""")) # Slots class slots: pass slots.prefix_name = Slot(uri=SSSOM.prefix_name, name="prefix_name", curie=SSSOM.curie('prefix_name'), model_uri=SSSOM.prefix_name, domain=None, range=URIRef) slots.prefix_url = Slot(uri=SSSOM.prefix_url, name="prefix_url", curie=SSSOM.curie('prefix_url'), model_uri=SSSOM.prefix_url, domain=None, range=Optional[Union[str, URI]]) slots.sssom_version = Slot(uri=SSSOM.sssom_version, name="sssom_version", curie=SSSOM.curie('sssom_version'), model_uri=SSSOM.sssom_version, domain=None, range=Optional[Union[str, "SssomVersionEnum"]]) slots.curie_map = Slot(uri=SSSOM.curie_map, name="curie_map", curie=SSSOM.curie('curie_map'), model_uri=SSSOM.curie_map, domain=None, range=Optional[Union[dict[Union[str, PrefixPrefixName], Union[dict, Prefix]], list[Union[dict, Prefix]]]]) slots.mirror_from = Slot(uri=SSSOM.mirror_from, name="mirror_from", curie=SSSOM.curie('mirror_from'), model_uri=SSSOM.mirror_from, domain=None, range=Optional[Union[str, NonRelativeURI]]) slots.registry_confidence = Slot(uri=SSSOM.registry_confidence, name="registry_confidence", curie=SSSOM.curie('registry_confidence'), model_uri=SSSOM.registry_confidence, domain=None, range=Optional[float]) slots.last_updated = 
Slot(uri=SSSOM.last_updated, name="last_updated", curie=SSSOM.curie('last_updated'), model_uri=SSSOM.last_updated, domain=None, range=Optional[Union[str, XSDDate]]) slots.local_name = Slot(uri=SSSOM.local_name, name="local_name", curie=SSSOM.curie('local_name'), model_uri=SSSOM.local_name, domain=None, range=Optional[str]) slots.mapping_set_references = Slot(uri=SSSOM.mapping_set_references, name="mapping_set_references", curie=SSSOM.curie('mapping_set_references'), model_uri=SSSOM.mapping_set_references, domain=None, range=Optional[Union[Union[dict, MappingSetReference], list[Union[dict, MappingSetReference]]]]) slots.mapping_registry_id = Slot(uri=SSSOM.mapping_registry_id, name="mapping_registry_id", curie=SSSOM.curie('mapping_registry_id'), model_uri=SSSOM.mapping_registry_id, domain=None, range=Union[str, EntityReference]) slots.mapping_registry_title = Slot(uri=SSSOM.mapping_registry_title, name="mapping_registry_title", curie=SSSOM.curie('mapping_registry_title'), model_uri=SSSOM.mapping_registry_title, domain=None, range=Optional[str]) slots.mapping_registry_description = Slot(uri=SSSOM.mapping_registry_description, name="mapping_registry_description", curie=SSSOM.curie('mapping_registry_description'), model_uri=SSSOM.mapping_registry_description, domain=None, range=Optional[str]) slots.imports = Slot(uri=SSSOM.imports, name="imports", curie=SSSOM.curie('imports'), model_uri=SSSOM.imports, domain=None, range=Optional[Union[Union[str, NonRelativeURI], list[Union[str, NonRelativeURI]]]]) slots.documentation = Slot(uri=SSSOM.documentation, name="documentation", curie=SSSOM.curie('documentation'), model_uri=SSSOM.documentation, domain=None, range=Optional[Union[str, NonRelativeURI]]) slots.homepage = Slot(uri=SSSOM.homepage, name="homepage", curie=SSSOM.curie('homepage'), model_uri=SSSOM.homepage, domain=None, range=Optional[Union[str, NonRelativeURI]]) slots.mappings = Slot(uri=SSSOM.mappings, name="mappings", curie=SSSOM.curie('mappings'), 
model_uri=SSSOM.mappings, domain=None, range=Optional[Union[Union[dict, Mapping], list[Union[dict, Mapping]]]]) slots.subject_id = Slot(uri=OWL.annotatedSource, name="subject_id", curie=OWL.curie('annotatedSource'), model_uri=SSSOM.subject_id, domain=None, range=Optional[Union[str, EntityReference]]) slots.subject_label = Slot(uri=SSSOM.subject_label, name="subject_label", curie=SSSOM.curie('subject_label'), model_uri=SSSOM.subject_label, domain=None, range=Optional[str]) slots.subject_category = Slot(uri=SSSOM.subject_category, name="subject_category", curie=SSSOM.curie('subject_category'), model_uri=SSSOM.subject_category, domain=None, range=Optional[str]) slots.subject_type = Slot(uri=SSSOM.subject_type, name="subject_type", curie=SSSOM.curie('subject_type'), model_uri=SSSOM.subject_type, domain=None, range=Optional[Union[str, "EntityTypeEnum"]]) slots.predicate_id = Slot(uri=OWL.annotatedProperty, name="predicate_id", curie=OWL.curie('annotatedProperty'), model_uri=SSSOM.predicate_id, domain=None, range=Union[str, EntityReference]) slots.predicate_modifier = Slot(uri=SSSOM.predicate_modifier, name="predicate_modifier", curie=SSSOM.curie('predicate_modifier'), model_uri=SSSOM.predicate_modifier, domain=None, range=Optional[Union[str, "PredicateModifierEnum"]]) slots.predicate_label = Slot(uri=SSSOM.predicate_label, name="predicate_label", curie=SSSOM.curie('predicate_label'), model_uri=SSSOM.predicate_label, domain=None, range=Optional[str]) slots.predicate_type = Slot(uri=SSSOM.predicate_type, name="predicate_type", curie=SSSOM.curie('predicate_type'), model_uri=SSSOM.predicate_type, domain=None, range=Optional[Union[str, "EntityTypeEnum"]]) slots.object_id = Slot(uri=OWL.annotatedTarget, name="object_id", curie=OWL.curie('annotatedTarget'), model_uri=SSSOM.object_id, domain=None, range=Optional[Union[str, EntityReference]]) slots.object_label = Slot(uri=SSSOM.object_label, name="object_label", curie=SSSOM.curie('object_label'), model_uri=SSSOM.object_label, 
domain=None, range=Optional[str]) slots.object_category = Slot(uri=SSSOM.object_category, name="object_category", curie=SSSOM.curie('object_category'), model_uri=SSSOM.object_category, domain=None, range=Optional[str]) slots.mapping_justification = Slot(uri=SSSOM.mapping_justification, name="mapping_justification", curie=SSSOM.curie('mapping_justification'), model_uri=SSSOM.mapping_justification, domain=None, range=Union[str, EntityReference], pattern=re.compile(r'^semapv:(MappingReview|ManualMappingCuration|LogicalReasoning|LexicalMatching|CompositeMatching|UnspecifiedMatching|SemanticSimilarityThresholdMatching|LexicalSimilarityThresholdMatching|MappingChaining|MappingInversion|StructuralMatching|InstanceBasedMatching|BackgroundKnowledgeBasedMatching)$')) slots.object_type = Slot(uri=SSSOM.object_type, name="object_type", curie=SSSOM.curie('object_type'), model_uri=SSSOM.object_type, domain=None, range=Optional[Union[str, "EntityTypeEnum"]]) slots.mapping_set_id = Slot(uri=SSSOM.mapping_set_id, name="mapping_set_id", curie=SSSOM.curie('mapping_set_id'), model_uri=SSSOM.mapping_set_id, domain=None, range=Union[str, NonRelativeURI]) slots.mapping_set_version = Slot(uri=OWL.versionInfo, name="mapping_set_version", curie=OWL.curie('versionInfo'), model_uri=SSSOM.mapping_set_version, domain=None, range=Optional[str]) slots.mapping_set_group = Slot(uri=SSSOM.mapping_set_group, name="mapping_set_group", curie=SSSOM.curie('mapping_set_group'), model_uri=SSSOM.mapping_set_group, domain=None, range=Optional[str]) slots.mapping_set_title = Slot(uri=DCTERMS.title, name="mapping_set_title", curie=DCTERMS.curie('title'), model_uri=SSSOM.mapping_set_title, domain=None, range=Optional[str]) slots.mapping_set_description = Slot(uri=DCTERMS.description, name="mapping_set_description", curie=DCTERMS.curie('description'), model_uri=SSSOM.mapping_set_description, domain=None, range=Optional[str]) slots.mapping_set_confidence = Slot(uri=SSSOM.mapping_set_confidence, 
name="mapping_set_confidence", curie=SSSOM.curie('mapping_set_confidence'), model_uri=SSSOM.mapping_set_confidence, domain=None, range=Optional[float]) slots.creator_id = Slot(uri=DCTERMS.creator, name="creator_id", curie=DCTERMS.curie('creator'), model_uri=SSSOM.creator_id, domain=None, range=Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]]) slots.creator_label = Slot(uri=SSSOM.creator_label, name="creator_label", curie=SSSOM.curie('creator_label'), model_uri=SSSOM.creator_label, domain=None, range=Optional[Union[str, list[str]]]) slots.author_id = Slot(uri=PAV.authoredBy, name="author_id", curie=PAV.curie('authoredBy'), model_uri=SSSOM.author_id, domain=None, range=Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]]) slots.author_label = Slot(uri=SSSOM.author_label, name="author_label", curie=SSSOM.curie('author_label'), model_uri=SSSOM.author_label, domain=None, range=Optional[Union[str, list[str]]]) slots.reviewer_id = Slot(uri=SSSOM.reviewer_id, name="reviewer_id", curie=SSSOM.curie('reviewer_id'), model_uri=SSSOM.reviewer_id, domain=None, range=Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]]) slots.reviewer_label = Slot(uri=SSSOM.reviewer_label, name="reviewer_label", curie=SSSOM.curie('reviewer_label'), model_uri=SSSOM.reviewer_label, domain=None, range=Optional[Union[str, list[str]]]) slots.license = Slot(uri=DCTERMS.license, name="license", curie=DCTERMS.curie('license'), model_uri=SSSOM.license, domain=None, range=Optional[Union[str, NonRelativeURI]]) slots.subject_source = Slot(uri=SSSOM.subject_source, name="subject_source", curie=SSSOM.curie('subject_source'), model_uri=SSSOM.subject_source, domain=None, range=Optional[Union[str, EntityReference]]) slots.subject_source_version = Slot(uri=SSSOM.subject_source_version, name="subject_source_version", curie=SSSOM.curie('subject_source_version'), model_uri=SSSOM.subject_source_version, domain=None, 
range=Optional[str]) slots.object_source = Slot(uri=SSSOM.object_source, name="object_source", curie=SSSOM.curie('object_source'), model_uri=SSSOM.object_source, domain=None, range=Optional[Union[str, EntityReference]]) slots.object_source_version = Slot(uri=SSSOM.object_source_version, name="object_source_version", curie=SSSOM.curie('object_source_version'), model_uri=SSSOM.object_source_version, domain=None, range=Optional[str]) slots.mapping_provider = Slot(uri=SSSOM.mapping_provider, name="mapping_provider", curie=SSSOM.curie('mapping_provider'), model_uri=SSSOM.mapping_provider, domain=None, range=Optional[Union[str, NonRelativeURI]]) slots.mapping_set_source = Slot(uri=PROV.wasDerivedFrom, name="mapping_set_source", curie=PROV.curie('wasDerivedFrom'), model_uri=SSSOM.mapping_set_source, domain=None, range=Optional[Union[Union[str, NonRelativeURI], list[Union[str, NonRelativeURI]]]]) slots.mapping_source = Slot(uri=SSSOM.mapping_source, name="mapping_source", curie=SSSOM.curie('mapping_source'), model_uri=SSSOM.mapping_source, domain=None, range=Optional[Union[str, EntityReference]]) slots.mapping_cardinality = Slot(uri=SSSOM.mapping_cardinality, name="mapping_cardinality", curie=SSSOM.curie('mapping_cardinality'), model_uri=SSSOM.mapping_cardinality, domain=None, range=Optional[Union[str, "MappingCardinalityEnum"]]) slots.cardinality_scope = Slot(uri=SSSOM.cardinality_scope, name="cardinality_scope", curie=SSSOM.curie('cardinality_scope'), model_uri=SSSOM.cardinality_scope, domain=None, range=Optional[Union[str, list[str]]]) slots.mapping_tool = Slot(uri=SSSOM.mapping_tool, name="mapping_tool", curie=SSSOM.curie('mapping_tool'), model_uri=SSSOM.mapping_tool, domain=None, range=Optional[str]) slots.mapping_tool_id = Slot(uri=SSSOM.mapping_tool_id, name="mapping_tool_id", curie=SSSOM.curie('mapping_tool_id'), model_uri=SSSOM.mapping_tool_id, domain=None, range=Optional[Union[str, EntityReference]]) slots.mapping_tool_version = 
Slot(uri=SSSOM.mapping_tool_version, name="mapping_tool_version", curie=SSSOM.curie('mapping_tool_version'), model_uri=SSSOM.mapping_tool_version, domain=None, range=Optional[str]) slots.mapping_date = Slot(uri=DCTERMS.created, name="mapping_date", curie=DCTERMS.curie('created'), model_uri=SSSOM.mapping_date, domain=None, range=Optional[Union[str, XSDDate]]) slots.publication_date = Slot(uri=DCTERMS.issued, name="publication_date", curie=DCTERMS.curie('issued'), model_uri=SSSOM.publication_date, domain=None, range=Optional[Union[str, XSDDate]]) slots.review_date = Slot(uri=SSSOM.review_date, name="review_date", curie=SSSOM.curie('review_date'), model_uri=SSSOM.review_date, domain=None, range=Optional[Union[str, XSDDate]]) slots.confidence = Slot(uri=SSSOM.confidence, name="confidence", curie=SSSOM.curie('confidence'), model_uri=SSSOM.confidence, domain=None, range=Optional[float]) slots.reviewer_agreement = Slot(uri=SSSOM.reviewer_agreement, name="reviewer_agreement", curie=SSSOM.curie('reviewer_agreement'), model_uri=SSSOM.reviewer_agreement, domain=None, range=Optional[float]) slots.subject_match_field = Slot(uri=SSSOM.subject_match_field, name="subject_match_field", curie=SSSOM.curie('subject_match_field'), model_uri=SSSOM.subject_match_field, domain=None, range=Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]]) slots.object_match_field = Slot(uri=SSSOM.object_match_field, name="object_match_field", curie=SSSOM.curie('object_match_field'), model_uri=SSSOM.object_match_field, domain=None, range=Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]]) slots.match_string = Slot(uri=SSSOM.match_string, name="match_string", curie=SSSOM.curie('match_string'), model_uri=SSSOM.match_string, domain=None, range=Optional[Union[str, list[str]]]) slots.subject_preprocessing = Slot(uri=SSSOM.subject_preprocessing, name="subject_preprocessing", curie=SSSOM.curie('subject_preprocessing'), 
model_uri=SSSOM.subject_preprocessing, domain=None, range=Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]]) slots.object_preprocessing = Slot(uri=SSSOM.object_preprocessing, name="object_preprocessing", curie=SSSOM.curie('object_preprocessing'), model_uri=SSSOM.object_preprocessing, domain=None, range=Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]]) slots.curation_rule = Slot(uri=SSSOM.curation_rule, name="curation_rule", curie=SSSOM.curie('curation_rule'), model_uri=SSSOM.curation_rule, domain=None, range=Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]]) slots.curation_rule_text = Slot(uri=SSSOM.curation_rule_text, name="curation_rule_text", curie=SSSOM.curie('curation_rule_text'), model_uri=SSSOM.curation_rule_text, domain=None, range=Optional[Union[str, list[str]]]) slots.similarity_score = Slot(uri=SSSOM.similarity_score, name="similarity_score", curie=SSSOM.curie('similarity_score'), model_uri=SSSOM.similarity_score, domain=None, range=Optional[float]) slots.similarity_measure = Slot(uri=SSSOM.similarity_measure, name="similarity_measure", curie=SSSOM.curie('similarity_measure'), model_uri=SSSOM.similarity_measure, domain=None, range=Optional[str]) slots.issue_tracker_item = Slot(uri=SSSOM.issue_tracker_item, name="issue_tracker_item", curie=SSSOM.curie('issue_tracker_item'), model_uri=SSSOM.issue_tracker_item, domain=None, range=Optional[Union[str, EntityReference]]) slots.issue_tracker = Slot(uri=SSSOM.issue_tracker, name="issue_tracker", curie=SSSOM.curie('issue_tracker'), model_uri=SSSOM.issue_tracker, domain=None, range=Optional[Union[str, NonRelativeURI]]) slots.see_also = Slot(uri=RDFS.seeAlso, name="see_also", curie=RDFS.curie('seeAlso'), model_uri=SSSOM.see_also, domain=None, range=Optional[Union[Union[str, NonRelativeURI], list[Union[str, NonRelativeURI]]]]) slots.other = Slot(uri=SSSOM.other, name="other", curie=SSSOM.curie('other'), model_uri=SSSOM.other, 
domain=None, range=Optional[str]) slots.comment = Slot(uri=RDFS.comment, name="comment", curie=RDFS.curie('comment'), model_uri=SSSOM.comment, domain=None, range=Optional[str]) slots.extension_definitions = Slot(uri=SSSOM.extension_definitions, name="extension_definitions", curie=SSSOM.curie('extension_definitions'), model_uri=SSSOM.extension_definitions, domain=None, range=Optional[Union[Union[dict, ExtensionDefinition], list[Union[dict, ExtensionDefinition]]]]) slots.record_id = Slot(uri=SSSOM.record_id, name="record_id", curie=SSSOM.curie('record_id'), model_uri=SSSOM.record_id, domain=None, range=Optional[Union[str, EntityReference]]) slots.extensionDefinition__slot_name = Slot(uri=SSSOM.slot_name, name="extensionDefinition__slot_name", curie=SSSOM.curie('slot_name'), model_uri=SSSOM.extensionDefinition__slot_name, domain=None, range=Union[str, NCName]) slots.extensionDefinition__property = Slot(uri=SSSOM.property, name="extensionDefinition__property", curie=SSSOM.curie('property'), model_uri=SSSOM.extensionDefinition__property, domain=None, range=Optional[Union[str, URIorCURIE]]) slots.extensionDefinition__type_hint = Slot(uri=SSSOM.type_hint, name="extensionDefinition__type_hint", curie=SSSOM.curie('type_hint'), model_uri=SSSOM.extensionDefinition__type_hint, domain=None, range=Optional[Union[str, URIorCURIE]]) slots.propagatable__propagated = Slot(uri=SSSOM.propagated, name="propagatable__propagated", curie=SSSOM.curie('propagated'), model_uri=SSSOM.propagatable__propagated, domain=None, range=Optional[Union[bool, Bool]]) slots.versionable__added_in = Slot(uri=SSSOM.added_in, name="versionable__added_in", curie=SSSOM.curie('added_in'), model_uri=SSSOM.versionable__added_in, domain=None, range=Optional[Union[str, "SssomVersionEnum"]]) slots.mapping_set_license = Slot(uri=DCTERMS.license, name="mapping set_license", curie=DCTERMS.curie('license'), model_uri=SSSOM.mapping_set_license, domain=MappingSet, range=Union[str, NonRelativeURI]) 
slots.mapping_set_similarity_measure = Slot(uri=SSSOM.similarity_measure, name="mapping set_similarity_measure", curie=SSSOM.curie('similarity_measure'), model_uri=SSSOM.mapping_set_similarity_measure, domain=MappingSet, range=Optional[str]) slots.mapping_set_curation_rule = Slot(uri=SSSOM.curation_rule, name="mapping set_curation_rule", curie=SSSOM.curie('curation_rule'), model_uri=SSSOM.mapping_set_curation_rule, domain=MappingSet, range=Optional[Union[Union[str, EntityReference], list[Union[str, EntityReference]]]]) slots.mapping_set_curation_rule_text = Slot(uri=SSSOM.curation_rule_text, name="mapping set_curation_rule_text", curie=SSSOM.curie('curation_rule_text'), model_uri=SSSOM.mapping_set_curation_rule_text, domain=MappingSet, range=Optional[Union[str, list[str]]]) ================================================ FILE: src/sssom_schema/schema/sssom_schema.yaml ================================================ id: https://w3id.org/sssom/schema/ name: sssom description: Datamodel for Simple Standard for Sharing Ontological Mappings (SSSOM) imports: - linkml:types prefixes: dcterms: http://purl.org/dc/terms/ linkml: https://w3id.org/linkml/ sssom: https://w3id.org/sssom/ rdfs: http://www.w3.org/2000/01/rdf-schema# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# oboInOwl: http://www.geneontology.org/formats/oboInOwl# pav: http://purl.org/pav/ prov: http://www.w3.org/ns/prov# skos: http://www.w3.org/2004/02/skos/core# xsd: http://www.w3.org/2001/XMLSchema# semapv: https://w3id.org/semapv/vocab/ see_also: - https://github.com/mapping-commons/sssom - https://mapping-commons.github.io/sssom/home/ default_curi_maps: - semweb_context - obo_context default_prefix: sssom default_range: string enums: sssom_version_enum: permissible_values: "1.0": meaning: sssom:version1.0 description: SSSOM specification version 1.0 "1.1": meaning: sssom:version1.1 description: SSSOM specification version 1.1 entity_type_enum: permissible_values: owl class: meaning: owl:Class owl 
object property: meaning: owl:ObjectProperty owl data property: meaning: owl:DataProperty owl annotation property: meaning: owl:AnnotationProperty owl named individual: meaning: owl:NamedIndividual skos concept: meaning: skos:Concept rdfs resource: meaning: rdfs:Resource rdfs class: meaning: rdfs:Class rdfs literal: meaning: rdfs:Literal description: This value indicates that the entity being mapped is not a semantic entity with a distinct identifier, but is instead represented entirely by its literal label. This value MUST NOT be used in the predicate_type slot. see_also: - https://mapping-commons.github.io/sssom/spec-model/#literal-mappings - https://github.com/mapping-commons/sssom/issues/234 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/literals.sssom.tsv rdfs datatype: meaning: rdfs:Datatype rdf property: meaning: rdf:Property composed entity expression: meaning: sssom:ComposedEntityExpression description: This value indicates that the entity ID does not represent a single entity, but a composite involving several individual entities. This value MUST NOT be used in the predicate_type slot. This specification does not prescribe how an ID representing a composite entity should be interpreted; this is left to the discretion of applications. see_also: - https://github.com/mapping-commons/sssom/issues/402 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/composite-entities.sssom.tsv predicate_modifier_enum: permissible_values: Not: description: Negating the mapping predicate. The meaning of the triple becomes subject_id is not a predicate_id match to object_id. meaning: sssom:NegatedPredicate mapping_cardinality_enum: permissible_values: "1:1": description: Indicates the mapping record is about a one-to-one mapping, that is, the subject and the object are only mapped to each other, exclusive of any other subject or object.
"1:n": description: Indicates the mapping record is about a one-to-many mapping, that is, the same subject is mapped to several different objects. "n:1": description: Indicates the mapping record is about a many-to-one mapping, that is, several different subjects are mapped to the same object. "n:n": description: Indicates the mapping record is about a many-to-many mapping, that is, the subject is mapped to several different objects and the object is mapped to several different subjects. "1:0": description: Indicates that the subject has no match in the object vocabulary. This value MUST only be used when the object_id is sssom:NoTermFound. "0:1": description: Indicates that the object has no match in the subject vocabulary. This value MUST only be used when the subject_id is sssom:NoTermFound. "0:0": description: Indicates that there is no match between the subject vocabulary and the object vocabulary. This value MUST only be used when both the subject_id and the object_id are sssom:NoTermFound. types: EntityReference: typeof: uriorcurie description: | A reference to an entity involved in the mapping. base: str uri: rdfs:Resource see_also: - https://mapping-commons.github.io/sssom/spec/#tsv NonRelativeURI: typeof: uri base: URI uri: xsd:anyURI description: >- A URI as per RFC 3986, that is a string that matches the production of the "URI" rule defined in Appendix A of that RFC. Contrary to the underlying LinkML type, this specifically excludes _relative URI references_, which do not start with a scheme component. Relative URI references are forbidden because SSSOM has no built-in mechanism to provide the base URI that would be needed to resolve relative URI references into non-relative ones. see_also: - https://github.com/mapping-commons/sssom/issues/448 examples: - value: https://example.org/path/to/file.txt#L4 description: A URI that is URL to a HTTP resource. 
- value: urn:oasis:names:tc:entity:xmlns:xml:catalog description: >- A URI that is the URN of the namespace for the OASIS XML Catalogs specification. - value: ldap://example.org/cn=Alice,dc=example,dc=org?mail description: A URI that is an LDAP query URL. - value: mailto:alice@example.org description: A URI that is an email address. - value: file.txt description: An _invalid_ example, as it is a relative URI (path only, no scheme). - value: /path/to/file.txt description: >- An _invalid_ example; though it appears to be an _absolute path_, it is a _relative URI_ because of the absence of a scheme. - value: //example.org/path/to/file.txt description: >- An _invalid_ example; though it includes an authority component (example.org), it has no scheme and is therefore a _relative URI_. slots: prefix_name: key: true range: ncname prefix_url: range: uri sssom_version: description: The version of the SSSOM specification a mapping set is compliant with. range: sssom_version_enum instantiates: - sssom:Versionable annotations: added_in: "1.1" see_also: - https://github.com/mapping-commons/sssom/issues/439 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/version.sssom.tsv curie_map: description: A dictionary that contains prefixes as keys and their URI expansions as values. range: prefix multivalued: true inlined: true see_also: - https://github.com/mapping-commons/sssom/issues/225 - https://github.com/mapping-commons/sssom/pull/349 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/curie_map.sssom.tsv mirror_from: description: A URL location from which to obtain a resource, such as a mapping set. range: NonRelativeURI registry_confidence: description: This value is set by the creator/maintainer of the mapping registry and reflects the confidence the mapping registry has in the correctness (i.e., precision) of mappings in the mapping set.
When not explicitly specified, confidence estimation algorithms should consider the registry confidence in a mapping set to be 1.0 by default. range: double minimum_value: 0.0 maximum_value: 1.0 see_also: - https://mapping-commons.github.io/sssom/confidence-model examples: - value: "0.95" description: A confidence score of 0.95, indicating 95% confidence. last_updated: description: The date this reference was last updated. range: date local_name: description: The local name assigned to the file that corresponds to the downloaded mapping set. range: string mapping_set_references: description: A list of mapping set references. range: mapping set reference multivalued: true recommended: true mapping_registry_id: description: The unique identifier of a mapping registry. range: EntityReference required: true mapping_registry_title: description: The title of a mapping registry. range: string mapping_registry_description: description: The description of a mapping registry. range: string imports: description: A list of registries that should be imported into this one. multivalued: true range: NonRelativeURI documentation: description: A URL to the documentation of this mapping commons. range: NonRelativeURI homepage: description: A URL to a homepage of this mapping commons. range: NonRelativeURI mappings: description: Contains a list of mapping objects. range: mapping multivalued: true inlined_as_list: true recommended: true subject_id: description: The ID of the subject of the mapping. range: EntityReference mappings: - owl:annotatedSource slot_uri: owl:annotatedSource examples: - value: HP:0009894 description: The CURIE denoting the Human Phenotype Ontology concept of 'Thickened ears' subject_label: description: The label of the subject of the mapping. range: string examples: - value: Thickened ears recommended: true subject_category: description: The conceptual category to which the subject belongs.
This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases. range: string see_also: - https://github.com/mapping-commons/sssom/issues/13 - https://github.com/mapping-commons/sssom/issues/256 examples: - value: UBERON:0001062 description: The CURIE of the Uberon term for "anatomical entity". - value: anatomical entity description: A string, rather than ID, describing the "anatomical entity" category. This is possible, but less preferred than using an ID. - value: biolink:Gene description: The CURIE of the biolink class for genes. subject_type: description: The type of entity that is being mapped. range: entity_type_enum instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: owl:Class predicate_id: description: The ID of the predicate or relation that relates the subject and object of this match. mappings: - owl:annotatedProperty range: EntityReference required: true slot_uri: owl:annotatedProperty examples: - value: owl:sameAs description: The subject and the object are instances (owl individuals), and the two instances are the same. - value: owl:equivalentClass description: The subject and the object are classes (owl class), and the two classes are the same. - value: owl:equivalentProperty description: The subject and the object are properties (owl object, data, annotation properties), and the two properties are the same. 
- value: rdfs:subClassOf description: The subject and the object are classes (owl class), and the subject is a subclass of the object. - value: rdfs:subPropertyOf description: The subject and the object are properties (owl object, data, annotation properties), and the subject is a subproperty of the object. - value: skos:relatedMatch description: The subject and the object are associated in some unspecified way. - value: skos:closeMatch description: The subject and the object are sufficiently similar that they can be used interchangeably in some information retrieval applications. - value: skos:exactMatch description: The subject and the object can, with a high degree of confidence, be used interchangeably across a wide range of information retrieval applications. - value: skos:narrowMatch description: "From the SKOS primer: A triple <A> skos:narrower <B> (and skos:narrowMatch) asserts that <B>, the object of the triple, is a narrower concept than <A>, the subject of the triple." - value: skos:broadMatch description: "From the SKOS primer: A triple <A> skos:broader <B> (and skos:broadMatch) asserts that <B>, the object of the triple, is a broader concept than <A>, the subject of the triple." - value: oboInOwl:hasDbXref description: Two terms are related in some way. The meaning is frequently consistent across a single set of mappings. Note this property is often overloaded even where the terms are of a different nature (e.g. interpro2go). - value: rdfs:seeAlso description: The subject and the object are associated in some unspecified way. The object IRI often resolves to a resource on the web that provides additional information. predicate_modifier: description: A modifier for negating the predicate.
See https://github.com/mapping-commons/sssom/issues/40 for discussion range: predicate_modifier_enum see_also: - https://github.com/mapping-commons/sssom/issues/107 examples: - value: Not description: Negates the predicate, see documentation of predicate_modifier_enum predicate_label: description: The label of the predicate/relation of the mapping. range: string examples: - value: has cross-reference description: The label of the oboInOwl:hasDbXref property to represent cross-references. predicate_type: description: The type of the predicate used to map the subject and object entities. range: entity_type_enum see_also: - https://github.com/mapping-commons/sssom/issues/143 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/predicate-types.sssom.tsv instantiates: - sssom:Propagatable - sssom:Versionable annotations: propagated: true added_in: "1.1" examples: - value: owl:AnnotationProperty - value: owl:ObjectProperty object_id: description: The ID of the object of the mapping. mappings: - owl:annotatedTarget range: EntityReference slot_uri: owl:annotatedTarget examples: - value: HP:0009894 description: The CURIE denoting the Human Phenotype Ontology concept of 'Thickened ears'. object_label: description: The label of the object of the mapping. range: string examples: - value: Thickened ears recommended: true object_category: description: The conceptual category to which the object belongs. This can be a string denoting the category or a term from a controlled vocabulary. This slot is deliberately underspecified. Conceptual categories can range from those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this optional field is documentation for human reviewers - when a category is known and documented clearly, the cost of interpreting and evaluating the mapping decreases.
range: string see_also: - https://github.com/mapping-commons/sssom/issues/13 - https://github.com/mapping-commons/sssom/issues/256 examples: - value: UBERON:0001062 description: The CURIE of the Uberon term for "anatomical entity". - value: anatomical entity description: A string, rather than ID, describing the "anatomical entity" category. This is possible, but less preferred than using an ID. - value: biolink:Gene description: The CURIE of the biolink class for genes. mapping_justification: description: A mapping justification is an action (or the written representation of that action) of showing a mapping to be right or reasonable. range: EntityReference pattern: "^semapv:(MappingReview|ManualMappingCuration|LogicalReasoning|LexicalMatching|CompositeMatching|UnspecifiedMatching|SemanticSimilarityThresholdMatching|LexicalSimilarityThresholdMatching|MappingChaining|MappingInversion|StructuralMatching|InstanceBasedMatching|BackgroundKnowledgeBasedMatching)$" required: true any_of: - equals_string: semapv:LexicalMatching - equals_string: semapv:LogicalReasoning - equals_string: semapv:CompositeMatching - equals_string: semapv:UnspecifiedMatching - equals_string: semapv:SemanticSimilarityThresholdMatching - equals_string: semapv:LexicalSimilarityThresholdMatching - equals_string: semapv:MappingChaining - equals_string: semapv:MappingReview - equals_string: semapv:ManualMappingCuration - equals_string: semapv:MappingInversion - equals_string: semapv:StructuralMatching - equals_string: semapv:InstanceBasedMatching - equals_string: semapv:BackgroundKnowledgeBasedMatching examples: - value: semapv:LexicalMatching - value: semapv:ManualMappingCuration see_also: - https://mapping-commons.github.io/semantic-mapping-vocabulary/ - https://www.ebi.ac.uk/ols4/ontologies/semapv object_type: description: The type of entity that is being mapped. 
range: entity_type_enum instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: owl:Class mapping_set_id: description: A globally unique identifier for the mapping set (not each individual mapping). Should ideally be resolvable. required: true range: NonRelativeURI examples: - value: http://purl.obolibrary.org/obo/mondo/mappings/mondo_exactmatch_ncit.sssom.tsv description: A persistent URI pointing to the latest version of the Mondo - NCIT mapping in the Mondo namespace. mapping_set_version: description: A version string for the mapping. range: string slot_uri: owl:versionInfo examples: - value: "2020-01-01" description: A date-based version that indicates that the mapping was published on the 1st of January 2020. - value: "1.2.1" description: "(A semantic version tag that indicates that this is the 1st major, 2nd minor version, patch 1 (https://semver.org/).)" mapping_set_group: description: Set by the owners of the mapping registry. A way to group related mapping sets for example for UI purposes. range: string mapping_set_title: description: The display name of a mapping set. range: string slot_uri: dcterms:title examples: - value: "The Mondo-OMIM mappings by Monarch Initiative." mapping_set_description: description: A description of the mapping set. range: string slot_uri: dcterms:description examples: - value: "This mapping set was produced to integrate human and mouse phenotype data at the IMPC. It is primarily used for making mouse phenotypes searchable by human synonyms at https://mousephenotype.org/." mapping_set_confidence: instantiates: - sssom:Versionable annotations: added_in: "1.1" description: Mapping-set level confidence is assigned by the creator of the mapping set to indicate their overall confidence in the correctness (i.e., precision) of mappings in the mapping set.
Mapping set confidence is intended to be used in cases where the creator wants to express an overall confidence in the agent that curated the individual mappings, for example a lexical matching tool, or a group of students. When not explicitly specified, confidence estimation algorithms should consider the mapping set confidence to be 1.0 by default. range: double minimum_value: 0.0 maximum_value: 1.0 examples: - value: "0.95" description: A confidence score of 0.95, indicating 95% confidence that the mappings in the mapping set are correct. see_also: - https://mapping-commons.github.io/sssom/confidence-model - https://github.com/mapping-commons/sssom/issues/438 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/mapping_set_confidence.sssom.tsv creator_id: description: Identifies the persons or groups responsible for the creation of the mapping. The creator is the agent that put the mapping in its published form, which may be different from the author, which is a person that was actively involved in the assertion of the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs. slot_uri: dcterms:creator range: EntityReference multivalued: true examples: - value: orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165 description: >- The ORCID of the (multiple) creators of the mapping. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `creator_id` are represented as single strings containing `|`-separated values. - value: orcid:0000-0002-7356-1779 description: The ORCID of the creator of the mapping. creator_label: description: >- A string representing the creator of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in creator_id) for that creator.
It is not expected that there should be any link between creator_id and creator_label; in particular, creator_label is not intended to provide a human-friendly version of an identifier in creator_id. range: string multivalued: true examples: - value: Nicolas Matentzoglu|Chris Mungall description: >- The human-readable names of the (multiple) creators of the mapping. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `creator_label` are represented as single strings containing `|`-separated values. - value: Nicolas Matentzoglu description: The human-readable name of the creator of the mapping. author_id: description: Identifies the persons or groups responsible for asserting the mappings. Recommended to be a list of ORCIDs or otherwise identifying URIs. slot_uri: pav:authoredBy range: EntityReference multivalued: true examples: - value: orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165 description: >- The ORCID of the (multiple) authors of the mapping. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `author_id` are represented as single strings containing `|`-separated values. - value: orcid:0000-0002-7356-1779 description: The ORCID of the author of the mapping. author_label: description: >- A string representing the author of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in author_id) for that author. It is not expected that there should be any link between author_id and author_label; in particular, author_label is not intended to provide a human-friendly version of an identifier in author_id. range: string multivalued: true examples: - value: Nicolas Matentzoglu|Chris Mungall description: >- The human-readable names of the (multiple) authors of the mapping. 
Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `author_label` are represented as single strings containing `|`-separated values. - value: Nicolas Matentzoglu description: The human-readable name of the author of the mapping. reviewer_id: description: Identifies the persons or groups that reviewed and confirmed the mapping. Recommended to be a list of ORCIDs or otherwise identifying URIs. range: EntityReference multivalued: true examples: - value: orcid:0000-0002-7356-1779|orcid:0000-0002-6601-2165 description: >- The ORCID of the (multiple) reviewers of the mapping. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `reviewer_id` are represented as single strings containing `|`-separated values. - value: orcid:0000-0002-7356-1779 description: The ORCID of the reviewer of the mapping. reviewer_label: description: >- A string representing the reviewer of this mapping. This should only be used in the absence of a proper semantic identifier (which would be stored in reviewer_id) for that reviewer. It is not expected that there should be any link between reviewer_id and reviewer_label; in particular, reviewer_label is not intended to provide a human-friendly version of an identifier in reviewer_id. range: string multivalued: true examples: - value: Nicolas Matentzoglu|Chris Mungall description: >- The human-readable names of the (multiple) reviewers of the mapping. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots such as `reviewer_label` are represented as single strings containing `|`-separated values. - value: Nicolas Matentzoglu description: The human-readable name of the reviewer of the mapping. license: description: A url to the license of the mapping. In absence of a license we assume no license. 
range: NonRelativeURI slot_uri: dcterms:license examples: - value: https://creativecommons.org/licenses/by/4.0/ description: The URI of the Creative Commons Attribution 4.0 International license. subject_source: description: URI of vocabulary or identifier source for the subject. range: EntityReference instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: obo:mondo.owl description: A persistent OBO CURIE pointing to the latest version of the Mondo ontology. - value: wikidata:Q7876491 description: A Wikidata identifier for the Uberon ontology resource. subject_source_version: description: Version IRI or version string of the source of the subject term. range: string instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: http://purl.obolibrary.org/obo/mondo/releases/2021-01-30/mondo.owl description: A persistent Version IRI pointing to the Mondo version '2021-01-30' object_source: description: URI of vocabulary or identifier source for the object. range: EntityReference instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: obo:mondo.owl description: A persistent OBO CURIE pointing to the latest version of the Mondo ontology. - value: wikidata:Q7876491 description: A Wikidata identifier for the Uberon ontology resource. object_source_version: description: Version IRI or version string of the source of the object term. range: string instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: http://purl.obolibrary.org/obo/mondo/releases/2021-01-30/mondo.owl description: A persistent Version IRI pointing to the Mondo version '2021-01-30' mapping_provider: description: URL pointing to the source that provided the mapping, for example an ontology that already contains the mappings, or a database from which it was derived. 
range: NonRelativeURI instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: https://www.ohdsi.org/ description: A URL pointing to the Observational Health Data Sciences and Informatics initiative. - value: https://monarchinitiative.org/ description: A URL pointing to the Monarch Initiative Resource. mapping_set_source: description: A mapping set or set of mapping sets that was used to derive the mapping set. slot_uri: prov:wasDerivedFrom range: NonRelativeURI multivalued: true examples: - value: http://purl.obolibrary.org/obo/mondo/mappings/2022-05-20/mondo_exactmatch_ncit.sssom.tsv description: A persistent, ideally versioned, link to the mapping set from which the current mapping set is derived. mapping_source: description: The mapping set this mapping was originally defined in. mapping_source is used for example when merging multiple mapping sets or deriving one mapping set from another. range: EntityReference examples: - value: MONDO_MAPPINGS:mondo_exactmatch_ncit.sssom.tsv description: A reference to the mapping set that originally contained this mapping. mapping_cardinality: description: A value indicating whether the subject (respectively object) of this mapping record is present in other records involving a different object (respectively subject), within the subset of records defined by the cardinality_scope slot (or within the entire mapping set if cardinality_scope is undefined). Note that this is a convenience field, whose values can always be derived from the mapping set. range: mapping_cardinality_enum examples: - value: "1:1" description: A one-to-one mapping. There are no other records in which the same subject is mapped to a different object, and no other records in which the same object is mapped to a different subject. - value: "1:n" description: A one-to-many mapping.
There are other records in which the same subject is mapped to at least one different object than the object present in this record; there are no other records in which the object is mapped to a different subject. see_also: - https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality.sssom.tsv - https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-with-unmapped-entities.sssom.tsv - https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-scope-empty.sssom.tsv cardinality_scope: description: A list of mapping slots that define the scope for the value found in the mapping_cardinality slot. Mappings are considered to belong to the same scope if they have the same value for all slots listed in the scope. If no scope is defined, the default scope is empty, meaning that all mappings belong to a single scope that is identical to the entire mapping set. The behaviour if a value in the list does not correspond to a valid slot name is undefined. range: string multivalued: true instantiates: - sssom:Propagatable - sssom:Versionable annotations: propagated: true added_in: "1.1" examples: - value: predicate_id description: >- Indicates that mapping_cardinality is computed relatively to all mappings that have the same predicate. - value: "predicate_id|object_source" description: >- Indicates that mapping_cardinality is computed relatively to all mappings that have the same predicate and the same object source. Note that this is how the example would look like specifically in the SSSOM/TSV format, where multivalued slots like `cardinality_scope` are represented as a single string containing `|`-separated values. 
see_also: - https://github.com/mapping-commons/sssom/issues/467 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-scope-predicate.sssom.tsv - https://github.com/mapping-commons/sssom/blob/master/examples/schema/cardinality-scope-predicate+object_source.sssom.tsv mapping_tool: description: A reference to the tool or algorithm that was used to generate the mapping. Should be a URL pointing to more info about it, but can be free text. Consider using the mapping_tool_id slot for a more standardised reference. range: string instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: https://github.com/AgreementMakerLight/AML-Project description: A URL pointing to the AgreementMakerLight project. - value: AgreementMakerLight description: A string (name) denoting the AgreementMakerLight project. mapping_tool_id: description: The ID (entity reference) of the tool or algorithm that was used to generate the mapping. range: EntityReference instantiates: - sssom:Propagatable - sssom:Versionable annotations: propagated: true added_in: "1.1" examples: - value: wikidata:Q58057366 description: A wikidata PURL identifying the AgreementMakerLight project. see_also: - https://github.com/mapping-commons/sssom/blob/master/examples/schema/mapping_tool_id.sssom.tsv - https://github.com/mapping-commons/sssom/issues/449 mapping_tool_version: description: Version string that denotes the version of the mapping tool used. range: string instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: v3.2 mapping_date: description: The date the mapping was asserted. This is different from the date the mapping was published or compiled in a SSSOM file. slot_uri: dcterms:created range: date instantiates: - sssom:Propagatable examples: - value: "2021-01-01" annotations: propagated: true publication_date: description: The date the mapping was published. This is different from the date the mapping was asserted. 
slot_uri: dcterms:issued range: date examples: - value: "2021-01-01" review_date: description: The date the mapping was reviewed. This is different from the date the mapping was asserted and published. If this field is used in a mapping, reviewer_id and/or reviewer_label MUST also be set. range: date examples: - value: "2021-01-01" instantiates: - sssom:Versionable annotations: added_in: "1.1" see_also: - https://github.com/mapping-commons/sssom/issues/511 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/review_date.sssom.tsv confidence: description: A value assigned by the creator of the mapping to denote the creator's confidence or estimated probability that the mapping record is correct. A value of 1.0 means the creator has full confidence in the correctness of the mapping record, while a value of 0.0 means the creator is fully unsure whether the mapping record is correct or not. When not explicitly specified, confidence estimation algorithms should consider the mapping confidence to be 1.0 by default. range: double minimum_value: 0.0 maximum_value: 1.0 see_also: - https://mapping-commons.github.io/sssom/confidence-model examples: - value: "0.95" description: A confidence score of 0.95, indicating 95% confidence. reviewer_agreement: description: A value assigned by the reviewer of the mapping to denote their confidence that the mapping record is correct. A value of 1.0 means the reviewer fully agrees with the mapping record. A value of -1.0 means the reviewer fully disagrees with the mapping record. A value of 0.0 means the reviewer is not sure whether the mapping record is correct or not. When not explicitly specified, confidence estimation algorithms should consider the reviewer agreement to be 1.0 by default.
range: double minimum_value: -1.0 maximum_value: 1.0 instantiates: - sssom:Versionable annotations: added_in: "1.1" see_also: - https://mapping-commons.github.io/sssom/confidence-model - https://github.com/mapping-commons/sssom/issues/510 - https://github.com/mapping-commons/sssom/pull/519 examples: - value: "1.0" description: A reviewer agreement of 1.0 denotes that the reviewer considers the mapping record to be correct with full confidence - value: "-1.0" description: A reviewer agreement of -1.0 denotes that the reviewer considers the mapping record to be incorrect with full confidence - value: "0.0" description: A reviewer agreement of 0.0 denotes that the reviewer is not sure whether the mapping record is correct or not. subject_match_field: description: >- A list of properties, annotations or attributes related to the subject that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section. range: EntityReference multivalued: true instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: rdfs:label description: "The RDFS label property (rdfs:label) was used to match the subject." - value: skos:prefLabel description: "The SKOS preferred label property (skos:prefLabel) was used to match the subject." see_also: - https://mapping-commons.github.io/sssom/mapping-justifications/#lexical-matching - https://github.com/mapping-commons/sssom/issues/413 object_match_field: description: >- A list of properties, annotations or attributes related to the object that was used to establish the match. This property is recommended for use in conjunction with mapping justifications related to lexical matching, such as `semapv:LexicalMatching`. For additional information see the 'See Also' section. 
range: EntityReference multivalued: true instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: rdfs:label description: "The RDFS label property (rdfs:label) was used to match the object." - value: skos:prefLabel description: "The SKOS preferred label property (skos:prefLabel) was used to match the object." see_also: - https://mapping-commons.github.io/sssom/mapping-justifications/#lexical-matching - https://github.com/mapping-commons/sssom/issues/413 match_string: description: String that is shared by subj/obj. It is recommended to indicate the fields for the match using the object and subject_match_field slots. range: string multivalued: true examples: - value: "gala" description: "The 'gala' string was matched for both subject and object." subject_preprocessing: description: Method of preprocessing applied to the fields of the subject. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows. range: EntityReference multivalued: true instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: semapv:Stemming - value: semapv:StopWordRemoval object_preprocessing: description: Method of preprocessing applied to the fields of the object. If different preprocessing steps were performed on different fields, it is recommended to store the match in separate rows. range: EntityReference multivalued: true instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: semapv:Stemming - value: semapv:StopWordRemoval curation_rule: description: A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. 
The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule. range: EntityReference instantiates: - sssom:Propagatable annotations: propagated: true multivalued: true examples: - value: DISEASE_MAPPING_COMMONS_RULES:MPR2 description: A reference to the Disease Mapping Commons rule with the ID MPR2. see_also: - https://github.com/mapping-commons/sssom/issues/166 - https://github.com/mapping-commons/sssom/pull/258 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule.sssom.tsv - https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule-propagated.sssom.tsv curation_rule_text: description: A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. The textual representation of curation rule is intended to be used in cases where the creation of a resource is not practical from the perspective of the mapping_provider. range: string instantiates: - sssom:Propagatable annotations: propagated: true multivalued: true examples: - value: "The two phenotypes inhere in homologous structures and exhibit the same phenotypic quality." - value: "The two diseases are used synonymous in the medical literature." 
see_also: - https://github.com/mapping-commons/sssom/issues/166 - https://github.com/mapping-commons/sssom/pull/258 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule_text.sssom.tsv - https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule_text-propagated.sssom.tsv similarity_score: description: A score between 0 and 1 to denote the similarity between two entities, where 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction with the similarity_measure field, to document, for example, the lexical or semantic match of a matching algorithm. range: double minimum_value: 0.0 maximum_value: 1.0 examples: - value: "0.95" description: A similarity score of 0.95, indicating 95% similarity. see_also: - https://github.com/mapping-commons/sssom/issues/385 - https://github.com/mapping-commons/sssom/pull/386 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/similarity_score.sssom.tsv similarity_measure: description: The measure used for computing a similarity score. This field is meant to be used in conjunction with the similarity_score field, to document, for example, the lexical or semantic match of a matching algorithm. To make processing this field as unambiguous as possible, we recommend using wikidata CURIEs, but the type of this field is deliberately unspecified. range: string instantiates: - sssom:Propagatable annotations: propagated: true examples: - value: https://www.wikidata.org/entity/Q865360 description: The Wikidata IRI for the Jaccard index measure. - value: wikidata:Q865360 description: The Wikidata CURIE for the Jaccard index measure. - value: Levenshtein distance description: A score to measure the distance between two character sequences.
see_also: - https://github.com/mapping-commons/sssom/issues/385 - https://github.com/mapping-commons/sssom/pull/386 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/similarity_score.sssom.tsv issue_tracker_item: description: The issue tracker item discussing this mapping. range: EntityReference examples: - value: SSSOM_GITHUB_ISSUE:166 description: A URL resolving to an issue discussing a new SSSOM element request see_also: - https://github.com/mapping-commons/sssom/issues/78 - https://github.com/mapping-commons/sssom/pull/259 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/issue_tracker_item.sssom.tsv issue_tracker: description: A URL location of the issue tracker for this entity. range: NonRelativeURI examples: - value: https://github.com/mapping-commons/mh_mapping_initiative/issues description: A URL resolving to the issue tracker of the Mouse-Human mapping initiative see_also: - https://github.com/mapping-commons/sssom/issues/78 - https://github.com/mapping-commons/sssom/pull/259 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/issue_tracker.sssom.tsv see_also: description: A URL specific for the mapping instance. E.g. for kboom we have a per-mapping image that shows surrounding axioms that drive probability. Could also be a github issue URL that discussed a complicated alignment slot_uri: rdfs:seeAlso examples: - value: https://github.com/mapping-commons/mh_mapping_initiative/pull/41 description: A URL pointing to the pull request that introduced the mapping. range: NonRelativeURI multivalued: true see_also: - https://github.com/mapping-commons/sssom/issues/422 other: description: "Pipe separated list of key value pairs for properties not part of the SSSOM spec. Can be used to encode additional provenance data. NOTE. This field is not recommended for general use, and should be used sparingly. 
See https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv for an alternative approach based on extension slots." range: string comment: description: Free text field containing either curator notes or text generated by a tool providing additional information. slot_uri: rdfs:comment range: string examples: - value: This mapping is weird in that the hierarchical position of the two terms is very different. description: A comment explaining a mapping author's reservation about a mapping. extension_definitions: description: A list that defines the extension slots used in the mapping set. range: extension definition multivalued: true see_also: - https://github.com/mapping-commons/sssom/issues/328 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv record_id: description: >- A unique identifier for a mapping record, that is for an instance of the Mapping class (in the SSSOM/TSV serialisation, this corresponds to an individual row after propagation is applied). This slot is intended to uniquely identify one such record within a mapping set and may for example act as the resource identifier for the record when it is serialised into RDF. This slot MUST NOT be used to “group” several records together to indicate that they pertain to a single mapping (for example, that they represent different versions of the same mapping), by assigning the same ID to several records. When it is used, every record within a set MUST have a unique, non-empty value. The identifier MUST be a URI; beyond that, its format is unconstrained and the identifier MUST be treated as an opaque string. range: EntityReference instantiates: - sssom:Versionable annotations: added_in: "1.1" see_also: - https://github.com/mapping-commons/sssom/issues/359 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/record-ids.sssom.tsv classes: mapping set: description: Represents a set of mappings.
slot_usage: license: required: true similarity_measure: instantiates: - sssom:Versionable annotations: added_in: "1.1" curation_rule: instantiates: - sssom:Versionable annotations: added_in: "1.1" curation_rule_text: instantiates: - sssom:Versionable annotations: added_in: "1.1" slots: - sssom_version - curie_map - mappings - mapping_set_id - mapping_set_version - mapping_set_source - mapping_set_title - mapping_set_description - mapping_set_confidence - creator_id - creator_label - license - subject_type - subject_source - subject_source_version - object_type - object_source - object_source_version - predicate_type - mapping_provider - cardinality_scope - mapping_tool - mapping_tool_id - mapping_tool_version - mapping_date - publication_date - subject_match_field - object_match_field - subject_preprocessing - object_preprocessing - similarity_measure - curation_rule - curation_rule_text - see_also - issue_tracker - other - comment - extension_definitions mapping: description: Represents an individual mapping between a pair of entities. 
slots: - record_id - subject_id - subject_label - subject_category - predicate_id - predicate_label - predicate_modifier - object_id - object_label - object_category - mapping_justification - author_id - author_label - reviewer_id - reviewer_label - creator_id - creator_label - license - subject_type - subject_source - subject_source_version - object_type - object_source - object_source_version - predicate_type - mapping_provider - mapping_source - mapping_cardinality - cardinality_scope - mapping_tool - mapping_tool_id - mapping_tool_version - mapping_date - publication_date - review_date - confidence - reviewer_agreement - curation_rule - curation_rule_text - subject_match_field - object_match_field - match_string - subject_preprocessing - object_preprocessing - similarity_score - similarity_measure - see_also - issue_tracker_item - other - comment class_uri: owl:Axiom unique_keys: record_identifier: description: >- Each mapping within a mapping set MAY be identified by a unique, opaque record identifier. This slot MUST be used consistently, in that either all mappings in the set have such a record identifier, or none of them have one. The behaviour when a set contains both mappings with a record identifier and mappings without a record identifier is unspecified. The behaviour when two mappings have the same record identifier is unspecified.
unique_key_slots: - record_id rules: - preconditions: slot_conditions: subject_type: equals_string: "rdfs literal" postconditions: slot_conditions: subject_label: required: true - preconditions: slot_conditions: subject_type: none_of: - equals_string: "rdfs literal" postconditions: slot_conditions: subject_id: required: true - preconditions: slot_conditions: object_type: equals_string: "rdfs literal" postconditions: slot_conditions: object_label: required: true - preconditions: slot_conditions: object_type: none_of: - equals_string: "rdfs literal" postconditions: slot_conditions: object_id: required: true - description: If a review date is provided, then at least one of reviewer_id or reviewer_label must also be provided preconditions: slot_conditions: review_date: required: true postconditions: any_of: - slot_conditions: reviewer_id: required: true - slot_conditions: reviewer_label: required: true - description: If a reviewer agreement value is provided, then at least one of reviewer_id or reviewer_label must also be provided preconditions: slot_conditions: reviewer_agreement: required: true postconditions: any_of: - slot_conditions: reviewer_id: required: true - slot_conditions: reviewer_label: required: true mapping registry: description: A registry for managing mapping sets. It holds a set of mapping set references, and can import other registries. slots: - mapping_registry_id - mapping_registry_title - mapping_registry_description - imports - mapping_set_references - documentation - homepage - issue_tracker mapping set reference: description: A reference to a mapping set. It allows to augment mapping set metadata from the perspective of the registry, for example, providing confidence, or a local filename or a grouping. slots: - mapping_set_id - mirror_from - registry_confidence - mapping_set_group - last_updated - local_name prefix: slots: - prefix_name - prefix_url extension definition: description: A definition of an extension (non-standard) slot.
attributes: slot_name: description: The name of the extension slot. range: ncname required: true property: description: The property associated with the extension slot. It is intended to provide a non-ambiguous meaning to the slot (contrary to the slot_name, which for brevity reasons may be ambiguous). range: uriorcurie type_hint: description: Expected type of the values of the extension slot. range: uriorcurie Propagatable: class_uri: sssom:Propagatable description: Metamodel extension class to describe slots whose value can be propagated down from the MappingSet class to the Mapping class. see_also: - https://github.com/mapping-commons/sssom/issues/305 attributes: propagated: description: Indicates whether a slot can be propagated from a mapping set down to individual mappings. range: boolean Versionable: class_uri: sssom:Versionable description: Metamodel extension class to manage slots that may not exist in all versions of the model. attributes: added_in: description: The version of the specification in which the slot was added. If not specified, the slot must be assumed to have been added in version 1.0. range: sssom_version_enum NoTermFound: class_uri: sssom:NoTermFound description: sssom:NoTermFound can be used in place of a subject_id or object_id when the corresponding entity could not be found. It SHOULD be used in conjunction with a corresponding subject_source or object_source to signify where the term was not found.
see_also: - https://github.com/mapping-commons/sssom/issues/28 - https://github.com/mapping-commons/sssom/blob/master/examples/schema/no_term_found.sssom.tsv ================================================ FILE: tests/__init__.py ================================================ ================================================ FILE: tests/input/CONFIG.yaml ================================================ model_name: template-config-model root_schema: config_model model_organization: linkml model_author: Harold Solbrig model_author_email: solbrig@jhu.edu model_synopsis: Configuration parameters for LinkML model generation model_description: |- The parameters used to create and generate a new model derived from the LinkML Model Template # Trove Classifiers (https://pypi.org/classifiers/) -- copied to setup.cfg only if not already present classifiers: - "Development Status :: 4 - Beta" - "Environment :: Console" - "Intended Audience :: Developers" - "Intended Audience :: Science/Research" - "Intended Audience :: Information Technology" - "License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication" - "Programming Language :: Python :: 3 :: Only" - "Programming Language :: Python :: 3.7" - "Programming Language :: Python :: 3.8" - "Programming Language :: Python :: 3.9" # Keywords -- copied to setup.cfg only if not already present keywords: [linkml, LOD, Modeling, Linked open data, model] # Elements to be generated # See: https://github.com/linkml/linkml/generators/README.md for what the outputs look like # Possible values: graphql -- graphql schema for the model # json -- LinkML model definition in JSON syntax # jsonld_context -- JSON-LD context definitions for model # json_schema -- Model schema in json schema # owl -- OWL representation of model schema # rdf -- LinkML Model definition in RDF # shex -- LinkML model definition in ShEx generate: [jsonld_context, json_schema] ================================================ FILE: tests/input/README.md 
================================================ # Tests input directory This file contains samples of your schema. Samples can written in yaml, json, rdf or any other language that the [https://linkml.github.io/linkml-runtime]() importers and exporters support. `test_examples.py` will iterate over this directory, loading each test and: 1) For each runtime generator (yaml, json, rdf, ...) will emit the output in that language 2) Will compare the output to its expected value in the `output` directory and will alert you if something has changed 3) If something HAS changed, will update the output accordingly. [ ] Add a manifest setup https://linkml.github.io/linkml-template-config-model, so we can specify whether we expect the tests to pass or fail and, if they fail, why. ================================================ FILE: tests/test_added_in_annotations.py ================================================ import unittest from jsonasobj2 import as_dict from linkml_runtime.utils.schemaview import SchemaView, load_schema_wrap CURRENT_VERSION = "1.1" class TestAddedInAnnotationTestCase(unittest.TestCase): """Checks that all newly added slots are annotated as such.""" def test_new_slots(self): # For all the classes we care about, we list the pre-existing # slots in the last version before the current one. # That list should be updated after every release corresponding # to a new version of the spec (e.g. after the 1.1 release, add # all the 1.1 slots). 
baseslots = { "mapping set": [ "curie_map", "mappings", "mapping_set_id", "mapping_set_version", "mapping_set_source", "mapping_set_title", "mapping_set_description", "creator_id", "creator_label", "license", "subject_type", "subject_source", "subject_source_version", "object_type", "object_source", "object_source_version", "mapping_provider", "mapping_tool", "mapping_tool_version", "mapping_date", "publication_date", "subject_match_field", "object_match_field", "subject_preprocessing", "object_preprocessing", "see_also", "issue_tracker", "other", "comment", "extension_definitions", ], "mapping": [ "subject_id", "subject_label", "subject_category", "predicate_id", "predicate_label", "predicate_modifier", "object_id", "object_label", "object_category", "mapping_justification", "author_id", "author_label", "reviewer_id", "reviewer_label", "creator_id", "creator_label", "license", "subject_type", "subject_source", "subject_source_version", "object_type", "object_source", "object_source_version", "mapping_provider", "mapping_source", "mapping_cardinality", "mapping_tool", "mapping_tool_version", "mapping_date", "publication_date", "confidence", "curation_rule", "curation_rule_text", "subject_match_field", "object_match_field", "match_string", "subject_preprocessing", "object_preprocessing", "similarity_score", "similarity_measure", "see_also", "issue_tracker_item", "other", "comment", ], } unannotated = [] annotated_with_wrong_version = [] sv = SchemaView(load_schema_wrap("src/sssom_schema/schema/sssom_schema.yaml")) for class_name, class_view in sv.all_classes().items(): baseslots_for_class = baseslots.get(class_name) if baseslots_for_class is None: continue for slot_name in class_view.slots: if slot_name in baseslots_for_class: # This is a pre-existing slot, move along continue # New slot, check for added_in annotation slot_view = sv.induced_slot(slot_name, class_name) added_in = as_dict(slot_view.annotations).get("added_in", None) if added_in is None: 
unannotated.append( f"Slot {slot_name} in class {class_name} is not annotated" ) else: version = added_in.get("value") if version != CURRENT_VERSION: annotated_with_wrong_version.append( f"Slot {slot_name} in class {class_name} is annotated with a wrong version ({version})" ) self.assertListEqual([], unannotated) self.assertListEqual([], annotated_with_wrong_version) if __name__ == "__main__": unittest.main() ================================================ FILE: utils/get-value.sh ================================================ #!/usr/bin/bash # get the value of a key in the about.yaml file # https://stackoverflow.com/questions/1221833/pipe-output-and-capture-exit-status-in-bash grep $1 about.yaml | sed "s/$1:[[:space:]]//" ; test ${PIPESTATUS[0]} -eq 0